# -----------------------------------------------------------------------------
# #027 - Data Structures for Civil/Structural Engineers: Pandas 101
# -----------------------------------------------------------------------------
'''
Hi everybody from the flocode newsletter 👋
This is all the code from article #027 - Data Structures for Civil/Structural Engineers:
Pandas 101
You can find the supporting data files for the examples here - https://github.com/joreilly86/Engineering-Fundamentals/tree/main/Pandas
'''
# Importing pandas library
import pandas as pd
# -----------------------------------------------------------------------------
# Case Study 2: Analyzing Flow Data
# -----------------------------------------------------------------------------
import pandas as pd
# Read the CSV file into a pandas DataFrame
df = pd.read_csv('flows.csv', parse_dates=['time'])
# Print the first few rows
print(df.head())
# Get basic information about the DataFrame
print(df.info())
# Describe the data (summary statistics)
print(df.describe())
# Filter data based on a condition
print(df[df['flow'] > 35])
# Group data by day and calculate mean flow
daily_mean = df.groupby(pd.Grouper(key='time', freq='D')).mean()
print(daily_mean)
# Plot the data
import matplotlib.pyplot as plt
plt.figure(figsize=(12, 6))
plt.plot(df['time'], df['flow'])
plt.xlabel('Time')
plt.ylabel('Flow (m³/s)')
plt.title('River Flow over Time')
plt.show()
# Resample data to a different frequency (e.g., hourly to daily) - equivalent to the groupby above
daily_flows = df.resample('D', on='time').mean()
print(daily_flows)
# Handle missing data
print(df.isnull().sum()) # Check for missing values
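# Alternative sketch (an assumption, not from the original article): for
# continuous flow records, interpolating short gaps can preserve more data
# than dropping rows outright.
interpolated = df.copy()
interpolated['flow'] = interpolated['flow'].interpolate(method='linear')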
df = df.dropna() # Drop rows with missing values
# Write the DataFrame back to a CSV file
df.to_csv('cleaned_flows.csv', index=False)
# -----------------------------------------------------------------------------
# Case Study 4: Analyzing AISC Steel Beam Data with Pandas
# -----------------------------------------------------------------------------
import pandas as pd
# Constants
load = 80 # kip
load_factor = 1.6
length_ft = 50 # feet
max_depth_in = 36 # inches
# Load the Excel file
df = pd.read_excel('aisc-shapes-database-v16.0.xlsx', sheet_name='Database v16.0')
# Convert relevant columns to numeric types (errors='coerce' turns non-numeric entries into NaN)
columns_to_convert = ['d', 'Zx', 'W']
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce')
# Calculate the factored load
factored_load = load * load_factor
# Calculate the required maximum moment (kip-ft): M = P * L / 4 for a point load at midspan of a simply supported beam
max_moment = factored_load * (length_ft / 4)
# Convert the maximum moment to kip-in by multiplying by 12 (1 ft = 12 in)
max_moment_kip_in = max_moment * 12
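# Quick hand check with the numbers above (added for clarity):
# factored_load = 80 * 1.6 = 128 kip
# max_moment = 128 * 50 / 4 = 1,600 kip-ft = 19,200 kip-in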
# Filter beams by depth and moment capacity, keeping only W shapes
suitable_beams = df[
    (df['d'] <= max_depth_in) &
    (df['Zx'] * 36 >= max_moment_kip_in) &  # Plastic moment Mp = Fy * Zx, with Fy = 36 ksi (resistance factor not applied)
    (df['Type'].str.strip() == 'W')  # Exact match, so WT and other W-prefixed shapes are excluded
]
# Sort by weight to find the lightest beam
lightest_beam = suitable_beams.sort_values(by='W', ascending=True).head(1)
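# Defensive sketch (an assumption, not in the original code): if nothing
# passes the filter, suitable_beams is empty, so flag that before reporting.
if suitable_beams.empty:
    raise SystemExit("No W shape satisfies the depth and moment-capacity constraints.")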
# Required Section Modulus
required_Zx = max_moment_kip_in / 36 # 36 ksi is the yield stress for A36 steel
print("Required Section Modulus:", required_Zx, "in^3")
# Display the lightest suitable W shape beam
print("The lightest W shape that can resist an 80 kip point load at midspan of a 50 ft simply supported span, within the depth limit:")
print(lightest_beam[['AISC_Manual_Label', 'd', 'Zx', 'W']])
# -----------------------------------------------------------------------------
# Pandas Methods Overview
# -----------------------------------------------------------------------------
# Data Loading and Saving
data = pd.read_csv('material_tests.csv')
data.to_csv('processed_data.csv', index=False)
# DataFrame Creation
data = pd.DataFrame({'Element': ['Beam', 'Column', 'Slab'], 'Capacity': [120, 180, 90]})
# DataFrame Exploration
data.head(3)
data.tail(3)
data.info()
data.describe()
# Data Manipulation
sorted_data = data.sort_values(by=['Element', 'Capacity'], ascending=[True, False])  # returns a new DataFrame; does not sort in place
clean_data = data.dropna(subset=['LoadingType'])
data['Displacement'] = data['Displacement'].fillna(0)
# Grouping and Aggregation
grouped = data.groupby('Element')
average_capacity = grouped['Capacity'].mean()
# Merging DataFrames
tests = pd.DataFrame({'SampleID': [1, 2, 3], 'Material': ['Steel', 'Concrete', 'Wood']})
locations = pd.DataFrame({'SampleID': [1, 3, 4], 'Location': ['Building A', 'Building B', 'Building C']})
combined = pd.merge(tests, locations, on='SampleID')
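# Note: pd.merge defaults to an inner join, so only SampleIDs present in both
# frames (1 and 3) appear in `combined`. A sketch keeping all rows instead:
all_samples = pd.merge(tests, locations, on='SampleID', how='outer')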
# Applying Functions
data['LoadRating'] = data.apply(lambda x: x['Load'] / x['Area'], axis=1)
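# Equivalent vectorized form - usually much faster than apply on large frames:
data['LoadRating'] = data['Load'] / data['Area']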
# Pivot Tables
pivot = data.pivot_table(values='Deflection', index='Element', columns='LoadCase', aggfunc='mean')
# Data Visualization (uses matplotlib, imported as plt earlier in this file)
data.plot(kind='scatter', x='Time', y='Displacement')
plt.show()
# Data Transformations
smoothed = data['Strain'].rolling(window=5).mean()
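# Note: the first window-1 values are NaN until the window fills;
# pass min_periods=1 to smooth from the first sample instead:
smoothed = data['Strain'].rolling(window=5, min_periods=1).mean()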
data = data[data['Description'].str.contains('Concrete')]
# Converting to NumPy Array
arr = data[['X', 'Y', 'Z']].to_numpy()
# Data Cleaning
unique_data = data.drop_duplicates(subset=['Station', 'Reading'])
data.loc[data['Capacity'] > 100, 'Status'] = 'High'