# -----------------------------------------------------------------------------
# #027 - Data Structures for Civil/Structural Engineers: Pandas 101
# -----------------------------------------------------------------------------
'''
Hi everybody from the flocode newsletter 👋

This is all the code from article #027 - Data Structures for Civil/Structural Engineers:
Pandas 101

You can find the supporting data files for the examples here:
https://github.com/joreilly86/Engineering-Fundamentals/tree/main/Pandas
'''
# Importing pandas library | |
import pandas as pd | |
# ----------------------------------------------------------------------------- | |
# Case Study 2: Analyzing Flow Data | |
# ----------------------------------------------------------------------------- | |
import pandas as pd | |
# Read the CSV file into a pandas DataFrame | |
df = pd.read_csv('flows.csv', parse_dates=['time']) | |
# Print the first few rows | |
print(df.head()) | |
# Get basic information about the DataFrame | |
print(df.info()) | |
# Describe the data (summary statistics) | |
print(df.describe()) | |
# Filter data based on a condition | |
print(df[df['flow'] > 35]) | |
# Group data by day and calculate mean flow | |
daily_mean = df.groupby(pd.Grouper(key='time', freq='D')).mean() | |
print(daily_mean) | |
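# As a sketch of the same idea, .agg can return several daily statistics at once
# (assumes the same 'time' and 'flow' columns as above):
daily_stats = df.groupby(pd.Grouper(key='time', freq='D'))['flow'].agg(['mean', 'max', 'min'])
print(daily_stats)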
# Plot the data | |
import matplotlib.pyplot as plt | |
plt.figure(figsize=(12, 6)) | |
plt.plot(df['time'], df['flow']) | |
plt.xlabel('Time') | |
plt.ylabel('Flow (m³/s)') | |
plt.title('River Flow over Time') | |
plt.show() | |
# Resample data to a different frequency (e.g., hourly to daily) | |
daily_flows = df.resample('D', on='time').mean() | |
print(daily_flows) | |
# Handle missing data | |
print(df.isnull().sum()) # Check for missing values | |
df = df.dropna() # Drop rows with missing values | |
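# An alternative sketch: instead of dropping rows, short gaps in a time series can
# be filled by interpolation (assumes 'flow' is numeric and 'time' is sorted):
# df['flow'] = df['flow'].interpolate(method='linear')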
# Write the DataFrame back to a CSV file | |
df.to_csv('cleaned_flows.csv', index=False) | |
# ----------------------------------------------------------------------------- | |
# Case Study 4: Analyzing AISC Steel Beam Data with Pandas | |
# ----------------------------------------------------------------------------- | |
import pandas as pd | |
# Constants | |
load = 80 # kip | |
load_factor = 1.6 | |
length_ft = 50 # feet | |
max_depth_in = 36 # inches | |
# Load the Excel file | |
df = pd.read_excel('aisc-shapes-database-v16.0.xlsx', sheet_name='Database v16.0') | |
# Convert relevant columns to numeric types | |
columns_to_convert = ['d', 'Zx', 'W'] | |
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce') | |
# Calculate the factored load | |
factored_load = load * load_factor | |
# Calculate the required maximum moment (kip-ft) | |
max_moment = factored_load * (length_ft / 4) | |
# Convert the maximum moment to kip-in by multiplying by 12 (1 ft = 12 in) | |
max_moment_kip_in = max_moment * 12 | |
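# Worked values for the constants above: factored load = 80 * 1.6 = 128 kip,
# midspan moment M = P*L/4 = 128 * 50 / 4 = 1600 kip-ft = 1600 * 12 = 19,200 kip-in.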
# Filter beams by depth, checking moment capacity, and ensuring only W shapes | |
suitable_beams = df[ | |
(df['d'] <= max_depth_in) & | |
(df['Zx'] * 36 >= max_moment_kip_in) & # Assuming a yield stress of 36 ksi | |
(df['Type'].str.strip().str.startswith('W')) # Filter only W shapes | |
] | |
# Sort by weight to find the lightest beam | |
lightest_beam = suitable_beams.sort_values(by='W', ascending=True).head(1) | |
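# Sketch of a guard: if no section satisfies the filters, suitable_beams is empty
# and lightest_beam has no rows.
if suitable_beams.empty:
    print("No W shape in the database satisfies the depth and moment constraints.")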
# Required Section Modulus | |
required_Zx = max_moment_kip_in / 36 # 36 ksi is the yield stress for A36 steel | |
print("Required Section Modulus:", required_Zx, "in^3") | |
# Display the lightest suitable W shape beam | |
print("The lightest suitable W shape beam that can resist a 50 kip point load at midspan within the given constraints:") | |
print(lightest_beam[['AISC_Manual_Label', 'd', 'Zx', 'W']]) | |
# ----------------------------------------------------------------------------- | |
# Pandas Methods Overview | |
# ----------------------------------------------------------------------------- | |
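# Note: the snippets below are illustrative; each assumes a DataFrame with the
# referenced columns (e.g. 'Load', 'Area', 'Deflection', 'Station') already loaded.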
# Data Loading and Saving | |
data = pd.read_csv('material_tests.csv') | |
data.to_csv('processed_data.csv', index=False) | |
# DataFrame Creation | |
data = pd.DataFrame({'Element': ['Beam', 'Column', 'Slab'], 'Capacity': [120, 180, 90]}) | |
# DataFrame Exploration | |
data.head(3) | |
data.tail(3) | |
data.info() | |
data.describe() | |
# Data Manipulation | |
data.sort_values(by=['Element', 'Capacity'], ascending=[True, False]) | |
clean_data = data.dropna(subset=['LoadingType']) | |
data['Displacement'] = data['Displacement'].fillna(0) | |
# Grouping and Aggregation | |
grouped = data.groupby('Element') | |
average_capacity = grouped['Capacity'].mean() | |
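# Named aggregation (a sketch using the same 'Capacity' column) collects several
# statistics per group in one call:
capacity_summary = data.groupby('Element').agg(
    mean_capacity=('Capacity', 'mean'),
    max_capacity=('Capacity', 'max'),
)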
# Merging DataFrames | |
tests = pd.DataFrame({'SampleID': [1, 2, 3], 'Material': ['Steel', 'Concrete', 'Wood']}) | |
locations = pd.DataFrame({'SampleID': [1, 3, 4], 'Location': ['Building A', 'Building B', 'Building C']}) | |
combined = pd.merge(tests, locations, on='SampleID') | |
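# pd.merge defaults to an inner join, so SampleIDs 2 and 4 are dropped above; an
# outer join keeps unmatched samples with NaN in the missing columns (sketch):
combined_all = pd.merge(tests, locations, on='SampleID', how='outer')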
# Applying Functions | |
data['LoadRating'] = data.apply(lambda x: x['Load'] / x['Area'], axis=1) | |
# Pivot Tables | |
pivot = data.pivot_table(values='Deflection', index='Element', columns='LoadCase', aggfunc='mean') | |
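# pivot_table can also fill empty combinations and append row/column totals
# (sketch, same assumed columns as above):
pivot_with_totals = data.pivot_table(values='Deflection', index='Element',
                                     columns='LoadCase', aggfunc='mean',
                                     fill_value=0, margins=True)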
# Data Visualization | |
data.plot(kind='scatter', x='Time', y='Displacement') | |
# Data Transformations | |
smoothed = data['Strain'].rolling(window=5).mean() | |
data = data[data['Description'].str.contains('Concrete')] | |
# Converting to NumPy Array | |
arr = data[['X', 'Y', 'Z']].to_numpy() | |
# Data Cleaning | |
unique_data = data.drop_duplicates(subset=['Station', 'Reading']) | |
data.loc[data['Capacity'] > 100, 'Status'] = 'High' | |
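# For a two-way label, numpy.where is a vectorised if/else (sketch):
import numpy as np
data['Status'] = np.where(data['Capacity'] > 100, 'High', 'Normal')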