# -----------------------------------------------------------------------------
# #027 - Data Structures for Civil/Structural Engineers: Pandas 101
# -----------------------------------------------------------------------------
'''
Hi everybody from the flocode newsletter 👋

This is all the code from article #027 - Data Structures for Civil/Structural Engineers:
Pandas 101

You can find the supporting data files for the examples here:
https://github.com/joreilly86/Engineering-Fundamentals/tree/main/Pandas
'''
# Importing pandas library | |
import pandas as pd | |
# ----------------------------------------------------------------------------- | |
# Case Study 2: Analyzing Flow Data | |
# ----------------------------------------------------------------------------- | |
import pandas as pd | |
# Read the CSV file into a pandas DataFrame | |
df = pd.read_csv('flows.csv', parse_dates=['time']) | |
# Print the first few rows | |
print(df.head()) | |
# Get basic information about the DataFrame | |
print(df.info()) | |
# Describe the data (summary statistics) | |
print(df.describe()) | |
# Filter data based on a condition | |
print(df[df['flow'] > 35]) | |
# Group data by day and calculate mean flow | |
daily_mean = df.groupby(pd.Grouper(key='time', freq='D')).mean() | |
print(daily_mean) | |
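# As a sketch of the same idea, .agg can return several daily statistics at once
# (assumes the same 'time' and 'flow' columns as above):
daily_stats = df.groupby(pd.Grouper(key='time', freq='D'))['flow'].agg(['mean', 'max', 'min'])
print(daily_stats)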
# Plot the data | |
import matplotlib.pyplot as plt | |
plt.figure(figsize=(12, 6)) | |
plt.plot(df['time'], df['flow']) | |
plt.xlabel('Time') | |
plt.ylabel('Flow (m³/s)') | |
plt.title('River Flow over Time') | |
plt.show() | |
# Resample data to a different frequency (e.g., hourly to daily) | |
daily_flows = df.resample('D', on='time').mean() | |
print(daily_flows) | |
# Handle missing data | |
print(df.isnull().sum()) # Check for missing values | |
df = df.dropna() # Drop rows with missing values | |
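# An alternative sketch: instead of dropping rows, short gaps in a time series can
# be filled by interpolation (assumes 'flow' is numeric and 'time' is sorted):
# df['flow'] = df['flow'].interpolate(method='linear')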
# Write the DataFrame back to a CSV file | |
df.to_csv('cleaned_flows.csv', index=False) | |
# ----------------------------------------------------------------------------- | |
# Case Study 4: Analyzing AISC Steel Beam Data with Pandas | |
# ----------------------------------------------------------------------------- | |
import pandas as pd | |
# Constants | |
load = 80 # kip | |
load_factor = 1.6 | |
length_ft = 50 # feet | |
max_depth_in = 36 # inches | |
# Load the Excel file | |
df = pd.read_excel('aisc-shapes-database-v16.0.xlsx', sheet_name='Database v16.0') | |
# Convert relevant columns to numeric types | |
columns_to_convert = ['d', 'Zx', 'W'] | |
df[columns_to_convert] = df[columns_to_convert].apply(pd.to_numeric, errors='coerce') | |
# Calculate the factored load | |
factored_load = load * load_factor | |
# Calculate the required maximum moment (kip-ft) | |
max_moment = factored_load * (length_ft / 4) | |
# Convert the maximum moment to kip-in by multiplying by 12 (1 ft = 12 in) | |
max_moment_kip_in = max_moment * 12 | |
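# Worked values for the constants above: factored load = 80 * 1.6 = 128 kip,
# midspan moment M = P*L/4 = 128 * 50 / 4 = 1600 kip-ft = 1600 * 12 = 19,200 kip-in.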
# Filter beams by depth, checking moment capacity, and ensuring only W shapes | |
suitable_beams = df[ | |
(df['d'] <= max_depth_in) & | |
(df['Zx'] * 36 >= max_moment_kip_in) & # Assuming a yield stress of 36 ksi | |
(df['Type'].str.strip().str.startswith('W')) # Filter only W shapes | |
] | |
# Sort by weight to find the lightest beam | |
lightest_beam = suitable_beams.sort_values(by='W', ascending=True).head(1) | |
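# Sketch of a guard: if no section satisfies the filters, suitable_beams is empty
# and lightest_beam has no rows.
if suitable_beams.empty:
    print("No W shape in the database satisfies the depth and moment constraints.")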
# Required Section Modulus | |
required_Zx = max_moment_kip_in / 36 # 36 ksi is the yield stress for A36 steel | |
print("Required Section Modulus:", required_Zx, "in^3") | |
# Display the lightest suitable W shape beam | |
print("The lightest suitable W shape beam that can resist a 50 kip point load at midspan within the given constraints:") | |
print(lightest_beam[['AISC_Manual_Label', 'd', 'Zx', 'W']]) | |
# ----------------------------------------------------------------------------- | |
# Pandas Methods Overview | |
# ----------------------------------------------------------------------------- | |
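# Note: the snippets below are illustrative; each assumes a DataFrame with the
# referenced columns (e.g. 'Load', 'Area', 'Deflection', 'Station') already loaded.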
# Data Loading and Saving | |
data = pd.read_csv('material_tests.csv') | |
data.to_csv('processed_data.csv', index=False) | |
# DataFrame Creation | |
data = pd.DataFrame({'Element': ['Beam', 'Column', 'Slab'], 'Capacity': [120, 180, 90]}) | |
# DataFrame Exploration | |
data.head(3) | |
data.tail(3) | |
data.info() | |
data.describe() | |
# Data Manipulation | |
data.sort_values(by=['Element', 'Capacity'], ascending=[True, False]) | |
clean_data = data.dropna(subset=['LoadingType']) | |
data['Displacement'] = data['Displacement'].fillna(0) | |
# Grouping and Aggregation | |
grouped = data.groupby('Element') | |
average_capacity = grouped['Capacity'].mean() | |
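# Named aggregation (a sketch using the same 'Capacity' column) collects several
# statistics per group in one call:
capacity_summary = data.groupby('Element').agg(
    mean_capacity=('Capacity', 'mean'),
    max_capacity=('Capacity', 'max'),
)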
# Merging DataFrames | |
tests = pd.DataFrame({'SampleID': [1, 2, 3], 'Material': ['Steel', 'Concrete', 'Wood']}) | |
locations = pd.DataFrame({'SampleID': [1, 3, 4], 'Location': ['Building A', 'Building B', 'Building C']}) | |
combined = pd.merge(tests, locations, on='SampleID') | |
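# pd.merge defaults to an inner join, so SampleIDs 2 and 4 are dropped above; an
# outer join keeps unmatched samples with NaN in the missing columns (sketch):
combined_all = pd.merge(tests, locations, on='SampleID', how='outer')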
# Applying Functions | |
data['LoadRating'] = data.apply(lambda x: x['Load'] / x['Area'], axis=1) | |
# Pivot Tables | |
pivot = data.pivot_table(values='Deflection', index='Element', columns='LoadCase', aggfunc='mean') | |
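# pivot_table can also fill empty combinations and append row/column totals
# (sketch, same assumed columns as above):
pivot_with_totals = data.pivot_table(values='Deflection', index='Element',
                                     columns='LoadCase', aggfunc='mean',
                                     fill_value=0, margins=True)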
# Data Visualization | |
data.plot(kind='scatter', x='Time', y='Displacement') | |
# Data Transformations | |
smoothed = data['Strain'].rolling(window=5).mean() | |
data = data[data['Description'].str.contains('Concrete')] | |
# Converting to NumPy Array | |
arr = data[['X', 'Y', 'Z']].to_numpy() | |
# Data Cleaning | |
unique_data = data.drop_duplicates(subset=['Station', 'Reading']) | |
data.loc[data['Capacity'] > 100, 'Status'] = 'High' | |
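# For a two-way label, numpy.where is a vectorised if/else (sketch):
import numpy as np
data['Status'] = np.where(data['Capacity'] > 100, 'High', 'Normal')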