Skip to content

Instantly share code, notes, and snippets.

@aialenti
Last active March 27, 2024 14:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save aialenti/d7215cc3de25af8b8dfec4fa3a181637 to your computer and use it in GitHub Desktop.
Save aialenti/d7215cc3de25af8b8dfec4fa3a181637 to your computer and use it in GitHub Desktop.
import pandas as pd
import ast
from datetime import datetime
import numpy as np
def read_dataframes(data_dir=None):
    """
    Reads the activity weights, activity details and filtered daily
    summaries CSV files into Pandas dataframes.

    Args:
        data_dir (str | os.PathLike | None): Directory containing the three
            CSV files. Defaults to None, which resolves the file names
            relative to the current working directory.

    Returns:
        tuple: A tuple containing three pandas DataFrames, in this order:
            the contents of 'activity_weights.csv', 'activity_details.csv'
            and 'daily_summaries_filtered.csv'.
    """
    # Local import keeps this function self-contained without touching the
    # file-level import block.
    from pathlib import Path

    base_dir = Path(data_dir) if data_dir is not None else Path('.')
    activity_weights_df = pd.read_csv(base_dir / 'activity_weights.csv')
    activity_details_df = pd.read_csv(base_dir / 'activity_details.csv')
    daily_summaries_filtered_df = pd.read_csv(base_dir / 'daily_summaries_filtered.csv')
    return activity_weights_df, activity_details_df, daily_summaries_filtered_df
def find_max_weights(activity_weights_df):
    """
    Finds the maximum weight for each exercise across all activities.

    Each cell of the 'wkt_step_weights' column is parsed with
    ast.literal_eval and is expected to evaluate to a dict mapping an
    exercise to a collection of weights. Missing cells (None / NaN) and
    exercises with an empty weight collection are skipped instead of
    raising.

    Args:
        activity_weights_df (pd.DataFrame): Dataframe containing activity weights.

    Returns:
        dict: A dictionary with exercises as keys and their maximum weights as
            values. The running maximum starts at 0 for every exercise.
    """
    max_weights = {}
    # Only one column is needed, so iterate it directly rather than building
    # a Series per row with iterrows().
    for raw_weights in activity_weights_df['wkt_step_weights']:
        # Empty CSV cells are loaded as NaN, which literal_eval cannot parse.
        if not isinstance(raw_weights, str) and pd.isna(raw_weights):
            continue
        weights_dict = ast.literal_eval(raw_weights)
        for exercise, weights in weights_dict.items():
            if not weights:
                # max() of an empty collection raises ValueError.
                continue
            max_weights[exercise] = max(max_weights.get(exercise, 0), max(weights))
    return max_weights
def aggregate_activity_data(activity_details_df, activity_weights_df):
    """
    Merges activity details and weights dataframes on 'activityId' and
    calculates aggregates for each activity.

    The two frames are inner-joined, so only activities present in both
    contribute. Neither input dataframe is modified.

    Args:
        activity_details_df (pd.DataFrame): Dataframe containing activity details.
        activity_weights_df (pd.DataFrame): Dataframe containing activity weights.

    Returns:
        dict: A dictionary with activity names as keys and dictionaries of
            aggregates as values.
    """
    merged_df = pd.merge(activity_details_df, activity_weights_df, on="activityId", how="inner")
    # Convert 'Start Date' on the merged copy so the caller's dataframe is
    # left untouched.
    merged_df['Start Date'] = pd.to_datetime(merged_df['Start Date'])

    activity_aggregates_with_frequency = {}
    for name, group in merged_df.groupby('Activity Name'):
        total_sessions = len(group)
        # Span between first and last session, expressed in weeks.
        date_range = (group['Start Date'].max() - group['Start Date'].min()).days / 7
        # Spans shorter than a week are clamped to one week, which also
        # avoids division by zero for a single session.
        frequency_per_week = total_sessions / max(date_range, 1)
        activity_aggregates_with_frequency[name] = {
            'Average Moving Duration': group['Moving Duration'].mean(),
            'Moving Duration/Duration Ratio': (group['Moving Duration'] / group['Duration']).mean(),
            'Average HR': group['Average HR'].mean(),
            'Average Calories': group['Calories'].mean(),
            'Average Total Sets': group['Total Sets'].mean(),
            'Average Total Reps': group['Total Reps'].mean(),
            'Total Sessions': total_sessions,
            'Frequency per Week': frequency_per_week,
        }
    return activity_aggregates_with_frequency
def calculate_filtered_medians(df):
    """
    Calculate the median of every numeric column in a DataFrame,
    excluding rows where sleepingSeconds is less than 5000.

    Args:
        df (pd.DataFrame): The input DataFrame. It must contain a
            'sleepingSeconds' column; non-numeric columns are ignored.

    Returns:
        pd.Series: Median value of each numeric column, indexed by column
            name, computed over the rows with sleepingSeconds >= 5000.
    """
    # Keep only rows with sleepingSeconds >= 5000
    filtered_df = df[df['sleepingSeconds'] >= 5000]
    # numeric_only=True skips non-numeric columns (e.g. date strings), which
    # would otherwise make median() raise TypeError on pandas >= 2.0.
    return filtered_df.median(numeric_only=True)
# Script entry: load the CSV files, then compute and print each summary in turn.
activity_weights_df, activity_details_df, daily_summaries_filtered_df = read_dataframes()
# Maximum weight per exercise across all activities.
max_weights = find_max_weights(activity_weights_df)
print(max_weights)
# Per-activity aggregates from the details/weights join on 'activityId'.
activity_aggregates = aggregate_activity_data(activity_details_df, activity_weights_df)
print(activity_aggregates)
# Medians over the daily summaries with sleepingSeconds >= 5000.
medians = calculate_filtered_medians(daily_summaries_filtered_df)
print(medians)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment