Skip to content

Instantly share code, notes, and snippets.

@quinnkeast
Created May 17, 2024 08:51
Show Gist options
  • Save quinnkeast/287defec9aab68d6dcff867248d2c853 to your computer and use it in GitHub Desktop.
Save quinnkeast/287defec9aab68d6dcff867248d2c853 to your computer and use it in GitHub Desktop.
Convert and filter Apple Health data by device and time range
import pandas as pd
import sys
from datetime import datetime, timedelta
import pytz
# Validate the command line: exactly one argument (the CSV path) is expected.
if len(sys.argv) != 2:
    # Usage errors go to stderr so they never mix with the regular output.
    print("Usage: python filter_csv.py <path_to_csv_file>", file=sys.stderr)
    sys.exit(1)

# Path of the CSV file to filter, taken from the command line.
file_path = sys.argv[1]

# Device to keep. Replace the placeholder with the name of the device whose
# records should be retained.
device_id = 'INSERT DEVICE ID'

# Fixed reference date: April 30 of the current year (naive, at midnight).
reference_date = datetime(datetime.now().year, 4, 30)

# Create timezone-aware bounds in Europe/Berlin local time.
timezone = pytz.timezone("Europe/Berlin")

# The window covers the 12 months before April 30. Subtracting one calendar
# year (instead of a fixed 365 days) keeps the start on April 30 even when the
# window contains a leap day; April 30 exists in every year, so replace()
# cannot fail here.
start_date = reference_date.replace(year=reference_date.year - 1)
start_date = timezone.localize(start_date)
end_date = timezone.localize(reference_date)

print(f"Filtering from {start_date} to {end_date}")
# Function to filter the CSV
def filter_csv(file_path, device=None, start=None, end=None):
    """Filter a semicolon-delimited CSV by device name and start date.

    Rows are kept when their 'sourcename' contains the device name (literal,
    case-sensitive match) and their 'startdate' lies within the inclusive
    range [start, end]. Rows whose 'startdate' cannot be parsed become NaT
    and are therefore excluded. The result is written next to the input as
    '<name>_filtered<ext>' using pandas' default (comma) delimiter.

    Args:
        file_path: Path of the CSV file to read. A leading 'sep=...' hint
            line is skipped when present.
        device: Text that 'sourcename' must contain. Defaults to the
            module-level ``device_id``.
        start: Timezone-aware lower bound (inclusive). Defaults to the
            module-level ``start_date``.
        end: Timezone-aware upper bound (inclusive). Defaults to the
            module-level ``end_date``.

    Returns:
        None. Any error is reported on stdout instead of being raised, so the
        script keeps its best-effort behavior.
    """
    import os  # local import: only needed to derive the output path

    # Fall back to the module-level configuration when nothing is passed in.
    device = device_id if device is None else device
    start = start_date if start is None else start
    end = end_date if end is None else end

    try:
        # Peek at the first line to find out whether a 'sep=;' hint line
        # precedes the header and has to be skipped.
        with open(file_path, 'r') as file:
            first_line = file.readline().strip()
        skip_rows = 1 if first_line.startswith('sep=') else 0

        data = pd.read_csv(file_path, delimiter=';', skiprows=skip_rows)

        # Debug: show the columns before touching any of them, so a missing
        # column can be diagnosed from the output.
        print("Columns found in CSV:", data.columns.tolist())

        # Normalize the dates to UTC; unparsable values become NaT.
        data['startdate'] = pd.to_datetime(
            data['startdate'], utc=True, errors='coerce'
        )
        print("Sample data:", data.head())

        # regex=False: the device name is plain text, not a pattern.
        # na=False: rows without a source name simply do not match (a NaN in
        # the mask would make the boolean indexing below fail).
        is_device = data['sourcename'].str.contains(
            device, regex=False, na=False
        )
        # Assuming 'startdate' is sufficient for filtering for simplicity.
        in_range = (data['startdate'] >= start) & (data['startdate'] <= end)
        filtered_data = data[is_device & in_range]

        # Derive the output name from the extension only. A plain
        # str.replace('.csv', ...) leaves the path unchanged for names such
        # as 'export.CSV', which would overwrite the input file.
        root, ext = os.path.splitext(file_path)
        output_path = f"{root}_filtered{ext or '.csv'}"
        filtered_data.to_csv(output_path, index=False)
    except Exception as e:
        print(f"An error occurred: {e}")
# Script entry point: run the filter on the CSV path given on the command line.
filter_csv(file_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment