Created
May 17, 2024 08:51
-
-
Save quinnkeast/287defec9aab68d6dcff867248d2c853 to your computer and use it in GitHub Desktop.
Convert and filter Apple Health data by device and time range
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import sys
from datetime import datetime, timedelta
from pathlib import Path

import pandas as pd
import pytz
# Require exactly one argument: the path of the CSV export to filter.
if len(sys.argv) != 2:
    print("Usage: python filter_csv.py <path_to_csv_file>")
    sys.exit(1)

# Path of the CSV file to filter, taken from the command line.
file_path = sys.argv[1]

# Device filter: replace the placeholder with the text to look for in the
# 'sourcename' column of the export.
device_id = 'INSERT DEVICE ID'

# Fixed reference date: April 30 of the current year (naive, midnight).
reference_date = datetime(datetime.now().year, 4, 30)

# The window covers the 12 months leading up to the reference date. Step back
# one calendar year instead of 365 days so the window still starts on April 30
# when it spans a February 29 (365 days back from 2024-04-30 is 2023-05-01).
timezone = pytz.timezone("Europe/Berlin")
start_date = timezone.localize(
    reference_date.replace(year=reference_date.year - 1)
)
end_date = timezone.localize(reference_date)
print(f"Filtering from {start_date} to {end_date}")
# Function to filter the CSV
def filter_csv(file_path, *, device=None, start=None, end=None):
    """Filter a semicolon-separated CSV by device and start-date window.

    Rows are kept when their 'sourcename' contains the device text (literal
    substring match) and their 'startdate' lies within ``[start, end]``
    (inclusive). The result is written next to the input file as
    ``<stem>_filtered<suffix>``; the input file itself is never overwritten.

    Args:
        file_path: Path of the CSV file to read. A leading ``sep=...`` line,
            if present, is skipped.
        device: Text to look for in 'sourcename'. Defaults to the
            module-level ``device_id``.
        start: Timezone-aware lower bound. Defaults to the module-level
            ``start_date``.
        end: Timezone-aware upper bound. Defaults to the module-level
            ``end_date``.

    Any exception raised while reading, filtering or writing is caught and
    reported on stdout; nothing is re-raised.
    """
    # Fall back to the module-level settings when no override is given.
    if device is None:
        device = device_id
    if start is None:
        start = start_date
    if end is None:
        end = end_date

    try:
        # Peek at the first line: some exports start with a 'sep=;' hint that
        # is not part of the table and must be skipped.
        with open(file_path, 'r') as file:
            first_line = file.readline().strip()
        skip_rows = 1 if first_line.startswith('sep=') else 0

        data = pd.read_csv(file_path, delimiter=';', skiprows=skip_rows)

        # Parse to UTC-aware timestamps; unparseable values become NaT and
        # fail both range comparisons below, so those rows are dropped.
        data['startdate'] = pd.to_datetime(data['startdate'], utc=True, errors='coerce')

        # Debug output: column names and a sample of the parsed data.
        print("Columns found in CSV:", data.columns.tolist())
        print("Sample data:", data.head())

        # regex=False: device names may contain characters such as '(' or '.'
        # that must match literally. na=False: rows without a source name
        # count as non-matching instead of producing a NaN mask.
        matches_device = data['sourcename'].str.contains(device, regex=False, na=False)
        in_window = (data['startdate'] >= start) & (data['startdate'] <= end)
        filtered_data = data[matches_device & in_window]

        # Build the output name from the file name only, so directory names
        # are untouched and a missing or upper-case '.csv' extension can
        # never make the output path equal to the input path.
        source = Path(file_path)
        target = source.with_name(f"{source.stem}_filtered{source.suffix or '.csv'}")
        filtered_data.to_csv(target, index=False)
    except Exception as e:
        print(f"An error occurred: {e}")
# Run the filter on the CSV path given on the command line.
filter_csv(file_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment