Last active
May 17, 2024 08:54
-
-
Save quinnkeast/ee8a0fb5ac50ada0031169a754a0b104 to your computer and use it in GitHub Desktop.
Sum data
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
| import sys | |
| from datetime import datetime, timedelta | |
| import pytz | |
# Exactly one positional argument (the CSV path) must follow the script name.
cli_args = sys.argv[1:]
if len(cli_args) != 1:
    print("Usage: python filter_csv.py <path_to_csv_file>")
    sys.exit(1)

# Path of the CSV file supplied on the command line.
(file_path,) = cli_args
# Function to filter and consolidate the CSV
def filter_consolidate_csv(file_path):
    """Sum the ``value`` column per calendar day and write the result to a new CSV.

    The input must contain ``startdate`` and ``value`` columns. An optional
    leading ``sep=<char>`` line (a spreadsheet delimiter hint) is skipped and
    its character is used as the delimiter; without the hint the file is read
    as comma-separated. Timestamps are parsed as UTC and reduced to their date
    before grouping. Unparseable dates are coerced to NaT, so those rows do
    not contribute to any daily total.

    The result is written next to the input as ``<stem>_consolidated<suffix>``
    (``.csv`` when the input has no suffix), so the input file is never
    overwritten.

    Args:
        file_path: Path to the CSV file to consolidate.

    Returns:
        None. Problems are reported on stdout instead of being raised.
    """
    # Function-scope import: the file's import header does not provide pathlib.
    from pathlib import Path

    try:
        # Peek at the first line. Only the line terminator and leading
        # whitespace are stripped so a declared tab separator survives.
        with open(file_path, 'r') as file:
            first_line = file.readline().rstrip('\r\n').lstrip()

        # Honour the 'sep=<char>' hint: skip that line AND use its delimiter.
        has_sep_hint = first_line.startswith('sep=')
        skip_rows = 1 if has_sep_hint else 0
        declared = first_line[len('sep='):] if has_sep_hint else ''
        delimiter = declared[0] if declared else ','

        # Load the CSV file
        data = pd.read_csv(file_path, delimiter=delimiter, skiprows=skip_rows)

        # Debug: state of the frame as loaded
        print("Columns found in CSV:", data.columns.tolist())
        print("Sample data:", data.head())

        # Ensure the columns used below exist, with a clear message if not
        if 'startdate' not in data.columns:
            print("Error: 'startdate' column is missing. Check column names in CSV.")
            return  # Exit the function if 'startdate' column is not found
        if 'value' not in data.columns:
            print("Error: 'value' column is missing. Check column names in CSV.")
            return  # Nothing to sum without a 'value' column

        # Parse timestamps as UTC; values that cannot be parsed become NaT
        data['startdate'] = pd.to_datetime(data['startdate'], utc=True, errors='coerce')

        # Debug: state of the frame after date conversion
        print("Columns found in CSV:", data.columns.tolist())
        print("Sample data:", data.head())

        # Group by day and sum the values
        data['startdate'] = data['startdate'].dt.date  # Normalize to date
        consolidated_data = data.groupby('startdate').agg({'value': 'sum'}).reset_index()

        # Build the output name from the final path component only, so that
        # '.csv' inside directory names is untouched and a name without a
        # lowercase '.csv' suffix can never resolve back to the input file.
        source = Path(file_path)
        output_path = source.with_name(
            f"{source.stem}_consolidated{source.suffix or '.csv'}"
        )

        # Save the consolidated data
        consolidated_data.to_csv(output_path, index=False)
    except Exception as e:
        # Script-level boundary: report the failure instead of a traceback.
        print(f"An error occurred: {e}")
# Script entry: consolidate the CSV whose path was given on the command line.
filter_consolidate_csv(file_path=file_path)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment