Skip to content

Instantly share code, notes, and snippets.

@bubbobne
Created June 28, 2024 11:21
Show Gist options
Save bubbobne/82b55e1de0c622debd64e17d480237ec to your computer and use it in GitHub Desktop.
Merge Meteotrentino data files
import pandas as pd
import numpy as np
import os
#fem_data = pd.read_csv('temperature_fem.csv', header=0, parse_dates=[1], index_col=1, squeeze=True)
# Merge per-station CSV files into one hourly table and join it with the
# FEM dataset.
#
# Inputs (paths relative to the working directory):
#   ../stations_tot.csv        station list; must have a 'nomebreve' column
#   ./<variable>/<name>.csv    one file per station, <name> = nomebreve lowercased
#   temperature_fem.csv        FEM data, indexed by its second column (datetime)
# Output:
#   <variable>.csv             outer join of the FEM data and all station series

variable = "temperature"
# True: aggregate each hour by summing; False: by averaging.
is_sum = False
# One-hour step, used both as the resampling rule and as the index shift.
# Passing a Timedelta instead of the string alias 'H' keeps the script
# working on pandas versions where that alias is deprecated.
ONE_HOUR = pd.Timedelta(hours=1)

stations = pd.read_csv("../stations_tot.csv", index_col=0)
dfs = {}
for key, row in stations.iterrows():
    # str() guards against missing names (NaN is a float and would make the
    # substring test below raise TypeError).
    short_name = str(row['nomebreve'])
    file_path = "./" + variable + "/" + short_name.lower() + ".csv"
    # Only stations whose short name contains 'T0' and that have a data file.
    if 'T0' not in short_name or not os.path.exists(file_path):
        continue
    print(short_name)

    # The first four lines of each station file are skipped and explicit
    # column names are supplied. `squeeze=True` was dropped: it was removed
    # in pandas 2.0 and had no effect here (three data columns remain).
    df = pd.read_csv(
        file_path,
        skiprows=4,
        header=None,
        names=['Datetime', 'Value', 'Qual', 'Metadata'],
        parse_dates=[0],
        index_col=0,
        dayfirst=True,
        low_memory=False,
        encoding='latin1',
    )

    # Discard rows flagged with quality codes 140 or 255.
    # NOTE(review): presumably these codes mark invalid/missing readings --
    # confirm against the data provider's documentation.
    values = df.loc[~df['Qual'].isin([140, 255]), 'Value']

    hourly = values.resample(ONE_HOUR)
    # NOTE(review): sum() yields 0 for hours with no readings, while mean()
    # yields NaN -- confirm that is intended when is_sum is True.
    values = hourly.sum() if is_sum else hourly.mean()

    # Shift every timestamp back by one hour.
    # NOTE(review): presumably a time-reference alignment with the FEM
    # data -- confirm.
    values.index = values.index - ONE_HOUR
    dfs[key] = values

if not dfs:
    # pd.concat would raise a generic "No objects to concatenate" ValueError;
    # fail with the same exception type but an actionable message.
    raise ValueError("no station CSV files found under ./" + variable + "/")

# One column per station, keyed by the station table's index value.
final_df = pd.concat(dfs, axis=1)

# NOTE(review): the FEM file name is hard-coded to temperature even though
# `variable` is configurable -- confirm before running for another variable.
fem_data = pd.read_csv('temperature_fem.csv', header=0, parse_dates=[1], index_col=1)
d = pd.merge(fem_data, final_df, left_index=True, right_index=True, how="outer")
d.to_csv(variable + ".csv")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment