Skip to content

Instantly share code, notes, and snippets.

@kudkudak
Last active August 29, 2015 14:10
Show Gist options
  • Save kudkudak/7dfb6d3e7a1305b7b5b5 to your computer and use it in GitHub Desktop.
Save kudkudak/7dfb6d3e7a1305b7b5b5 to your computer and use it in GitHub Desktop.
Parse meteo data
import os

import numpy as np
import pandas as pd
def get_city_meteo(city):
    """Load one city's meteo CSVs and resample them to one row per hour.

    Four files named ``<city>-2010.csv`` ... ``<city>-2013.csv`` are read from
    ``c["DATA_DIR"]`` and concatenated in that order.  Only the nine columns
    listed in ``columns`` are kept.  The markers ``"unknown"`` and ``"trace"``
    are replaced by NaN and 0 respectively, everything is cast to float and
    remaining gaps are filled with ``DataFrame.interpolate()``.

    Consecutive source rows are then expanded so that every hour between them
    gets its own row: measurement columns are blended linearly between the
    two neighbouring rows, the hour column counts up by one (wrapping at
    midnight) and the date columns are copied from the row the hour belongs
    to.  Finally the last source row is repeated, with an increasing hour,
    until hour 23 is reached.

    Args:
        city: Name used to build the CSV file names (converted with ``str``).

    Returns:
        A 2-D float ``numpy.ndarray`` whose columns follow ``columns``.
        An array with zero rows is returned when the CSVs hold no data.

    Notes:
        * ``c`` is not defined in this snippet; it is assumed to be a
          module-level config mapping providing ``"DATA_DIR"``.
        * The gap between two rows is derived from the hour column only, so
          two rows with the same (rounded) hour produce no output row for the
          first of them, and gaps of whole days are not filled.
    """
    columns = [
        "date_year", "date_month", "date_day", "date_hour", "cloud_amount",
        "pressure", "temperature", "wind_speed", "percipitation_amount",
    ]
    HOUR_COL = 3  # position of "date_hour"; columns before it are the date

    frames = [
        pd.read_csv(
            os.path.join(c["DATA_DIR"], str(city) + "-201" + str(year_digit) + ".csv"),
            sep=",",
        )
        for year_digit in range(4)
    ]
    frame = pd.concat(frames, ignore_index=True)[columns]

    # Turn textual markers into numbers, then let pandas fill the holes.
    frame = frame.replace("unknown", np.nan).replace("trace", 0)
    frame = frame.astype("float").interpolate()

    # Materialise the array once instead of on every loop iteration.
    values = frame.values
    if len(values) == 0:
        return np.empty((0, len(columns)))

    rows = []  # the hourly table is built row by row
    for cur, nxt in zip(values[:-1], values[1:]):
        hour_cur = int(np.around(cur[HOUR_COL]))
        hour_next = int(np.around(nxt[HOUR_COL]))
        if hour_next < hour_cur:
            # The next observation lies past midnight.
            hour_next += 24
        gap = hour_next - hour_cur

        # Emit cur and every missing hour after it; nxt itself is emitted by
        # the following iteration (hence range(gap), not gap + 1).
        for step in range(gap):
            blend = step / float(gap)
            row = (1.0 - blend) * cur + blend * nxt

            hour = cur[HOUR_COL] + step
            if hour >= 24:
                # Past midnight: the row belongs to the next observation's
                # date and its hour wraps back into 0..23.
                row[:HOUR_COL] = nxt[:HOUR_COL]
                hour -= 24
            else:
                # Dates must not be blended into fractional values.
                row[:HOUR_COL] = cur[:HOUR_COL]
            row[HOUR_COL] = hour
            rows.append(row)

    rows.append(values[-1].copy())

    # Pad the final day up to hour 23 by repeating the last observation.
    # "< 23" (rather than "!= 23") guarantees termination for any hour value.
    while int(rows[-1][HOUR_COL]) < 23:
        padded = rows[-1].copy()
        padded[HOUR_COL] += 1
        rows.append(padded)

    return np.array(rows)
def get_meteo_data():
    """Return the hourly meteo data of all cities stacked side by side.

    ``get_city_meteo`` is called once per city in ``cities`` and the resulting
    2-D arrays are joined column-wise with ``np.hstack``.

    Returns:
        A 2-D ``numpy.ndarray`` holding each city's columns one after another,
        in the order of ``cities``.

    Raises:
        ValueError: Raised by ``np.hstack`` when the per-city arrays do not all
            have the same number of rows.
    """
    # NOTE(review): "olszyn" may be a misspelling of "olsztyn"; it is kept
    # as-is because it has to match the CSV file names on disk - confirm.
    cities = [
        "bialystok", "gorzow", "katowice", "kielce", "krakow", "lodz",
        "olszyn", "opole", "rzeszow", "warszawa", "wroclaw",
    ]
    return np.hstack([get_city_meteo(city) for city in cities])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment