Skip to content

Instantly share code, notes, and snippets.

import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
# NOTE: Make sure that the class is labeled 'class' in the data file
# Load the data file into a record array with one float64 field per column.
# np.recfromcsv was removed in NumPy 2.0; np.genfromtxt with names=True and
# case_sensitive='lower' (the defaults recfromcsv applied) followed by a
# recarray view yields the same object on old and new NumPy alike.
tpot_data = np.genfromtxt(
    'PATH/TO/DATA/FILE',
    delimiter='COLUMN_SEPARATOR',
    dtype=np.float64,
    names=True,
    case_sensitive='lower',
).view(np.recarray)

# Reinterpret the records as a plain 2-D float matrix (one row per record),
# then remove the column holding the 'class' field so only features remain.
class_column = tpot_data.dtype.names.index('class')
data_matrix = tpot_data.view(np.float64).reshape(tpot_data.size, -1)
features = np.delete(data_matrix, class_column, axis=1)

# Split features and class labels into train/test parts; random_state is
# fixed so the split is reproducible.
training_features, testing_features, training_classes, testing_classes = train_test_split(
    features, tpot_data['class'], random_state=42)
# Download the list of all measurement stations and flatten the JSON into a
# DataFrame. A timeout keeps the script from hanging forever on a stalled
# connection, and raise_for_status() surfaces HTTP errors before the body is
# parsed as JSON.
r = requests.get('http://api.gios.gov.pl/pjp-api/rest/station/findAll', timeout=30)
r.raise_for_status()
allStations = json_normalize(r.json())
# Show only the stations whose "city.name" column equals Gdańsk.
print(allStations[allStations["city.name"] == u"Gdańsk"])
addressStreet city city.commune.communeName \
2 ul. Powstańców Warszawskich NaN Gdańsk
14 ul. Kaczeńce NaN Gdańsk
26 ul. Wyzwolenia NaN Gdańsk
67 ul. Leczkowa NaN Gdańsk
75 ul. Ostrzycka NaN Gdańsk
city.commune.districtName city.commune.provinceName city.id city.name \
2 Gdańsk POMORSKIE 218.0 Gdańsk
14 Gdańsk POMORSKIE 218.0 Gdańsk
# Station whose sensors are listed below.
stationId = 733
# Fetch the sensors installed at that station. A timeout prevents an
# indefinite hang, and raise_for_status() fails early on an HTTP error
# instead of trying to parse an error page as JSON.
r = requests.get(
    'http://api.gios.gov.pl/pjp-api/rest/station/sensors/' + str(stationId),
    timeout=30,
)
r.raise_for_status()
sensors = json_normalize(r.json())
print(sensors)
id param.idParam param.paramCode param.paramFormula \
0 4720 8 CO CO
1 4727 3 PM10 PM10
2 4723 6 NO2 NO2
3 4725 5 O3 O3
4 4730 1 SO2 SO2
param.paramName sensorDateEnd sensorDateStart stationId
0 tlenek węgla None 1998-05-01 12:00:00 733
1 pył zawieszony PM10 None 1998-05-01 12:00:00 733
# Sensor whose measurements are downloaded below.
sensorId = 4727
# Fetch the measurement data for that sensor. A timeout prevents an
# indefinite hang, and raise_for_status() fails early on an HTTP error
# instead of trying to parse an error page as JSON.
r = requests.get(
    'http://api.gios.gov.pl/pjp-api/rest/data/getData/' + str(sensorId),
    timeout=30,
)
r.raise_for_status()
concentration = json_normalize(r.json())
# The single "values" cell of `concentration` holds a list of dicts, each
# with a 'date' and a 'value' entry; unpack it once and build both columns.
readings = concentration["values"].values.item()
concentrationFrame = pd.DataFrame({
    "dates": [entry[u'date'] for entry in readings],
    "values": [entry[u'value'] for entry in readings],
})
# Index the measurements by their date, then reverse the row order.
concentrationFrame = concentrationFrame.set_index(["dates"]).iloc[::-1]
values
dates
2017-04-16 01:00:00 18.64540
2017-04-16 02:00:00 8.53258
2017-04-16 03:00:00 3.52958
2017-04-16 04:00:00 2.12867
2017-04-16 05:00:00 1.00000
2017-04-16 06:00:00 12.96610
2017-04-16 07:00:00 13.20580
2017-04-16 08:00:00 2.32258
# Take the rows of bigDataFrame for the single timestamp 2015-06-12 08:00.
# The explicit copy makes oneHour an independent frame, so the column
# assignments below never write into (or warn about) a view of bigDataFrame.
oneHour = bigDataFrame['2015-06-12 08:00:00':'2015-06-12 08:00:00'].copy()
# Drop the outermost index level; the remaining index values are matched
# against the "Kod stacji" column of `stations` below.
oneHour.index = oneHour.index.droplevel(level=0)

# For every row, look up the station with the same code and store its two
# WGS84 coordinate columns as a tuple. Indexing the first matching row with
# .values[0] replaces the Python-2-only `map(tuple, ...)[0]`, which raises
# TypeError on Python 3 because map() returns an iterator there.
coordColumns = [u'WGS84 φ N', u'WGS84 λ E']
oneHour["coords"] = [
    tuple(stations.loc[stations["Kod stacji"] == name, coordColumns].values[0])
    for name in oneHour.index
]

# PM10 readings with their coordinates, without rows containing missing
# values. dropna() returns a new frame, avoiding an in-place edit of a
# derived selection.
pm10vals = oneHour[["PM10", "coords"]].dropna()

# Expand the coordinate tuples into separate "lat" and "long" columns and
# remove the helper column afterwards.
oneHour[["lat", "long"]] = oneHour["coords"].apply(pd.Series)
oneHour = oneHour.drop("coords", axis=1)