Skip to content

Instantly share code, notes, and snippets.

@rmania
rmania / bag_api.py
Created December 4, 2017 13:20
Call the BAG API #AmsterdamCityData
import pprint
import requests
import json
pc_list = ['1069KJ', '1058NH', '1057PV']
# or 4-digit postal codes
pc_list = ['1069', '1058', '1057']
num_url = 'https://api.data.amsterdam.nl/bag/nummeraanduiding/'
# collect all results in the results list
@rmania
rmania / query_postgresql_database.py
Created December 4, 2017 13:04
query data from a Postgresql db, return a Pandas Dataframe. --schema --password etc. to be stored in an auth.conf file #AmsterdamCityData
# script with 2 ways to push to postgresql database
import pandas as pd
import psycopg2
import logging
import configparser
from sqlalchemy import create_engine
# config parser
config = configparser.RawConfigParser()
config.read('auth.conf')
@rmania
rmania / geo_conversion.py
Last active December 1, 2017 14:04
common geo manipulations
from functools import partial
import pyproj as proj
from shapely.ops import transform
from shapely.geometry import mapping, shape
import json
def rd2wgsGeojson(geojson):
# convert geojson from RD New to WGS84
reprojection = partial(proj.transform,
# Source coordinate system
@rmania
rmania / json_to_csv.py
Created November 9, 2017 10:12
Convert JSON to CSV
from collections import OrderedDict
def json_to_csv(input_f, output_f):
""" Convert JSON to CSV """
input_data = json.load(input_f, object_pairs_hook=OrderedDict)
if isinstance(input_data, list):
list_of_dicts = input_data
fieldnames = list_of_dicts[0].keys()
@rmania
rmania / ml_processing.py
Last active July 7, 2017 20:37
some useful ml processing functions
############ ExtraTreesRegressor , TimeSeriesSplit
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
# Model selection
etr = ExtraTreesRegressor(n_estimators = 100, n_jobs=-1)
# Predict, TimeSeriesSplit
tscv = TimeSeriesSplit(n_splits=5)
results = []
@rmania
rmania / ml_preprocessing_code_snippets.py
Last active March 30, 2017 19:36
machine learning preprocessing and feature generation code snippets
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import Imputer
# some functions executing some basic preprocessing steps for ml
predict_cols = ['x']
feature_cols = list(set(df.columns) - set(predict_cols) - {'source'})
# PREPROCESSING STEPS
@rmania
rmania / pyspark_df_ops.py
Last active March 23, 2017 22:22
common spark dataframe operations -pyspark
from pyspark.sql import functions as F
# remap one column's values with F.when / otherwise
df = df.withColumn('colx', F.when(F.col('prefix')=='EZS', 'EZY').otherwise(F.col('prefix')))
# remap one column with a chained when condition, e.g. reassign pier_in 'D' to DS/DNS based on the incoming gate number
luggage_df = (luggage_df.withColumn('pier_in',
F.when((F.col('vop_in').isin(DS)) & (F.col('pier_in') == 'D'), 'DS')
.when((F.col('vop_in').isin(DNS)) & (F.col('pier_in') == 'D'), 'DNS')
.otherwise(F.col('pier_in')))
@rmania
rmania / matplotlib_setting_tricks.py
Last active March 21, 2017 16:33
tricks for manipulating the settings of matplotlib graphs
# Modify the color of the y tick labels for the current axes.
# A plain loop is used because set_color is called only for its side effect;
# a list comprehension would build a throwaway list of None values.
for tick_label in plt.gca().get_yticklabels():
    tick_label.set_color("red")
# get all tweakable parameters of 'ytick' etc..
[(param, value) for param, value in plt.rcParams.items() if 'ytick' in param]
# get the yticklabel texts
[item.get_text() for item in axi.get_yticklabels()]
#change label display parameters like fontsize
@rmania
rmania / geopandas_convert_geometry_3D_2D.py
Created March 13, 2017 21:06
flatten geometry series (3D to 2D) in geopandas dataframe
# Often when reading in a ShapeFile from Basemap, you'll get: "ValueError: readshapefile can only handle 2D shape types"
# A workaround is to flatten the geometry in your GeoPandas DataFrame to 2D, write the flattened geometry
# series back to a shapefile, and try again.
# edit from http://stackoverflow.com/questions/33417764/basemap-readshapefile-valueerror
from shapely.geometry import Polygon, MultiPolygon, shape, Point
import geopandas as gp
def convert_3D_2D(geometry):
'''