Joseph Allen (joseph-allen)

@joseph-allen
joseph-allen / UKDSBrandedSeabornPlot.py
Created November 12, 2020 17:42
Branded Seaborn plot
# import packages
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
# set seaborn style
sns.set(rc={'figure.figsize':(12,8)})
sns.set(font_scale=1.5)
# create UKDS color palette
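The preview cuts off before the palette is defined. A minimal sketch of how a branded palette might be applied, using placeholder hex codes (not the real UKDS brand colours, which are not shown in the preview) and a toy dataframe:
# placeholder hex codes, standing in for the UKDS brand colours
ukds_palette = ['#742082', '#00a5b4', '#f39200', '#6d6e71']
sns.set_palette(ukds_palette)

demo = pd.DataFrame({'year': [2017, 2018, 2019, 2020],
                     'downloads': [120, 180, 240, 310]})
ax = sns.barplot(x='year', y='downloads', data=demo)
ax.set_title('UKDS branded Seaborn plot')
plt.show()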
@joseph-allen
joseph-allen / train_polynomial.py
Last active April 29, 2019 11:00
Quickly train a polynomial
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error
def train_polynomial(df, train_upto, train_degree):
    # build polynomial features up to degree train_degree
    p = PolynomialFeatures(degree=train_degree).fit(df[['dv','psi','temp1','temp2']])
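The preview stops after the feature transform. A sketch of how the helper might continue, where the 'target' column name and the row-index split at train_upto are assumptions, not part of the gist:
def train_polynomial_sketch(df, train_upto, train_degree):
    feature_cols = ['dv', 'psi', 'temp1', 'temp2']
    p = PolynomialFeatures(degree=train_degree).fit(df[feature_cols])
    X = p.transform(df[feature_cols])
    y = df['target']                       # assumed target column

    # train on rows up to train_upto, evaluate on the rest
    X_train, y_train = X[:train_upto], y[:train_upto]
    X_test, y_test = X[train_upto:], y[train_upto:]

    model = LinearRegression().fit(X_train, y_train)
    mae = mean_absolute_error(y_test, model.predict(X_test))
    return model, mae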
@joseph-allen
joseph-allen / load_pickle.py
Created April 26, 2019 16:05
Demo of loading a pickle
import pickle
import pandas as pd
import numpy as np
# Create sample data
df = pd.DataFrame(columns=['A','B','C'])
df.loc[0] = [12,42,'test']
# load stored model
loaded_model = pickle.load(open('Pickled_Model.pkl', 'rb'))
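The preview stops after the load call. A self-contained round-trip sketch, assuming (as the variable name suggests) that 'Pickled_Model.pkl' holds a fitted estimator; the toy LinearRegression here is an illustration, not the gist's model:
from sklearn.linear_model import LinearRegression

# fit and pickle a toy model
X = np.array([[1], [2], [3], [4]])
y = np.array([2, 4, 6, 8])
model = LinearRegression().fit(X, y)
with open('Pickled_Model.pkl', 'wb') as f:
    pickle.dump(model, f)

# load it back and predict; the context manager closes the file handle
with open('Pickled_Model.pkl', 'rb') as f:
    loaded_model = pickle.load(f)
print(loaded_model.predict([[5]]))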
@joseph-allen
joseph-allen / Polynomial_preprocessing.py
Created April 17, 2019 12:26
Generates polynomial features out of any dataframe columns
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

# fit degree-2 polynomial features on the two input columns
p = PolynomialFeatures(degree=2).fit(df[['feature1', 'feature2']])
features = pd.DataFrame(p.transform(df[['feature1', 'feature2']]),
                        columns=p.get_feature_names(df[['feature1', 'feature2']].columns))
features.head()
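The snippet assumes an existing df. A self-contained toy example of the same transform; note that newer scikit-learn releases rename get_feature_names to get_feature_names_out:
import pandas as pd
from sklearn.preprocessing import PolynomialFeatures

toy = pd.DataFrame({'feature1': [1, 2, 3], 'feature2': [4, 5, 6]})
p2 = PolynomialFeatures(degree=2).fit(toy)
poly = pd.DataFrame(p2.transform(toy), columns=p2.get_feature_names(toy.columns))
print(poly.columns.tolist())
# ['1', 'feature1', 'feature2', 'feature1^2', 'feature1 feature2', 'feature2^2']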
@joseph-allen
joseph-allen / read_multiple
Created April 10, 2019 18:39
Read multiple files in the same directory
import glob
import pandas as pd

path = r'path to file'  # use your path
all_files = glob.glob(path + "/*.csv")
li = []
for filename in all_files:
    # engine='python' is needed for the regex separator ',|;'
    df = pd.read_csv(filename, index_col=None, header=0, sep=',|;', engine='python')
    li.append(df)
    print(filename)
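Once the loop has filled li, the usual next step (not shown in the preview) is to stack the frames into one dataframe:
combined = pd.concat(li, axis=0, ignore_index=True)
print(combined.shape)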
@joseph-allen
joseph-allen / plot_multiple_ylabels
Created December 4, 2018 14:00
plots multiple y axes on one graph
def plot_multi(data, cols=None, spacing=.1, **kwargs):
    from pandas import plotting
    # Get default color style from pandas; note this is a private API
    # (pandas.plotting._style) that has moved in later pandas versions,
    # and it can be swapped for any other list of colours
    if cols is None: cols = data.columns
    if len(cols) == 0: return
    colors = getattr(getattr(plotting, '_style'), '_get_standard_colors')(num_colors=len(cols))
    # First axis
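The rest of the function is not shown in the preview. A sketch of how the usual multi-y-axis pattern continues, swapping matplotlib's default colour cycle for the private pandas helper above; the exact styling is an assumption, not the gist's code:
import matplotlib.pyplot as plt

def plot_multi_sketch(data, cols=None, spacing=.1, **kwargs):
    """Plot each column of `data` against its own y axis on one figure."""
    cols = list(data.columns) if cols is None else list(cols)
    if len(cols) == 0:
        return

    # matplotlib's default colour cycle stands in for pandas' private helper
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    # first axis
    ax = data.loc[:, cols[0]].plot(label=cols[0], color=colors[0], **kwargs)
    ax.set_ylabel(cols[0])
    lines, labels = ax.get_legend_handles_labels()

    # each remaining column gets a twinned, horizontally offset y axis
    for n, col in enumerate(cols[1:], start=1):
        ax_new = ax.twinx()
        ax_new.spines['right'].set_position(('axes', 1 + spacing * (n - 1)))
        data.loc[:, col].plot(ax=ax_new, label=col, color=colors[n % len(colors)])
        ax_new.set_ylabel(col)
        line, label = ax_new.get_legend_handles_labels()
        lines += line
        labels += label

    ax.legend(lines, labels, loc=0)
    return ax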
import plotly as py
py.tools.set_credentials_file(username='YOUR_USERNAME', api_key='YOUR_API_KEY')
import cufflinks as cf
import pandas as pd
df = pd.read_csv('data.csv')
py.plotly.iplot([{
    'x': df.var0,
    'y': df[col],
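set_credentials_file belongs to the legacy plotly cloud API (plotly < 4). A minimal sketch of the offline route, which needs no credentials; 'var0' comes from the snippet, while 'var1' is an assumed second column:
import pandas as pd
import plotly.offline as pyo

df = pd.read_csv('data.csv')
pyo.init_notebook_mode()   # offline rendering in a notebook, no API key required
pyo.iplot([{'x': df['var0'], 'y': df['var1'], 'name': 'var1'}])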
@joseph-allen
joseph-allen / plot_dates.py
Created November 26, 2018 12:47
Plot a pandas dataframe of x over some datetime
import pandas as pd
# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Configure visualisations
%matplotlib inline
mpl.style.use( 'ggplot' )
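The preview ends at the style configuration. A sketch of the plotting step the description refers to, using an assumed toy frame with 'date' and 'value' columns and monthly tick formatting via mdates:
# toy time series; 'date' and 'value' are assumed column names
df = pd.DataFrame({
    'date': pd.date_range('2018-01-01', periods=90, freq='D'),
    'value': range(90),
})

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(df['date'], df['value'])

# tick every month, formatted as e.g. "Jan 2018"
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
fig.autofmt_xdate()
plt.show()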
## Random Forest params
rf_param_grid = {"max_depth": [None],
                 "max_features": [1, 3],
                 "min_samples_split": [2, 3],
                 "min_samples_leaf": [1, 3],
                 "bootstrap": [False],
                 "n_estimators": [100],
                 "criterion": ["gini"]}
## Extra Trees params
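The preview truncates before the Extra Trees grid. A sketch of how rf_param_grid above is presumably fed to GridSearchCV; the toy data, fold count, and scoring metric are assumptions:
from sklearn.datasets import make_classification
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# toy data so the sketch runs stand-alone
X_train, y_train = make_classification(n_samples=200, n_features=5, random_state=0)

kfold = StratifiedKFold(n_splits=5)
rf_grid = GridSearchCV(RandomForestClassifier(), param_grid=rf_param_grid,
                       cv=kfold, scoring='accuracy', n_jobs=-1, verbose=1)
rf_grid.fit(X_train, y_train)
print(rf_grid.best_score_, rf_grid.best_params_)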
@joseph-allen
joseph-allen / laerning_curve
Created December 29, 2017 14:31
learning curve, kfold and gridsearch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold, learning_curve
from sklearn.ensemble import GradientBoostingClassifier
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
                        n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):
    """Generate a simple plot of the test and training learning curve"""
    plt.figure()
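The body is truncated here. A sketch of how such a learning-curve plot typically continues, following the standard scikit-learn recipe; the shaded-band styling is an assumption rather than the gist's exact code:
def plot_learning_curve_sketch(estimator, title, X, y, ylim=None, cv=None,
                               n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):
    """Plot mean train and cross-validation scores against training-set size."""
    plt.figure()
    plt.title(title)
    if ylim is not None:
        plt.ylim(*ylim)
    plt.xlabel("Training examples")
    plt.ylabel("Score")

    train_sizes, train_scores, test_scores = learning_curve(
        estimator, X, y, cv=cv, n_jobs=n_jobs, train_sizes=train_sizes)

    train_mean, train_std = train_scores.mean(axis=1), train_scores.std(axis=1)
    test_mean, test_std = test_scores.mean(axis=1), test_scores.std(axis=1)

    # shaded bands show one standard deviation across the CV folds
    plt.fill_between(train_sizes, train_mean - train_std, train_mean + train_std, alpha=0.1, color="r")
    plt.fill_between(train_sizes, test_mean - test_std, test_mean + test_std, alpha=0.1, color="g")
    plt.plot(train_sizes, train_mean, 'o-', color="r", label="Training score")
    plt.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation score")
    plt.legend(loc="best")
    plt.grid()
    return plt

# e.g. plot_learning_curve_sketch(GradientBoostingClassifier(), "GBC", X, y,
#                                 cv=StratifiedKFold(n_splits=10))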