Skip to content

Instantly share code, notes, and snippets.

Avatar

Joseph Allen joseph-allen

View GitHub Profile
View UKDSBrandedSeabornPlot.py
# import packages
import matplotlib as plt
import seaborn as sns
import pandas as pd
# set seaborn style
sns.set(rc={'figure.figsize':(12,8)})
sns.set(font_scale=1.5)
# create UKDS color palette
@joseph-allen
joseph-allen / outlier_detection
Created Dec 29, 2017
Tukey method outlier detection
View outlier_detection
import numpy as np
from collections import Counter
def detect_outliers(df, n, features):
"""
Takes a dataframe df of features and returns a list of the indices
corresponding to the observations containing more than n outliers according
to the Tukey method.
"""
@joseph-allen
joseph-allen / train_polynomial.py
Last active Apr 29, 2019
Quickly train a polynomial
View train_polynomial.py
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error
def train_polynomial(df, train_upto,train_degree):
# build Polynomial features up to degree train_degree
p = PolynomialFeatures(degree=train_degree).fit(df[['dv','psi','temp1','temp2']])
@joseph-allen
joseph-allen / load_pickle.py
Created Apr 26, 2019
Demo of loading a pickle
View load_pickle.py
import pickle
import pandas as pd
import numpy as np
# Create sample data: an empty frame with columns A, B, C, then one row at index 0
df = pd.DataFrame(columns=['A', 'B', 'C'])
df.loc[0] = [12, 42, 'test']
# load stored model
# NOTE: unpickling can execute arbitrary code -- only load files from a trusted source.
# The context manager closes the file handle as soon as the model has been read.
with open('Pickled_Model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
@joseph-allen
joseph-allen / Polynomial_preprocessing.py
Created Apr 17, 2019
generates polynomial features out of any
View Polynomial_preprocessing.py
from sklearn.preprocessing import PolynomialFeatures
# NOTE(review): `df` and `pd` are not defined in this snippet; they must already be in scope.
# Columns to expand, named once so the three uses below cannot drift apart.
feature_cols = ['feature1', 'feature2']
# Fit a degree-2 polynomial expansion on the selected columns.
p = PolynomialFeatures(degree=2).fit(df[feature_cols])
# Wrap the transformed array in a DataFrame labelled with the generated feature names.
# NOTE(review): newer scikit-learn releases replace `get_feature_names` with
# `get_feature_names_out` -- confirm against the installed version.
features = pd.DataFrame(p.transform(df[feature_cols]), columns=p.get_feature_names(df[feature_cols].columns))
features.head()
@joseph-allen
joseph-allen / read_multiple
Created Apr 10, 2019
Read multiple files in the same directory
View read_multiple
import glob
path = r'path to file'  # use your path
# Collect every CSV file directly inside `path`.
all_files = glob.glob(path + "/*.csv")
# NOTE(review): `li` is never appended to in the visible snippet -- confirm against the full script.
li = []
for filename in all_files:
    # `sep=',|;'` is a regular expression (comma OR semicolon). Regex separators are
    # only supported by the Python parser, so name the engine explicitly instead of
    # relying on pandas' warning-emitting fallback.
    df = pd.read_csv(filename, index_col=None, header=0, sep=',|;', engine='python')
    print(filename)
@joseph-allen
joseph-allen / plot_multiple_ylabels
Created Dec 4, 2018
plots multiple y axes on one graph
View plot_multiple_ylabels
def plot_multi(data, cols=None, spacing=.1, **kwargs):
from pandas import plotting
# Get default color style from pandas - can be changed to any other color list
if cols is None: cols = data.columns
if len(cols) == 0: return
colors = getattr(getattr(plotting, '_style'), '_get_standard_colors')(num_colors=len(cols))
# First axis
View plotly line plot
import plotly as py
py.tools.set_credentials_file(username='YOURE_USERNAME', api_key='YOUR API KEY')
import cufflinks as cf
import pandas as pd
df = pd.read_csv('data.csv')
py.plotly.iplot([{
'x': df.var0,
'y': df[col],
@joseph-allen
joseph-allen / plot_dates.py
Created Nov 26, 2018
Plot a pandas dataframe of x over some datetime
View plot_dates.py
import pandas as pd
# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Configure visualisations
%matplotlib inline
mpl.style.use( 'ggplot' )
View sklearn example params
## Random Forest params
# Maps each hyper-parameter name to the list of candidate values to try.
# NOTE(review): presumably consumed by a grid search -- confirm against the caller.
rf_param_grid = dict(
    max_depth=[None],
    max_features=[1, 3],
    min_samples_split=[2, 3],
    min_samples_leaf=[1, 3],
    bootstrap=[False],
    n_estimators=[100],
    criterion=["gini"],
)
## Extra Trees params
You can’t perform that action at this time.