Skip to content

Instantly share code, notes, and snippets.


Joseph Allen joseph-allen

View GitHub Profile
joseph-allen /
Last active Apr 29, 2019
Quickly train a polynomial
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_absolute_error
def train_polynomial(df, train_upto,train_degree):
# build Polynomial features up to degree train_degree
p = PolynomialFeatures(degree=train_degree).fit(df[['dv','psi','temp1','temp2']])
joseph-allen /
Created Apr 26, 2019
Demo of loading a pickle
import pickle
import pandas as pd
import numpy as np
# Build a one-row sample frame: start from an empty frame with columns
# A, B and C, then insert the row at index label 0.
df = pd.DataFrame(columns=list("ABC"))
df.loc[0] = [12, 42, "test"]
# Load the stored model from 'Pickled_Model.pkl' in the working directory.
# The file is opened in a `with` block so the handle is closed as soon as
# unpickling finishes (or fails), instead of being left open.
# SECURITY NOTE: unpickling can execute arbitrary code; only load pickle
# files that come from a trusted source.
with open('Pickled_Model.pkl', 'rb') as model_file:
    loaded_model = pickle.load(model_file)
joseph-allen /
Created Apr 17, 2019
generates polynomial features out of any
from sklearn.preprocessing import PolynomialFeatures
# Columns of `df` that are expanded into polynomial terms.
feature_columns = ['feature1', 'feature2']
# Fit a degree-2 polynomial expansion on the selected columns.
p = PolynomialFeatures(degree=2).fit(df[feature_columns])
# Transform the same columns and wrap the result in a DataFrame whose column
# names are the generated feature names.
# NOTE(review): newer scikit-learn releases provide `get_feature_names_out`
# in place of `get_feature_names` - confirm against the installed version.
features = pd.DataFrame(
    p.transform(df[feature_columns]),
    columns=p.get_feature_names(df[feature_columns].columns),
)
joseph-allen / read_multiple
Created Apr 10, 2019
Read multiple files in the same directory
View read_multiple
import glob
path = r'path to file' # use your path
all_files = glob.glob(path + "/*.csv")
li = []
for filename in all_files:
df = pd.read_csv(filename, index_col=None, header=0,sep=',|;')
joseph-allen / plot_multiple_ylabels
Created Dec 4, 2018
plots multiple y axes on one graph
View plot_multiple_ylabels
def plot_multi(data, cols=None, spacing=.1, **kwargs):
from pandas import plotting
# Get default color style from pandas - can be changed to any other color list
if cols is None: cols = data.columns
if len(cols) == 0: return
colors = getattr(getattr(plotting, '_style'), '_get_standard_colors')(num_colors=len(cols))
# First axis
View plotly line plot
import plotly as py'YOURE_USERNAME', api_key='YOUR API KEY')
import cufflinks as cf
import pandas as pd
df = pd.read_csv('data.csv')
'x': df.var0,
'y': df[col],
joseph-allen /
Created Nov 26, 2018
Plot a pandas dataframe of x over some datetime
import pandas as pd
# Visualisation
import matplotlib as mpl
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
# Configure visualisations
%matplotlib inline 'ggplot' )
View sklearn example params
## Random Forest params
# Each key is a parameter name mapped to the list of candidate values to try.
rf_param_grid = dict(
    max_depth=[None],
    max_features=[1, 3],
    min_samples_split=[2, 3],
    min_samples_leaf=[1, 3],
    bootstrap=[False],
    n_estimators=[100],
    criterion=["gini"],
)
## Extra Trees params
joseph-allen / laerning_curve
Created Dec 29, 2017
learning curve, kfold and gridsearch
View laerning_curve
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold, learning_curve
from sklearn.ensemble import GradientBoostingClassifier
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):
"""Generate a simple plot of the test and training learning curve"""
import seaborn as sns
# Plot the distribution of dataset["Feature"]; the legend label embeds the
# column's skewness (from .skew()) formatted to two decimals.
# NOTE(review): assumes `dataset` is a pandas DataFrame defined earlier - confirm.
g = sns.distplot(dataset["Feature"], color="m", label="Skewness : %.2f"%(dataset["Feature"].skew()))
# Draw the legend at the automatically chosen location. This rebinds `g` to
# whatever legend() returns, so `g` no longer refers to the distplot result.
g = g.legend(loc="best")
# Apply log to Feature to reduce skewness distribution
# dataset["Feature"] = dataset["Feature"].map(lambda i: np.log(i) if i > 0 else 0)
You can’t perform that action at this time.