Skip to content

Instantly share code, notes, and snippets.

View joseph-allen's full-sized avatar

Joseph Allen joseph-allen

View GitHub Profile
@joseph-allen
joseph-allen / laerning_curve
Created December 29, 2017 14:31
learning curve, kfold and gridsearch
import matplotlib.pyplot as plt
import numpy as np
from sklearn.model_selection import GridSearchCV, StratifiedKFold, learning_curve
from sklearn.ensemble import GradientBoostingClassifier
def plot_learning_curve(estimator, title, X, y, ylim=None, cv=None,
n_jobs=-1, train_sizes=np.linspace(.1, 1.0, 5)):
"""Generate a simple plot of the test and training learning curve"""
plt.figure()
import seaborn as sns
# Plot the distribution of the "Feature" column; the legend label reports the
# column's skewness (pandas Series.skew()) formatted to two decimals.
# NOTE(review): `dataset` is not defined in this snippet — presumably a pandas
# DataFrame loaded earlier; confirm. sns.distplot is deprecated in newer
# seaborn releases — verify against the installed version.
g = sns.distplot(dataset["Feature"], color="m", label="Skewness : %.2f"%(dataset["Feature"].skew()))
# Rebind g to whatever legend() returns, placing the legend at the "best" location.
g = g.legend(loc="best")
# Optional: log-transform Feature to reduce the skewness of its distribution.
# Values that are not strictly positive are mapped to 0 instead of being passed to np.log.
# dataset["Feature"] = dataset["Feature"].map(lambda i: np.log(i) if i > 0 else 0)
@joseph-allen
joseph-allen / useful imports
Created December 29, 2017 14:18
Useful imports for a jupyter investigation
# Ignore warnings
import warnings
warnings.filterwarnings('ignore')
# Handle table-like data and matrices
import numpy as np
import pandas as pd
# Modelling Algorithms
from sklearn.tree import DecisionTreeClassifier
import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
from sklearn.tree import DecisionTreeClassifier
def plot_histograms(df, variables, n_rows, n_cols):
fig = plt.figure(figsize=(16, 12))
for i, var_name in enumerate(variables):
ax = fig.add_subplot(n_rows, n_cols, i+1)
@pytest.fixture(params=[('input', 'expected')])
def get_input(request):
    """Return the parameter pytest selected for the current fixture run.

    The fixture is parametrized with a single ``('input', 'expected')``
    tuple, so dependent tests and fixtures receive that tuple as-is.
    """
    current_case = request.param
    return current_case
@pytest.fixture
def get_function_result(get_input):
    """Return ``test_function`` applied to the first element of ``get_input``."""
    # Only the first element of the fixture value is passed to the function.
    argument = get_input[0]
    return test_function(argument)
@joseph-allen
joseph-allen / gist:f50306ba76d5c539feeb281384702a80
Created September 28, 2017 11:46
exporting jupyter notebook without input
jupyter nbconvert --to pdf Report.ipynb --TemplateExporter.exclude_input=True
import pandas as pd
import sys
import itertools
def permute(df):
columns = [df[column] for column in list(df)]
uniq_columns = [columns[x].unique() for x in range(0, len(columns))]
return pd.DataFrame(list(itertools.product(*uniq_columns)),
#!/bin/bash
echo -n "GitHub User: "
read USER
echo -n "GitHub Password: "
read -s PASS
echo ""
echo -n "GitHub Repo (e.g. foo/bar): "
{
"cells": [
{
"cell_type": "code",
"execution_count": 518,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
/**
* @fileoverview Rule to flag use of _.cloneDeep
* @author Joseph Allen
*/
//------------------------------------------------------------------------------
// Rule Definition
//------------------------------------------------------------------------------
module.exports = function(context) {