This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Convert MySlideshow.ipynb to a slideshow; --reveal-prefix tells the generated
# HTML where to load reveal.js from (here a local "reveal.js" directory).
jupyter-nbconvert --to slides MySlideshow.ipynb --reveal-prefix=reveal.js
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add and commit all | |
git add -A && git commit -m "Your Message" | |
# Push to branch | |
git push -u origin <branch> | |
# undo commit | |
git reset HEAD~ | |
# remove tracked file | |
git rm --cached |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Assumed imports:
from functools import reduce  # reduce is not a builtin on Python 3

import pandas as pd

# John Galt's answer is basically a reduce operation. With more than a handful
# of dataframes, put them in a list (built via a comprehension, a loop, ...):
dfs = [df0, df1, df2, dfN]

# Assuming they all share a common column, like 'name' in the example, fold the
# list into a single frame by merging pairwise from left to right:
df_final = reduce(lambda left, right: pd.merge(left, right, on='name'), dfs)

# That way the code works for any number of dataframes to merge.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library("RSQLite")

# Open a connection to the SQLite database file
con <- dbConnect(RSQLite::SQLite(), dbname = "database.sqlite")

# Names of all tables in the database
alltables <- dbListTables(con)

# Load the whole Country table into a data frame
df.country <- dbGetQuery(con, "select * from Country")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import pandas as pd | |
from sqlalchemy import create_engine # database connection | |
import datetime as dt | |
from IPython.display import display | |
import matplotlib | |
import numpy as np | |
import matplotlib.pyplot as plt | |
pd.set_option('display.max_columns', 120) | |
%matplotlib inline |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
library(data.table)
library(bnlearn)

# Read the pipe-delimited file; verbose = TRUE prints fread's parsing diagnostics
df <- fread("file.csv", sep = "|", verbose = TRUE)

# Names of columns 3 through 24. colnames() takes no index argument (its second
# parameter is do.NULL), so subset the returned vector instead.
cols <- colnames(df)[3:24]

df <- as.data.frame(df)

# Convert every column to a factor (presumably because bnlearn expects factors
# for discrete data -- confirm against the modelling code).
# lapply() converts column by column. apply() would first coerce the frame to a
# character matrix, and since R 4.0 data.frame() would leave those columns as
# character rather than factor.
df[] <- lapply(df, as.factor)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def supervised_learner(df, clf, train_list, testsize = 0.3, predictors=df.columns[1:], target=df.columns[-1]): | |
### Import packages | |
from sklearn.cross_validation import train_test_split | |
from sklearn import linear_model | |
from sklearn.linear_model import SGDClassifier | |
from sklearn.metrics import confusion_matrix | |
from sklearn.metrics import classification_report | |
import pandas as pd | |
import numpy as np | |
from sklearn.metrics import roc_curve |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def df_get_dummies(df, drops, cats, pkey=None, binary=1): | |
### Import packages | |
import pandas as pd | |
import numpy as np | |
### | |
# The following columns aren't used in modelling so drop | |
df = df.drop(drops, 1) | |
# Ensure BPE and LE are categories before getting dummies |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Downsample large groups: for each key in a_list, keep every row when the
# group has at most a_number rows; otherwise keep each row independently with
# probability prob_keep. The kept rows are stacked into df_out.
df_temp = pd.DataFrame(columns=df.columns)
sampled_groups = []
for elem in a_list:
    df_temp = df[df.pkey == elem]
    if len(df_temp) > a_number:
        # One Bernoulli(prob_keep) draw per row, used directly as a boolean
        # mask. This avoids indexing .loc with None labels (an error on
        # current pandas) and the follow-up dropna(), which also discarded
        # kept rows that merely contained a NaN value.
        keep_mask = np.random.binomial(1, prob_keep, size=len(df_temp)).astype(bool)
        df_temp = df_temp[keep_mask]
    sampled_groups.append(df_temp)

# DataFrame.append was removed in pandas 2.0; one concat at the end also avoids
# re-copying the accumulated frame on every iteration.
if sampled_groups:
    df_out = pd.concat(sampled_groups, ignore_index=True)
else:
    df_out = pd.DataFrame(columns=df.columns)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inspired from here http://stackoverflow.com/questions/7015587/python-difference-of-2-datetimes-in-months | |
from datetime import datetime, timedelta | |
from calendar import monthrange | |
def monthdelta(df): | |
d1 = df[0] | |
d2 = df[1] | |
delta = 0 | |
while True: | |
mdays = monthrange(d1.year, d1.month)[1] | |
d1 += timedelta(days=mdays) |