Skip to content

Instantly share code, notes, and snippets.

View rtjeannier's full-sized avatar

Roland Jeannier rtjeannier

View GitHub Profile
import boto3
import os
from botocore.exceptions import ClientError
from cryptography.fernet import Fernet
from typing import Tuple, List
def create_s3_client():
    """Create and return an S3 client.

    The client is built with ``boto3.client('s3')`` and no explicit
    arguments, so region and credentials are whatever boto3 resolves
    by default.
    """
    return boto3.client('s3')
@rtjeannier
rtjeannier / df_cleaner.py
Created July 18, 2018 14:05
Clean dataframe columns
def clean_dataframe_columns(df):
    """Take a dataframe and clean all of its column names in place.

    The existing ``df.columns`` are passed to ``column_cleaner`` and the
    result is assigned back to ``df.columns``. Nothing is returned; the
    dataframe passed in is modified.
    """
    df.columns = column_cleaner(df.columns)
def column_cleaner(columns):
    """Return a list of cleaned names, one per entry of *columns*.

    Each name is first passed through ``clean_column``; the cleaned name
    is then handed to ``create_unique_name`` together with the names
    accepted so far, and whatever that helper returns is appended.
    Input order is preserved.
    """
    clean_list = []
    for c in columns:
        c = clean_column(c)
        # The helper sees the names collected so far -- presumably so it can
        # avoid duplicates; confirm against create_unique_name.
        clean_list.append(create_unique_name(c, clean_list))
    return clean_list
@rtjeannier
rtjeannier / checkversion.py
Created December 6, 2017 19:07
Check the version of a Python library
import sklearn
# Print the version string of the scikit-learn package that was imported.
print('The scikit-learn version is {}.'.format(sklearn.__version__))
@rtjeannier
rtjeannier / recurisve_tree.py
Last active October 5, 2017 04:39
recursive tree
def num_nodes(n):
    """Return ``2 ** n - 1``.

    NOTE(review): given the gist title this is presumably the node count
    of a perfect binary tree with ``n`` levels -- confirm with the caller.
    """
    return 2 ** n - 1
def parent_of_value(value, tree):
for n in tree:
if n.value == value:
if n.parent != None:
return n.parent.value
else:
@rtjeannier
rtjeannier / gist:49a27e5886669445673c3735da251c7a
Created September 15, 2017 15:47
Remove file from history code
# Using *.csv as <file/dir> removes every CSV file from the history
git filter-branch --index-filter 'git rm -r --cached --ignore-unmatch <file/dir>' HEAD
# THIS IS DANGEROUS. WILL BREAK EVERYTHING. But it works if you are okay with that...
@rtjeannier
rtjeannier / calculate_feature_importance.py
Created September 1, 2017 17:37
feature importance for bagging trees
from sklearn.ensemble import BaggingClassifier
from sklearn.model_selection import GridSearchCV

# Hyper-parameter grid tried for the bagging ensemble.
dtc_params = {
    'max_features': [0.5, 0.7, 1.0],
    'max_samples': [0.5, 0.7, 1.0],
    'n_estimators': [2, 5, 10, 20],
}

# Grid search over dtc_params with 5-fold cross-validation (cv=5).
# NOTE(review): X and y are not defined in this snippet -- presumably the
# feature matrix and labels from the surrounding session; confirm.
dtc_gs = GridSearchCV(BaggingClassifier(), dtc_params, cv=5, verbose=1)
dtc_gs.fit(X, y)
@rtjeannier
rtjeannier / baggingClassifer.py
Created September 1, 2017 17:16
Bagging classifier grid search and random forest
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import RandomForestClassifier
bc_params = {"base_estimator__max_depth": [3,5,10,20],
"base_estimator__max_features": [None, "auto"],
"base_estimator__min_samples_leaf": [1, 3, 5, 7, 10],
"base_estimator__min_samples_split": [2, 5, 7],
'bootstrap_features': [False, True],
@rtjeannier
rtjeannier / scraper_start.py
Created September 1, 2017 14:38
Web scraper help: some starter code
import requests
import bs4
from bs4 import BeautifulSoup
import time
import numpy as np
import pandas as pd
import time
from selenium.webdriver.chrome.options import Options
from selenium import webdriver
from selenium.common.exceptions import NoSuchElementException
@rtjeannier
rtjeannier / feature_importance_corr.py
Created August 31, 2017 21:26
Joe's feature importance plot with correlation
from sklearn.ensemble import ExtraTreesClassifier
plt.style.use('fivethirtyeight')
def feature_importance_eda(X, y):
'''Get an estimate of the feature importance of data'''
# Build a forest and compute the feature importances
forest = ExtraTreesClassifier(n_estimators=250,
random_state=0)
@rtjeannier
rtjeannier / terrorist_plot.html
Created August 28, 2017 04:01
terrorist by region plot
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8">
<title>Bokeh Plot</title>
<link rel="stylesheet" href="https://cdn.pydata.org/bokeh/release/bokeh-0.12.5.min.css" type="text/css" />