alexanderholt
import seaborn as sns
import matplotlib.pyplot as plt

def gen_hist(df):
    # plot a distribution (histogram) for each column in the dataframe
    for col in df.columns:
        sns.distplot(df[col])
        plt.show()

gen_hist(df_num)
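The original comment mentions plotting all relationships; if pairwise relationships (rather than per-column histograms) are what is wanted, seaborn's pairplot does that in one call. A minimal sketch, assuming df_num is a DataFrame of numeric columns:

import seaborn as sns
import matplotlib.pyplot as plt

# scatter plots for every pair of numeric columns, histograms on the diagonal
sns.pairplot(df_num)
plt.show()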
@alexanderholt
alexanderholt / eda
Last active October 16, 2017 13:12
def eda(dataframe):
    # this code is from @ritikabhasker, slightly adapted, but mainly hers.
    print("**MISSING VALUES** \n", dataframe.isnull().sum(), "\n")
    print("**DATAFRAME INDEX** \n", dataframe.index, "\n")
    print("**DATAFRAME TYPES** \n", dataframe.dtypes, "\n")
    print("**DATAFRAME SHAPE** \n", dataframe.shape, "\n")
    print("**DATAFRAME DESCRIBE** \n", dataframe.describe(), "\n")
    print("**NUMBER OF UNIQUE VALUES PER COLUMN**")
    for item in dataframe:
        print(item, dataframe[item].nunique())
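A quick usage sketch, assuming pandas is imported; the small DataFrame and its column names here are made up for illustration:

import pandas as pd

df = pd.DataFrame({
    'age': [22, 38, None, 35],
    'fare': [7.25, 71.28, 7.92, 8.05],
})
eda(df)  # prints missing values, index, dtypes, shape, describe(), and unique counts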
@alexanderholt
alexanderholt / savehighres
Created October 10, 2017 12:55
Save a high res png
# Include this after you import matplotlib
import matplotlib.pyplot as plt
plt.rcParams['savefig.dpi'] = 500
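For context, a minimal sketch of how the rcParams setting takes effect when saving (the figure here is made up); the dpi can also be passed directly to savefig:

import matplotlib.pyplot as plt

plt.rcParams['savefig.dpi'] = 500

fig, ax = plt.subplots()
ax.plot([0, 1, 2], [0, 1, 4])
fig.savefig('high_res_plot.png')               # saved at 500 dpi via rcParams
fig.savefig('high_res_plot_alt.png', dpi=500)  # equivalent, set per call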
@alexanderholt
alexanderholt / removecharfromstring
Last active October 10, 2017 13:15
Removing characters using .split(), .lstrip(), and .rstrip()
test_strip = 'Braund, Mr. Owen Harris'
test_strip.split('.')[1].lstrip()
# .split('.') splits the string into a list of pieces around each '.',
# then [1] takes the second piece (' Owen Harris')
# .lstrip() removes leading whitespace by default, giving 'Owen Harris'
bad[i].split('(')[1].lstrip().rstrip(')')
# same idea: take the piece after '(' and strip the trailing ')'
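For illustration, a sketch of the second pattern applied over a whole list; `bad` and its contents are hypothetical stand-ins for the messy strings in the original notebook:

bad = ['Ristorante Roma (Georgetown)', 'Blue Duck Tavern (West End)']

# pull the text inside the parentheses for each entry
neighborhoods = [s.split('(')[1].lstrip().rstrip(')') for s in bad]
# ['Georgetown', 'West End']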
# use the re module for regular expressions
# the pattern can be generated and tested at https://regex101.com
import re

regex = r"\d+"  # one or more digits
for book in html.find_all('div', class_='booking'):
    matches = re.search(regex, book.text)  # regex101's generated code uses finditer; change it to search here
    # Joe says to wrap this in try/except every time
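Following the try/except advice in the comment, a minimal sketch of the same loop with the match guarded; `html` is assumed to be a BeautifulSoup object from elsewhere in the scraper:

import re

bookings = []
for book in html.find_all('div', class_='booking'):
    try:
        # grab the first run of digits, e.g. 'Booked 14 times today' -> 14
        bookings.append(int(re.search(r"\d+", book.text).group()))
    except AttributeError:
        # re.search returned None because no digits were found
        bookings.append(0)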
import pandas as pd

dc_eats = pd.DataFrame(columns=["name", "location", "price", "bookings"])

# loop through each entry
for entry in html.find_all('div', {'class': 'result content-section-list-row cf with-times'}):
    # grab the name
    name = entry.find('span', {'class': 'rest-row-name-text'}).text
    # grab the location
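The snippet breaks off after the name; below is a sketch of how the remaining fields might be collected into rows and then into the DataFrame. The location, price, and bookings selectors are hypothetical placeholders, not the real class names from the page:

import pandas as pd

rows = []
for entry in html.find_all('div', {'class': 'result content-section-list-row cf with-times'}):
    name = entry.find('span', {'class': 'rest-row-name-text'}).text
    # hypothetical selectors -- replace with the actual class names from the page
    location = entry.find('span', {'class': 'rest-row-location'}).text
    price = entry.find('div', {'class': 'rest-row-pricing'}).text
    bookings = entry.find('div', {'class': 'booking'}).text
    rows.append({"name": name, "location": location, "price": price, "bookings": bookings})

dc_eats = pd.DataFrame(rows, columns=["name", "location", "price", "bookings"])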
# clicking the 'next' button in selenium
link = driver.find_element_by_link_text('Next')
link.click()
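For context, a sketch of the surrounding Selenium setup and a simple pagination loop (Selenium 3 style API, matching the snippet); the URL and page count are made up:

from selenium import webdriver

driver = webdriver.Chrome()  # assumes chromedriver is on the PATH
driver.get('https://www.opentable.com/washington-dc-restaurant-listings')  # example URL

for _ in range(5):  # walk through a few result pages
    # ... scrape the current page here ...
    link = driver.find_element_by_link_text('Next')
    link.click()

driver.quit()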
@alexanderholt
alexanderholt / nltk_import.py
Last active October 25, 2017 14:47
Natural language toolkit Lemmatize
import pandas as pd
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer
from sklearn.feature_extraction.text import HashingVectorizer

string = 'some string of characters'

# instantiate the lemmatizer
lemmatizer = WordNetLemmatizer()

# Before we can lemmatize our spam string we need to tokenize it.
tokenizer = RegexpTokenizer(r'\w+')

# hash-vectorize the text and look at the ten largest feature weights
hvec = HashingVectorizer()
hvec.fit([spam])  # `spam` is a document defined elsewhere in the notebook
df = pd.DataFrame(hvec.transform([string]).todense())
df.transpose().sort_values(0, ascending=False).head(10).transpose()
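The gist imports the tokenizer and lemmatizer but never calls them; here is a short sketch of the tokenize-then-lemmatize step it sets up (the WordNet data must be downloaded once via nltk.download):

import nltk
from nltk.stem import WordNetLemmatizer
from nltk.tokenize import RegexpTokenizer

nltk.download('wordnet')  # one-time download of the WordNet data

tokenizer = RegexpTokenizer(r'\w+')
lemmatizer = WordNetLemmatizer()

string = 'some string of characters'
tokens = tokenizer.tokenize(string)                 # ['some', 'string', 'of', 'characters']
lemmas = [lemmatizer.lemmatize(t) for t in tokens]  # ['some', 'string', 'of', 'character']
print(lemmas)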