This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def gen_hist(df):
    """Show one distribution plot per column of ``df``.

    Every column is drawn with ``sns.distplot`` and displayed with
    ``plt.show()`` before the next column is drawn, so each column gets
    exactly one figure.

    A pop-the-head loop with an inner ``for`` over the remaining columns is
    deliberately avoided: the inner loop variable would go unused, the first
    columns would be redrawn repeatedly and the last column would never be
    drawn.

    Args:
        df: DataFrame whose columns are plotted. Presumably numeric columns
            only (the call below passes ``df_num``) -- confirm with caller.

    NOTE(review): ``sns.distplot`` is deprecated since seaborn 0.11 in favour
    of ``sns.histplot`` / ``sns.displot``; it is kept so the snippet still
    runs on older seaborn installs.
    """
    for column in df.columns:
        sns.distplot(df[column])
        plt.show()


gen_hist(df_num)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def eda(dataframe):
    """Print a quick exploratory summary of ``dataframe``.

    Printed in order: missing-value counts per column, the index, the column
    dtypes, the shape, the ``describe()`` statistics, and finally each column
    name followed by its number of unique values.

    Args:
        dataframe: the pandas DataFrame to summarise.

    Returns:
        None. Everything is written to standard output.
    """
    # this code is from @ritikabhasker, slightly adapted, but mainly hers.
    print("**MISSING VALUES** \n", dataframe.isnull().sum(), "\n")
    print("**DATAFRAME INDEX** \n", dataframe.index, "\n")
    print("**DATAFRAME TYPES** \n", dataframe.dtypes, "\n")
    print("**DATAFRAME SHAPE** \n", dataframe.shape, "\n")
    print("**DATAFRAME DESCRIBE** \n", dataframe.describe(), "\n")
    print("**NUMBER OF UNIQUE VALUES PER COLUMN**")
    for item in dataframe:
        print(item)
        # The heading promises unique-value counts, so print the count after
        # each column name instead of the name alone.
        print(dataframe[item].nunique())
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Put this right after the matplotlib import: it sets the default resolution of saved figures to 500 dpi | |
| import matplotlib.pyplot as plt | |
| plt.rcParams['savefig.dpi'] = 500 | |
| . | |
| . | |
| . |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Put this right after the matplotlib import: it sets the default resolution of saved figures to 500 dpi | |
| import matplotlib.pyplot as plt | |
| plt.rcParams['savefig.dpi'] = 500 | |
| . | |
| . | |
| . |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| test_strip = 'Braund, Mr. Owen Harris' | |
| test_strip.split('.')[1].lstrip() | |
| # .split('.') cuts the string at every '.' and returns a list of the pieces | |
| # [1] then pulls the second piece of that list | |
| # .lstrip() removes leading whitespace by default | |
| bad[i].split('(')[1].lstrip().rstrip(')') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # import regex | |
| #generate code from https://regex101.com | |
| import re | |
| regex = r"\d+" | |
| for book in html.find_all('div', class_='booking'): | |
| matches = re.search(regex, book.text) # make sure you change this from finditer |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| #Joe says to try and except every time | |
| import pandas as pd | |
| dc_eats = pd.DataFrame(columns=["name","location","price","bookings"]) | |
| # loop through each entry | |
| for entry in html.find_all('div', {'class':'result content-section-list-row cf with-times'}): | |
| # grab the name | |
| name = entry.find('span', {'class': 'rest-row-name-text'}).text | |
| # grab the location |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clicking the 'Next' link in Selenium.
# find_element_by_link_text() was removed in Selenium 4. The generic
# find_element() with the 'link text' locator strategy (the string value of
# By.LINK_TEXT) is used instead and needs no extra import.
link = driver.find_element('link text', 'Next')
link.click()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| string = 'some string of characters' | |
| from nltk.stem import WordNetLemmatizer | |
| #instantiate | |
| lemmatizer = WordNetLemmatizer() | |
| # Before we can lemmatize our spam string we need to tokenize it. | |
| from nltk.tokenize import RegexpTokenizer | |
| tokenizer = RegexpTokenizer(r'\w+') |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Hash one string into a feature vector and show its ten largest entries.
string = 'string of charactrers'  # closing quote restored; it was a syntax error
hvec = HashingVectorizer()
# HashingVectorizer is stateless, so fit() learns nothing from the text it is
# given. Fit on the same `string` that is transformed below, so the snippet
# does not depend on a `spam` variable it never defines.
hvec.fit([string])
df = pd.DataFrame(hvec.transform([string]).todense())
# Transpose so the hashed features become rows, sort by value (column 0),
# keep the top ten, then transpose back for a wide display.
df.transpose().sort_values(0, ascending=False).head(10).transpose()
OlderNewer