This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def plot_pdp(model, X, feature, target=False, return_pd=False, y_pct=True, figsize=(10,9), norm_hist=True, dec=.5): | |
# Get partial dependence | |
pardep = partial_dependence(model, X, [feature]) | |
# Get min & max values | |
xmin = pardep[1][0].min() | |
xmax = pardep[1][0].max() | |
ymin = pardep[0][0].min() | |
ymax = pardep[0][0].max() | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name | review_count | rating | tags | ||
---|---|---|---|---|---|
0 | The Calaveras | 103 | 4.5 | ['bars', 'mexican', 'tapas', 'small', 'plates', '2'] | |
1 | Las Catrinas Mexican Bar & Eatery | 301 | 4.0 | ['mexican', 'cocktail', 'bars', '2'] | |
2 | Chano's Cantina | 165 | 4.0 | ['cocktail', 'bars', 'new', 'mexican', 'cuisine', '2'] | |
3 | Maizal Restaurant & Tequila Bar | 295 | 4.0 | ['mexican', 'cocktail', 'bars', '2'] | |
4 | Juquila Kitchen and Bar | 98 | 4.0 | ['new', 'mexican', 'cuisine', 'tacos', 'cocktail', 'bars', '2'] |
We can make this file beautiful and searchable if this error is corrected: Unclosed quoted field in line 10.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
name,categories,rating,price | |
Tacuba,"[{'alias': 'mexican', 'title': 'Mexican'}, {'alias': 'tapas', 'title': 'Tapas Bars'}, {'alias': 'latin', 'title': 'Latin American'}]",3.5,$$ | |
Mi Espiguita Taqueria,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,$ | |
El Mero Mero,"[{'alias': 'mexican', 'title': 'Mexican'}]",4.5,$$ | |
Hoja Santa,"[{'alias': 'newmexican', 'title': 'New Mexican Cuisine'}, {'alias': 'mexican', 'title': 'Mexican'}]",4.5, | |
The Calaveras,"[{'alias': 'bars', 'title': 'Bars'}, {'alias': 'mexican', 'title': 'Mexican'}, {'alias': 'tapasmallplates', 'title': 'Tapas/Small Plates'}]",4.5,$$ | |
Athens Grill & Sports Bar,"[{'alias': 'mexican', 'title': 'Mexican'}]",4,$ | |
Mezquite Restaurant,"[{'alias': 'mexican', 'title': 'Mexican'}, {'alias': 'peruvian', 'title': 'Peruvian'}, {'alias': 'seafood', 'title': 'Seafood'}]",4,$$ | |
Fresco's Cantina,"[{'alias': 'mexican', 'title': 'Mexican'}, {'alias': 'latin', 'title': 'Latin American'}, {'alias': 'newmexican', 'title': 'New Mexican Cuisine'}]",4.5,$$ | |
Chela & Garnacha,"[{'al |
We can make this file beautiful and searchable if this error is corrected: It looks like row 3 should actually have 17 columns, instead of 5 in line 2.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
,id,alias,name,image_url,is_closed,url,review_count,categories,rating,coordinates,transactions,price,location,phone,display_phone,distance | |
0,EHUNrnIgnhwTnpOm3gEESg,tacuba-astoria,Tacuba,https://s3-media4.fl.yelpcdn.com/bphoto/Q6jPz4xg6QPh4NElULobrA/o.jpg,False,https://www.yelp.com/biz/tacuba-astoria?adjust_creative=TKz2edtltxgYtPzgSrH9EQ&utm_campaign=yelp_api_v3&utm_medium=api_v3_business_search&utm_source=TKz2edtltxgYtPzgSrH9EQ,645,"[{'alias': 'mexican', 'title': 'Mexican'}, {'alias': 'tapas', 'title': 'Tapas Bars'}, {'alias': 'latin', 'title': 'Latin American'}]",3.5,"{'latitude': 40.75585, 'longitude': -73.92447}","['delivery', 'pickup']",$$,"{'address1': '35-01 36th St', 'address2': '', 'address3': '', 'city': 'Astoria', 'zip_code': '11106', 'country': 'US', 'state': 'NY', 'display_address': ['35-01 36th St', 'Astoria, NY 11106']}",+17187862727,(718) 786-2727,1229.3517053099579 | |
1,yvva7IYpD6J7OfKlCdQrkw,mi-espiguita-taqueria-astoria,Mi Espiguita Taqueria,https://s3-media2.fl.yelpcdn.com/bphoto/TEho39G01VJX |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Create nested dictionaries to map grades to year and neighbor states | |
neighbor_grades = {} | |
for year in df.Year.unique(): | |
neighbor_grades[year] = {} | |
for state in df.State.unique(): | |
# Default score is state score if no neighboring state (i.e. Alaska) | |
default_score = df[(df.State==state) & (df.Year==year)]['State Grade'].values[0] | |
scores = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json  # manipulate json objects
import urllib.request
from pathlib import Path

from shapely.geometry import Polygon, MultiPolygon  # manipulate complex shapes

# Fetch the us_states.geojson file from GitHub and store it locally under
# international_data/.
url = 'https://github.com/ritvikmath/StarbucksStoreScraping/raw/master/us_states.geojson'
geojson_path = Path('international_data/us_states.geojson')

# urlretrieve opens the destination for writing but does not create missing
# parent directories, so make sure the target folder exists first.
geojson_path.parent.mkdir(parents=True, exist_ok=True)
urllib.request.urlretrieve(url, str(geojson_path))
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import scipy.stats
# `import scipy.stats` only binds the name `scipy`; the loop below calls
# `stats.ttest_ind`, so bind `stats` explicitly to avoid a NameError.
from scipy import stats

# Get unique list of regions
regions = df['Region'].unique()

# For each region, compare quantities of the region against rest of world
# using a two-sample t-test on the means of two independent samples.
for region in regions:
    # Quantities ordered inside the region (test group) ...
    test = df['Quantity'].loc[(df['Region'] == region)]
    # ... versus quantities ordered everywhere else (control group).
    control = df['Quantity'].loc[(df['Region'] != region)]
    # NOTE(review): `results` is rebound on every iteration; presumably it is
    # consumed by code further down the loop that is not visible here.
    results = stats.ttest_ind(test, control)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
variable_name | sum_sq | df | F | PR(>F) | |
---|---|---|---|---|---|
C(Employee) | 8259.303410743278 | 8.0 | 3.010142879436235 | 0.00231317377883642 | |
C(Region) | 40031.87438364985 | 8.0 | 14.589809289447707 | 6.6425808706248666e-21 | |
C(CategoryName) | 959.7513428085351 | 7.0 | 0.3997554241095136 | 0.9028853563957704 | |
Discount | 6925.520309945907 | 1.0 | 20.19231367164281 | 7.392950240262248e-06 | |
Residual | 704133.9307385169 | 2053.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import statsmodels.api as sm
from statsmodels.formula.api import ols

# Define target & independent variables:
# Quantity is the response; Discount, Employee, Region and CategoryName are
# the predictors. C(var_name) indicates the variable is categorical.
formula = 'Quantity ~ Discount + C(Employee) + C(Region) + C(CategoryName)'

# Fit ordinary least squares model to data (`df` is expected to be defined
# earlier and to provide the columns named in the formula).
lm = ols(formula, data=df).fit()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
OrderId | UnitPrice | Quantity | Discount | CustomerId | Region | CategoryName | Employee | |
---|---|---|---|---|---|---|---|---|
10248 | 14.0 | 12 | 0.0 | VINET | Western Europe | Dairy Products | Buchanan5 | |
10248 | 9.8 | 10 | 0.0 | VINET | Western Europe | Grains/Cereals | Buchanan5 | |
10248 | 34.8 | 5 | 0.0 | VINET | Western Europe | Dairy Products | Buchanan5 | |
10249 | 18.6 | 9 | 0.0 | TOMSP | Western Europe | Produce | Suyama6 | |
10249 | 42.4 | 40 | 0.0 | TOMSP | Western Europe | Produce | Suyama6 | |
10250 | 7.7 | 10 | 0.0 | HANAR | South America | Seafood | Peacock4 | |
10250 | 42.4 | 35 | 0.15 | HANAR | South America | Produce | Peacock4 | |
10250 | 16.8 | 15 | 0.15 | HANAR | South America | Condiments | Peacock4 | |
10251 | 16.8 | 6 | 0.05 | VICTE | Western Europe | Grains/Cereals | Leverling3 |
Newer Older