This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def calculate_roc_auc(model_pipe, X, y): | |
| """Calculate roc auc score. | |
| Parameters: | |
| =========== | |
| model_pipe: sklearn model or pipeline | |
| X: features | |
| y: true target | |
| """ | |
| y_proba = model_pipe.predict_proba(X)[:,1] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| pipeline = Pipeline([ | |
| ('dtype_1', FeatureTransformer(features=INTEGER_COLS, assign_dtype=int)), | |
| ('dtype_2', FeatureTransformer(features=FLOAT_COLS, assign_dtype=float)), | |
| ('dtype_3', FeatureTransformer(features=OBJECT_COLS, assign_dtype='object')), | |
| ('basic_transformations_1', FeatureTransformer(features=['deck', 'embark_town'], strlowercase=True)), | |
| ('basic_transformations_2', FeatureTransformer(features=['embark_town'], remove_whitespace=True)), | |
| ('num_imputer', Imputer(NUMERICAL_COLS, method='mean')), | |
| ('scaler', Scaler(NUMERICAL_COLS)), | |
| ('cat_encoder', Encoder(OBJECT_COLS, encoding='onehotencoder')), | |
| ('pca', PCA(n_components=0.95)), |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| models_for_gridsearch = { | |
| 'LogisticRegression': LogisticRegression(), | |
| 'KNeighborsClassifier': KNeighborsClassifier(), | |
| # 'DecisionTreeClassifier': DecisionTreeClassifier(), | |
| # 'ExtraTreesClassifier': ExtraTreesClassifier(), | |
| 'RandomForestClassifier': RandomForestClassifier(), | |
| # 'AdaBoostClassifier': AdaBoostClassifier(), | |
| # 'GradientBoostingClassifier': GradientBoostingClassifier(), | |
| 'SVC': SVC(), | |
| } |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| class FeatureTransformer(BaseEstimator, TransformerMixin): | |
| def __init__(self, features, assign_dtype=None, strlowercase=False, remove_whitespace=False): # no *args or **kargs | |
| self.features = features | |
| self.assign_dtype = assign_dtype | |
| self.remove_whitespace = remove_whitespace | |
| self.strlowercase = strlowercase | |
| def fit(self, X, y=None): | |
| return self # nothing else to do | |
| def transform(self, X, y=None): | |
| if self.assign_dtype is not None: |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # Data manipulation | |
| import seaborn as sns | |
| import numpy as np | |
| import pandas as pd | |
| pd.options.display.precision = 4 | |
| pd.options.mode.chained_assignment = None | |
| from sklearn import set_config | |
| # Machine learning pipeline | |
| from sklearn.pipeline import Pipeline |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| WITH date_table AS ( WITH recursive date_in_range(day) AS ( VALUES('2019-12-30') | |
| UNION ALL | |
| SELECT date(day, '+7 day') | |
| FROM date_in_range | |
| WHERE date(day, '+7 day') < '2020-10-19' ) | |
| SELECT strftime('%Y-%W', day) AS year_week, | |
| row_number() OVER(ORDER BY day) AS yw_id | |
| FROM date_in_range) | |
| SELECT table_for_flags.user_id , | |
| table_for_flags.year_week , |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| # !pip install pulp | |
| import pulp as plp | |
| # %matplotlib inline | |
| from matplotlib import pyplot as plt | |
| import cv2 | |
| import numpy as np | |
| from pytesseract import image_to_string | |
| import pytesseract | |
| pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract/' | |
| # !pip install streamlit==0.72.0 |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| df_Players_Drafted_2000 = df_Players[ | |
| df_Players['DRAFT YEAR']=='2000' | |
| ][ | |
| ['PLAYER', 'TEAM', 'AGE', 'HEIGHT', 'WEIGHT', 'COLLEGE COUNTRY','DRAFT YEAR', 'ESPN_GAMELOG_ID'] | |
| ].reset_index(drop=True) | |
| SEASON_2000_2001_CAREER_LIST_TO_CONCAT = [] | |
| for INDEX, ROW in df_Players_Drafted_2000.iterrows(): |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| def get_yearly_player_gamelog(PLAYER_URL, YEAR, PLAYER_NAME): | |
| try: | |
| HTML_RESULTS = pd.read_html('{}{}'.format(PLAYER_URL, YEAR)) | |
| LEN_HTML_RESULTS = len(HTML_RESULTS) | |
| EMPTY_LIST = [] |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| for INDEX, ROW in df_Players.iterrows(): | |
| # In order not to get the HTTP 429 Too Many Requests error, we're sleeping the script for some time. | |
| time.sleep(5) | |
| try: | |
| site= 'https://www.google.com/search?q={}+nba+espn+gamelog'.format(df_Players.loc[INDEX, | |
| 'PLAYER'].replace(' ', '+')) |
NewerOlder