mesutcanalkan

## scores_pipeline.py
def calculate_roc_auc(model_pipe, X, y):
    """Calculate roc auc score.

    Parameters:
    ===========
    model_pipe: sklearn model or pipeline
    X: features
    y: true target
    """
    y_proba = model_pipe.predict_proba(X)[:,1]

## fitting_pipeline.py
pipeline = Pipeline([
    ('dtype_1', FeatureTransformer(features=INTEGER_COLS, assign_dtype=int)),
    ('dtype_2', FeatureTransformer(features=FLOAT_COLS, assign_dtype=float)),
    ('dtype_3', FeatureTransformer(features=OBJECT_COLS, assign_dtype='object')),
    ('basic_transformations_1', FeatureTransformer(features=['deck', 'embark_town'], strlowercase=True)),
    ('basic_transformations_2', FeatureTransformer(features=['embark_town'], remove_whitespace=True)),
    ('num_imputer', Imputer(NUMERICAL_COLS, method='mean')),
    ('scaler', Scaler(NUMERICAL_COLS)),
    ('cat_encoder', Encoder(OBJECT_COLS, encoding='onehotencoder')),
    ('pca', PCA(n_components=0.95)),

## gridsearch_pipeline_params.py
# Maps each estimator's class name to a default-constructed instance.
# Relies on every class's __name__ being identical to the name it is
# referenced by here, so the keys equal the original string literals.
# Candidates currently left out: DecisionTreeClassifier, ExtraTreesClassifier,
# AdaBoostClassifier, GradientBoostingClassifier.
models_for_gridsearch = {
    estimator_cls.__name__: estimator_cls()
    for estimator_cls in (
        LogisticRegression,
        KNeighborsClassifier,
        RandomForestClassifier,
        SVC,
    )
}

## pipeline_classes.py
class FeatureTransformer(BaseEstimator, TransformerMixin):
    def __init__(self, features, assign_dtype=None, strlowercase=False, remove_whitespace=False):
        """Store the configuration on the instance; nothing is validated here.

        Parameters:
        ===========
        features: kept as ``self.features``
        assign_dtype: kept as ``self.assign_dtype`` (default None)
        strlowercase: kept as ``self.strlowercase`` (default False)
        remove_whitespace: kept as ``self.remove_whitespace`` (default False)
        """
        # Explicit keyword parameters only: no *args or **kwargs.
        self.features = features
        self.assign_dtype = assign_dtype
        self.strlowercase = strlowercase
        self.remove_whitespace = remove_whitespace
    def fit(self, X, y=None):
        """Return ``self`` unchanged; ``X`` and ``y`` are accepted but not used."""
        return self # nothing else to do
    def transform(self, X, y=None):
        if self.assign_dtype is not None:

## pipeline_libraries.py
# Data manipulation
import seaborn as sns
import numpy as np
import pandas as pd
pd.options.display.precision = 4
pd.options.mode.chained_assignment = None

from sklearn import set_config
# Machine learning pipeline
from sklearn.pipeline import Pipeline

## retention_sqlite.sql
WITH date_table AS ( WITH recursive date_in_range(day) AS ( VALUES('2019-12-30')
UNION ALL
SELECT date(day, '+7 day')
FROM   date_in_range
WHERE  date(day, '+7 day') < '2020-10-19' )
SELECT   strftime('%Y-%W', day)          AS year_week,
         row_number() OVER(ORDER BY day) AS yw_id
FROM     date_in_range)
SELECT table_for_flags.user_id ,
       table_for_flags.year_week ,

## sudoku_app.py
# !pip install pulp
import pulp as plp
# %matplotlib inline
from matplotlib import pyplot as plt
import cv2
import numpy as np
from pytesseract import image_to_string
import pytesseract
# Point pytesseract at the local tesseract install.
# NOTE(review): the path ends with '/', which reads as a directory rather than
# the tesseract executable itself — confirm against the actual install.
pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract/'
# !pip install streamlit==0.72.0

## Seasonal Gamelog.py
# Rows of df_Players whose 'DRAFT YEAR' equals the string '2000', restricted to
# the listed columns and re-indexed from 0 (the old index is dropped).
df_Players_Drafted_2000 = df_Players.loc[
    df_Players['DRAFT YEAR'] == '2000',
    [
        'PLAYER', 'TEAM', 'AGE', 'HEIGHT', 'WEIGHT',
        'COLLEGE COUNTRY', 'DRAFT YEAR', 'ESPN_GAMELOG_ID',
    ],
].reset_index(drop=True)


# Starts empty; presumably filled per player by the loop that follows — confirm.
SEASON_2000_2001_CAREER_LIST_TO_CONCAT = []

for INDEX, ROW in df_Players_Drafted_2000.iterrows():

## yearly_player_gamelog.py
def get_yearly_player_gamelog(PLAYER_URL, YEAR, PLAYER_NAME):

    try:

        HTML_RESULTS = pd.read_html('{}{}'.format(PLAYER_URL, YEAR))

        LEN_HTML_RESULTS = len(HTML_RESULTS)

        EMPTY_LIST = []

## google_search_player_id.py
for INDEX, ROW in df_Players.iterrows():

    # In order not to get the HTTP 429 Too Many Requests error, we're sleeping the script for some time.
    time.sleep(5)

    try:

        site= 'https://www.google.com/search?q={}+nba+espn+gamelog'.format(df_Players.loc[INDEX,
                                                                                      'PLAYER'].replace(' ', '+'))
	def calculate_roc_auc(model_pipe, X, y):
	"""Calculate roc auc score.

	Parameters:
	===========
	model_pipe: sklearn model or pipeline
	X: features
	y: true target
	"""
	y_proba = model_pipe.predict_proba(X)[:,1]
	pipeline = Pipeline([
	('dtype_1', FeatureTransformer(features=INTEGER_COLS, assign_dtype=int)),
	('dtype_2', FeatureTransformer(features=FLOAT_COLS, assign_dtype=float)),
	('dtype_3', FeatureTransformer(features=OBJECT_COLS, assign_dtype='object')),
	('basic_transformations_1', FeatureTransformer(features=['deck', 'embark_town'], strlowercase=True)),
	('basic_transformations_2', FeatureTransformer(features=['embark_town'], remove_whitespace=True)),
	('num_imputer', Imputer(NUMERICAL_COLS, method='mean')),
	('scaler', Scaler(NUMERICAL_COLS)),
	('cat_encoder', Encoder(OBJECT_COLS, encoding='onehotencoder')),
	('pca', PCA(n_components=0.95)),
	# Maps each estimator's class name to a default-constructed instance.
	# Relies on every class's __name__ being identical to the name it is
	# referenced by here, so the keys equal the original string literals.
	# Candidates currently left out: DecisionTreeClassifier, ExtraTreesClassifier,
	# AdaBoostClassifier, GradientBoostingClassifier.
	models_for_gridsearch = {
	    estimator_cls.__name__: estimator_cls()
	    for estimator_cls in (
	        LogisticRegression,
	        KNeighborsClassifier,
	        RandomForestClassifier,
	        SVC,
	    )
	}
	class FeatureTransformer(BaseEstimator, TransformerMixin):
	def __init__(self, features, assign_dtype=None, strlowercase=False, remove_whitespace=False): # no args or *kargs
	self.features = features
	self.assign_dtype = assign_dtype
	self.remove_whitespace = remove_whitespace
	self.strlowercase = strlowercase
	def fit(self, X, y=None):
	return self # nothing else to do
	def transform(self, X, y=None):
	if self.assign_dtype is not None:
	# Data manipulation
	import seaborn as sns
	import numpy as np
	import pandas as pd
	pd.options.display.precision = 4
	pd.options.mode.chained_assignment = None

	from sklearn import set_config
	# Machine learning pipeline
	from sklearn.pipeline import Pipeline
	WITH date_table AS ( WITH recursive date_in_range(day) AS ( VALUES('2019-12-30')
	UNION ALL
	SELECT date(day, '+7 day')
	FROM date_in_range
	WHERE date(day, '+7 day') < '2020-10-19' )
	SELECT strftime('%Y-%W', day) AS year_week,
	row_number() OVER(ORDER BY day) AS yw_id
	FROM date_in_range)
	SELECT table_for_flags.user_id ,
	table_for_flags.year_week ,
	# !pip install pulp
	import pulp as plp
	# %matplotlib inline
	from matplotlib import pyplot as plt
	import cv2
	import numpy as np
	from pytesseract import image_to_string
	import pytesseract
	# Point pytesseract at the local tesseract install.
	# NOTE(review): the path ends with '/', which reads as a directory rather than
	# the tesseract executable itself — confirm against the actual install.
	pytesseract.pytesseract.tesseract_cmd = r'/usr/local/bin/tesseract/'
	# !pip install streamlit==0.72.0
	# Rows of df_Players whose 'DRAFT YEAR' equals the string '2000', restricted to
	# the listed columns and re-indexed from 0 (the old index is dropped).
	df_Players_Drafted_2000 = df_Players.loc[
	    df_Players['DRAFT YEAR'] == '2000',
	    [
	        'PLAYER', 'TEAM', 'AGE', 'HEIGHT', 'WEIGHT',
	        'COLLEGE COUNTRY', 'DRAFT YEAR', 'ESPN_GAMELOG_ID',
	    ],
	].reset_index(drop=True)


	# Starts empty; presumably filled per player by the loop that follows — confirm.
	SEASON_2000_2001_CAREER_LIST_TO_CONCAT = []

	for INDEX, ROW in df_Players_Drafted_2000.iterrows():
	def get_yearly_player_gamelog(PLAYER_URL, YEAR, PLAYER_NAME):

	try:

	HTML_RESULTS = pd.read_html('{}{}'.format(PLAYER_URL, YEAR))

	LEN_HTML_RESULTS = len(HTML_RESULTS)

	EMPTY_LIST = []
	for INDEX, ROW in df_Players.iterrows():

	# In order not to get the HTTP 429 Too Many Requests error, we're sleeping the script for some time.
	time.sleep(5)

	try:

	site= 'https://www.google.com/search?q={}+nba+espn+gamelog'.format(df_Players.loc[INDEX,
	'PLAYER'].replace(' ', '+'))