Skip to content

Instantly share code, notes, and snippets.

View GabrielSGoncalves's full-sized avatar
🏀
Data Engineer @ Big Data

GabrielSGoncalves GabrielSGoncalves

🏀
Data Engineer @ Big Data
View GitHub Profile
@GabrielSGoncalves
GabrielSGoncalves / read_private_file_from_gdrive.py
Last active September 21, 2023 16:06
Read private files from Google Drive
from typing import Union, Dict
from io import BytesIO, StringIO
import json
import pandas as pd
import requests
from pydrive2.auth import GoogleAuth
from pydrive2.drive import GoogleDrive
def read_private_file_from_gdrive(
file_url: str, file_format: str, google_auth: GoogleAuth, **kwargs
@GabrielSGoncalves
GabrielSGoncalves / read_private_sheets.py
Created November 22, 2021 18:26
Function for reading private Google Spreadsheets
import pandas as pd
import requests
import gspread
def read_private_sheets(
credentials_json: str, sheet_url: str, worksheet: int = 0
) -> pd.DataFrame:
"""Read a private available Google Sheets as a Pandas Dataframe.
@GabrielSGoncalves
GabrielSGoncalves / read_public_sheets.py
Created November 22, 2021 18:20
Function for reading Google Sheets that are open to the public
from io import BytesIO
import requests
import pandas as pd
def read_public_sheets(file_url: str) -> pd.DataFrame:
"""Read a publicly available Google Sheets file as a Pandas Dataframe.
Parameters
----------
file_url : str
@GabrielSGoncalves
GabrielSGoncalves / read_file_from_gcloud_storage.py
Created November 17, 2021 18:19
Function for reading private files from Google Cloud Storage
from typing import Union, Dict
from io import BytesIO
from bson import json_util
import pandas as pd
import requests
from google.cloud import storage
from google.oauth2 import service_account
def read_file_from_gcloud_storage(
@GabrielSGoncalves
GabrielSGoncalves / read_private_spreadsheets.py
Created November 17, 2021 17:11
Function for reading private Google Spreadsheets
import pandas as pd
import requests
import gspread
def read_private_spreadsheets(
credentials_json: str, sheet_key: str, worksheet: int = 0
) -> pd.DataFrame:
"""Read a private available Google Spreadsheet as a Pandas Dataframe.
Parameters
@GabrielSGoncalves
GabrielSGoncalves / read_public_file_from_gdrive.py
Last active November 25, 2021 02:20
Function for reading publicly shared files from Google Drive
from typing import Union, Dict
from io import StringIO
import json
import pandas as pd
import requests
def read_public_file_from_gdrive(
file_url: str, file_format: str, **kwargs
) -> Union[pd.DataFrame, Dict]:
"""Read public files stored in a Google Drive.
import pickle

# Serialize the Scorecard object to disk. The file is opened in binary write
# mode because pickle produces bytes; the `with` block closes it afterwards.
with open('scorecard_model.pickle', 'wb') as pfile:
    pickle.dump(scorecard, pfile)

# Load the Scorecard back from the same file.
# NOTE: unpickling can execute arbitrary code, so only load pickle files that
# come from a trusted source (here, the file written just above).
with open("scorecard_model.pickle", 'rb') as scorecard_pickle:
    scorecard_production = pickle.load(scorecard_pickle)
from optbinning.scorecard.plots import plot_ks, plot_auc_roc
# Store the result of scorecard.score() for the test dataset in a "score" column
# (only the score is assigned here; no predicted probability column is created)
df_application_test.loc[:,"score"] = scorecard.score(df_application_test)
# NOTE(review): both plots below receive the score column as the prediction
# argument — confirm that a score (rather than a predicted probability) is
# what these plotting functions are meant to be given.
# Kolmogorov-Smirnov plot of the TARGET column against the score column
plot_ks(df_application_test.TARGET, df_application_test.score)
# ROC-AUC plot for the same TARGET / score pair
plot_auc_roc(df_application_test.TARGET, df_application_test.score)
Variable Bin id Bin Count Count (%) Non-event Event Event rate WoE IV JS Coefficient Points
NAME_CONTRACT_TYPE 0 ['Revolving loans'] 12056 0.096 11415 641 0.053 0.44 0.015 0.002 -0.628 -6.566
NAME_CONTRACT_TYPE 1 ['Cash loans'] 113899 0.904 104437 9462 0.083 -0.038 0.001 0.0 -0.628 6.815
NAME_CONTRACT_TYPE 2 Special 0 0.0 0 0 0.0 0.0 0.0 0.0 -0.628 5.747
NAME_CONTRACT_TYPE 3 Missing 0 0.0 0 0 0.0 0.0 0.0 0.0 -0.628 5.747
CODE_GENDER 0 ['XNA' 'F'] 82896 0.658 77096 5800 0.07 0.148 0.013 0.002 -0.738 0.888
CODE_GENDER 1 ['M'] 43059 0.342 38756 4303 0.1 -0.242 0.022 0.003 -0.738 13.69
CODE_GENDER 2 Special 0 0.0 0 0 0.0 0.0 0.0 0.0 -0.738 5.747
CODE_GENDER 3 Missing 0 0.0 0 0 0.0 0.0 0.0 0.0 -0.738 5.747
FLAG_OWN_CAR 0 ['Y'] 42893 0.341 39866 3027 0.071 0.138 0.006 0.001 -1.085 -0.946
# Build the detailed scorecard table, then round its values to 3 decimals.
scorecard_summary = scorecard.table(style="detailed")
scorecard_summary = scorecard_summary.round(3)

# Write the rounded table to CSV, leaving the index out of the file.
scorecard_summary.to_csv(
    'scorecard_table_detailed.csv',
    index=False,
)