Skip to content

Instantly share code, notes, and snippets.

View renardeinside's full-sized avatar
🦊
Make the elephant dance!

Ivan Trusov renardeinside

🦊
Make the elephant dance!
View GitHub Profile
import numpy as np
import pandas as pd
# Build a synthetic demo dataset: random binary labels paired with random scores.
# No seed is set here, so the generated values differ from run to run.
ds_size = 10000  # number of rows to generate
# Labels drawn at random from {0, 1}.
targets = np.random.choice([0,1],size=ds_size)
# Scores drawn uniformly from [0, 1), independently of the labels above.
probs = np.random.uniform(0,1,size=ds_size)
# The two arrays are passed as rows, so transpose to get one column per array.
df = pd.DataFrame([targets,probs]).T
df.columns=['target','proba']
def getLiftTable(df,targetColumn,probaColumn,quantilesSize=10):
import pandas as pd
import sklearn.ensemble as ske
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesClassifier,GradientBoostingClassifier
from sklearn.base import TransformerMixin
from sklearn.cross_validation import train_test_split
import numpy as np
from tqdm import tqdm
import xgboost as xgb
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
# PSI (Population Stability Index) is useful when you want to compare two distributions more gently than with 2-KSA, etc.
# More docs at http://ucanalytics.com/blogs/population-stability-index-psi-banking-case-study/
import numpy as np
import pandas as pd
def psi(v1,v2,groups=10):
"""
v1 - first distribution (1-D array)
v2 - second distribution (1-D array)
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
import logging
def create_logger(name, log_file, level=logging.INFO,filemode='w'):
"""
Creates logger
:param name: logger name
:param log_file: logger file
:param level: logging level
:parameter filemode: output type ('w'-oWerwrite,'a'-Append)
@renardeinside
renardeinside / pip_local_repo_howto.md
Last active March 20, 2018 12:40
How to set up pip to use a local repo

How to determine a file's encoding on a *nix system?

Use command:

file -i somefile.csv

In output:

charset=<some charset>
@renardeinside
renardeinside / databricks-streamlit-demo-p1.py
Created July 3, 2021 18:48
databricks-streamlit-demo-p1
class DataProvider:
# above goes some low-level code
def _get_data(self, query: str) -> pd.DataFrame:
self.logger.debug(f"Running SQL query: {query}")
start_time = dt.datetime.now()
data = pd.read_sql(query, self.connection)
end_time = dt.datetime.now()
time_delta = end_time - start_time
self.logger.debug(
f"Query executed, returning the result. Total query time: {time_delta}"
@renardeinside
renardeinside / databricks-streamlit-demo-p2.py
Created July 3, 2021 19:31
databricks-streamlit-demo-p2
# Page header, rendered by Streamlit as Markdown.
# The dataset link destination must be the plain URL: a leading "#" turns it
# into an in-page fragment, so the link would not open the external page.
st.write(
"""
# Databricks Streamlit Demo :fire:
This Streamlit application connects to Databricks SQL Endpoint and creates some visualizations based on the [NYC Taxi Dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
"""
)