Ivan Trusov renardeinside

## count_lifts.py
import numpy as np
import pandas as pd

# Synthetic scoring dataset: random binary targets paired with scores
# drawn uniformly between 0 and 1.
ds_size = 10000
targets = np.random.choice([0, 1], size=ds_size)
probs = np.random.uniform(0, 1, size=ds_size)

# Label the two rows up front, then transpose so each row becomes a
# named column of the resulting frame.
df = pd.DataFrame([targets, probs], index=['target', 'proba']).T

def getLiftTable(df,targetColumn,probaColumn,quantilesSize=10):

## bnp_paribas_xgboost.py
import pandas as pd
import sklearn.ensemble as ske
from sklearn.preprocessing import Imputer
from sklearn.feature_selection import SelectFromModel
from sklearn.ensemble import ExtraTreesClassifier,GradientBoostingClassifier
from sklearn.base import TransformerMixin
from sklearn.cross_validation import train_test_split
import numpy as np
from tqdm import tqdm
import xgboost as xgb

## keras_mnist.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                renardeinside
                / keras_mnist.ipynb
            
            
              Last active
              June 23, 2016 08:45
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## psi.py
# PSI (Population Stability Index) is useful when you want to compare two distributions more gently than 2-KSA, etc.
# More docs at http://ucanalytics.com/blogs/population-stability-index-psi-banking-case-study/

import numpy as np
import pandas as pd

def psi(v1,v2,groups=10):
    """
    v1 - first distribution (1-D array)
    v2 - second distribution (1-D array)

## deep_bayes_example.ipynb

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                renardeinside
                / deep_bayes_example.ipynb
            
            
              Created
              December 19, 2017 13:09
            
          
      Loading

      Sorry, something went wrong. Reload?
      Sorry, we cannot display this file.
      Sorry, this file is invalid so it cannot be displayed.
      
          Viewer requires iframe.
      
    
## python_logging_example.py

import logging

def create_logger(name, log_file, level=logging.INFO,filemode='w'):
    """
    Creates logger
    :param name: logger name
    :param log_file: logger file
    :param level: logging level
    :parameter filemode: output type ('w'-oWerwrite,'a'-Append)

## pip_local_repo_howto.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                renardeinside
                / pip_local_repo_howto.md
            
            
              Last active
              March 20, 2018 12:40
            
              
                How to setup pip to use local repo
              
          
    How to set up pip to use a local repo?

This method was tested on a Linux distribution and is based on these sources:

https://pip.pypa.io/en/stable/user_guide/#config-file (official pip docs)
https://medium.com/@vando/creating-an-small-local-yum-and-pip-repository-f04ff08c144f (post about)

Steps to set up:

Create a file named pip.conf somewhere on your system. This file may already exist in your $HOME/pip.
Add these lines to the file:

[global]


## define_file_encoding.md

      
              1 file
            
          
              0 forks
            
          
                0 comments
              
            
              0 stars
            
          
                renardeinside
                / define_file_encoding.md
            
            
              Created
              June 13, 2018 08:58
            
          
    How to determine a file's encoding on a *nix system?

Use the command:
file -i somefile.csv
The output contains:
charset=<some charset>

  
## databricks-streamlit-demo-p1.py
class DataProvider:
  # above goes some low-level code
  def _get_data(self, query: str) -> pd.DataFrame:
      self.logger.debug(f"Running SQL query: {query}")
      start_time = dt.datetime.now()
      data = pd.read_sql(query, self.connection)
      end_time = dt.datetime.now()
      time_delta = end_time - start_time
      self.logger.debug(
          f"Query executed, returning the result. Total query time: {time_delta}"

## databricks-streamlit-demo-p2.py
# Page header and introduction, passed to Streamlit as a Markdown string.
# The dataset link target must be the bare URL: a leading '#' would turn it
# into an in-page fragment link instead of pointing at the external site.
st.write(
    """
    # Databricks Streamlit Demo :fire:
    This Streamlit application connects to Databricks SQL Endpoint and creates some visualizations based on the [NYC Taxi Dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
    """
)
	import numpy as np
	import pandas as pd

	# Build a synthetic dataset: random 0/1 targets and scores drawn
	# uniformly between 0 and 1, ds_size values each.
	ds_size = 10000
	targets = np.random.choice([0,1],size=ds_size)
	probs = np.random.uniform(0,1,size=ds_size)
	# Stack the two arrays as rows, then transpose so each becomes a column.
	df = pd.DataFrame([targets,probs]).T
	df.columns=['target','proba']

	def getLiftTable(df,targetColumn,probaColumn,quantilesSize=10):
	import pandas as pd
	import sklearn.ensemble as ske
	from sklearn.preprocessing import Imputer
	from sklearn.feature_selection import SelectFromModel
	from sklearn.ensemble import ExtraTreesClassifier,GradientBoostingClassifier
	from sklearn.base import TransformerMixin
	from sklearn.cross_validation import train_test_split
	import numpy as np
	from tqdm import tqdm
	import xgboost as xgb
	# PSI (Population Stability Index) is useful when you want to compare two distributions more gently than 2-KSA, etc.
	# More docs at http://ucanalytics.com/blogs/population-stability-index-psi-banking-case-study/

	import numpy as np
	import pandas as pd

	def psi(v1,v2,groups=10):
	"""
	v1 - first distribution (1-D array)
	v2 - second distribution (1-D array)

	import logging

	def create_logger(name, log_file, level=logging.INFO,filemode='w'):
	"""
	Creates logger
	:param name: logger name
	:param log_file: logger file
	:param level: logging level
	:parameter filemode: output type ('w'-oWerwrite,'a'-Append)
	class DataProvider:
	# above goes some low-level code
	def _get_data(self, query: str) -> pd.DataFrame:
	self.logger.debug(f"Running SQL query: {query}")
	start_time = dt.datetime.now()
	data = pd.read_sql(query, self.connection)
	end_time = dt.datetime.now()
	time_delta = end_time - start_time
	self.logger.debug(
	f"Query executed, returning the result. Total query time: {time_delta}"
	# Page header and introduction, passed to Streamlit as a Markdown string.
	# The dataset link target must be the bare URL: a leading '#' would turn it
	# into an in-page fragment link instead of pointing at the external site.
	st.write(
	"""
	# Databricks Streamlit Demo :fire:
	This Streamlit application connects to Databricks SQL Endpoint and creates some visualizations based on the [NYC Taxi Dataset](https://www1.nyc.gov/site/tlc/about/tlc-trip-record-data.page).
	"""
	)