sangarshanan Sangarshanan

## gist:69186e77ad335a6f5a1cd14b8a7cf6d3
To randomize a dataset: shuf=data.iloc[np.random.permutation(len(data))]
sh = shuf.reset_index(drop=True)

TO KNOW NULL
data.isnull().sum()

TO DELETE FROM DICT
del dna_counts['e']

RANDOM FOREST

## Reduce GPU usage
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
config = tf.ConfigProto()
config.gpu_options.per_process_gpu_memory_fraction = 0.3
set_session(tf.Session(config=config))


By default the system uses the maximum available GPU memory, which seems to cause an error while training

## Choosing threshold in Binary classification
Threshold of 0.5 is used by default (for binary problems) to convert predicted probabilities into class predictions

Threshold can be adjusted to increase sensitivity or specificity

Sensitivity and specificity have an inverse relationship

Increasing one would always decrease the other

Adjusting the threshold should be one of the last steps you do in the model-building process

## Keys in sorting
When you wanna sort

10 2 5
7 1 0
9 9 9
1 23 12
6 5 9

with respect to element 1 (2,1,9,23,5)

## List Operation
--------------------------------
S = 'abcdef'
N = 3

print(list(zip(*[iter(S)] * N)))

will Print: [('a','b','c'),('d','e','f')]
--------------------------------
ROTATE A LIST

## PANDAS
SALES WHERE SHOP_ID IS 26,27 OR 28
sales = sales[sales['shop_id'].isin([26, 27, 28])]

## REGEX
r'b[aeiou]{2}t'	  b followed by two vowels, followed by t
r'A\d{3,}'	      The letter A followed by 3 or more digits
r'[A-Z]{,5}'	    Zero to five capital letters
r'\w{3,7}'	      Three to seven “word” characters


## road_between_latlongs.py
import psycopg2
import random, string
hostname = 'localhost'
username = 'postgres'
password = '*****'
database = 'pgroute'
myConnection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database )

# Simple routine to run a query on a database and print the results:
def doQuery( conn, query ) :

## graphhopper_routing.py
import pandas as pd
import urllib.request

### GRAPHHOPPER API ###
urlStart = 'http://localhost:8989/route/?'
point = 'point='
urlEnd = '&type=gpx&instructions=false&vehicle=car'
separator = '%2C'

### SOURCE POINT ###

## yellowbrick_text_visualize.py
from yellowbrick.text import TSNEVisualizer,UMAPVisualizer
from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer

def visualize(dim_reduction,encoding,corpus_data,corpus_target,labels = True,alpha=0.7,metric=None):
    if 'tfidf' in encoding.lower():
        encode  = TfidfVectorizer()
    if 'count' in encoding.lower():
        encode = CountVectorizer()
    docs   = encode.fit_transform(corpus_data)
    if labels is True:
	To randomize a dataset: shuf=data.iloc[np.random.permutation(len(data))]
	sh = shuf.reset_index(drop=True)

	TO KNOW NULL
	data.isnull().sum()

	TO DELETE FROM DICT
	del dna_counts['e']

	RANDOM FOREST
	import tensorflow as tf
	from keras.backend.tensorflow_backend import set_session
	config = tf.ConfigProto()
	config.gpu_options.per_process_gpu_memory_fraction = 0.3
	set_session(tf.Session(config=config))


	By default the system uses the maximum available GPU memory, which seems to cause an error while training
	Threshold of 0.5 is used by default (for binary problems) to convert predicted probabilities into class predictions

	Threshold can be adjusted to increase sensitivity or specificity

	Sensitivity and specificity have an inverse relationship

	Increasing one would always decrease the other

	Adjusting the threshold should be one of the last steps you do in the model-building process
	When you wanna sort

	10 2 5
	7 1 0
	9 9 9
	1 23 12
	6 5 9

	with respect to element 1 (2,1,9,23,5)
	--------------------------------
	S = 'abcdef'
	N = 3

	print(list(zip(*[iter(S)] * N)))

	will Print: [('a','b','c'),('d','e','f')]
	--------------------------------
	ROTATE A LIST
	SALES WHERE SHOP_ID IS 26,27 OR 28
	sales = sales[sales['shop_id'].isin([26, 27, 28])]
	r'b[aeiou]{2}t' b followed by two vowels, followed by t
	r'A\d{3,}' The letter A followed by 3 or more digits
	r'[A-Z]{,5}' Zero to five capital letters
	r'\w{3,7}' Three to seven “word” characters
	import psycopg2
	import random, string
	hostname = 'localhost'
	username = 'postgres'
	password = '*****'
	database = 'pgroute'
	myConnection = psycopg2.connect(host=hostname, user=username, password=password, dbname=database )

	# Simple routine to run a query on a database and print the results:
	def doQuery( conn, query ) :
	import pandas as pd
	import urllib.request

	### GRAPHHOPPER API ###
	urlStart = 'http://localhost:8989/route/?'
	point = 'point='
	urlEnd = '&type=gpx&instructions=false&vehicle=car'
	separator = '%2C'

	### SOURCE POINT ###
	from yellowbrick.text import TSNEVisualizer,UMAPVisualizer
	from sklearn.feature_extraction.text import TfidfVectorizer,CountVectorizer

	def visualize(dim_reduction,encoding,corpus_data,corpus_target,labels = True,alpha=0.7,metric=None):
	if 'tfidf' in encoding.lower():
	encode = TfidfVectorizer()
	if 'count' in encoding.lower():
	encode = CountVectorizer()
	docs = encode.fit_transform(corpus_data)
	if labels is True: