Sandeep Anand sananand007

## Classifiers.py
"""
Logistic Regression
"""
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import (
    accuracy_score,
    confusion_matrix,
    f1_score,
    roc_curve,
    precision_score,
    classification_report,
)

# X_train / y_train / X_test / y_test are not defined in this snippet; they
# must already exist when it runs.
# A large C means weak regularisation (scikit-learn treats C as the inverse
# of the regularisation strength).
lgmodel = LogisticRegression(C=1e5, max_iter=100)
lgmodel.fit(X_train, y_train)
ypred = lgmodel.predict(X_test)

# First the score on the *training* split, then the confusion matrix of the
# predictions made for the test split.
train_score = lgmodel.score(X_train, y_train)
print(train_score)
test_confusion = confusion_matrix(y_test, ypred)
print(test_confusion)

## classifying.py
# Turn Fare and Age into ordinal classes using quantile bins (pd.qcut).
# The bin edges are learned on the training frame only and re-used for the
# test frame via pd.cut, so a class label covers the same value range in both
# frames.  (Calling pd.qcut on each frame separately gave train and test
# different edges for the same label.)
for _col, _n_bins in (('Fare', 4), ('Age', 5)):
    _labels = list(range(1, _n_bins + 1))
    df_train_filt2[_col + 'class'], _train_edges = pd.qcut(
        df_train_filt2[_col], _n_bins, labels=_labels, retbins=True)
    # Open both outer edges so test values outside the training range still
    # land in the first/last class instead of becoming NaN.
    _edges = [float('-inf'), *_train_edges[1:-1], float('inf')]
    df_test_filt1[_col + 'class'] = pd.cut(
        df_test_filt1[_col], bins=_edges, labels=_labels)
# The raw columns are replaced by their class columns.
df_train_filt2.drop(['Fare', 'Age'], axis=1, inplace=True)
df_test_filt1.drop(['Fare', 'Age'], axis=1, inplace=True)

# Get the encoding done to get rid of string columns that you cannot train
df_train_filt2=pd.get_dummies(df_train_filt2, columns=\

## loading_the_dataset.py
# Get the Data and see the dataset
import os
import matplotlib.pyplot as plt
import pandas as pd

# Collect the full path of every file in the current working directory whose
# name contains 'csv'.
path = os.getcwd()
# os.path.join picks the separator of the host OS; the previous hard-coded
# "\\" only produced valid paths on Windows.
filepath = [os.path.join(path, file) for file in os.listdir(path) if 'csv' in file]

# Load the training set.  Only the file name is tested for "train": testing
# the whole path would match every CSV whenever a parent directory happens to
# contain "train".  A separate loop variable keeps `path` pointing at the
# working directory.
for csv_path in filepath:
    if "train" in os.path.basename(csv_path):
        df_train = pd.read_csv(csv_path)

## Fill_up_ages.py
# Now to fill up the NA values for Ages
# NOTE(review): `np`, `plt` and `df_train_filt2` are not defined in this
# snippet - confirm they are set up before it runs.
mean_age = df_train_filt2['Age'].mean()
missing_age = df_train_filt2['Age'].isna()
listofAgeind_ = list(df_train_filt2[missing_age].index)
# Draw one replacement per missing row from a normal distribution centred on
# the mean age (standard deviation 10).  A normal draw can come out below
# zero, which is not a valid age, so the draws are clipped at 0.
list_of_ages = np.clip(np.random.normal(mean_age, 10, len(listofAgeind_)), 0, None)

plt.plot(list_of_ages, linewidth=2, color='g', label='Age')
plt.show()
# One mask-based assignment instead of a Python-level loop of per-row .loc
# writes; values are placed in row order, which is the order of listofAgeind_.
if len(list_of_ages):
    df_train_filt2.loc[missing_age, 'Age'] = list_of_ages
print(df_train_filt2.count())

## Amzn-sagemaker.md

      
              1 file
            
          
              0 forks
            
          
              2 comments
            
          
              0 stars
            
          
                sananand007
                / Amzn-sagemaker.md
            
            
              Last active
              April 1, 2018 21:42
            
              
                Running a Jupyter Instance using Amazon sagemaker
              
          
    Running a Jupyter Instance using Amazon sagemaker

Loading the data using kaggle-cli


Links

https://www.kaggle.com/general/6604
https://github.com/floydwch/kaggle-cli
[Downloading the files](https://stackoverflow.com/questions/7243750/download-file-from-web-in-python-3)


Kaggle Username and password :
sandeep.maxcraze@gmail.com | ********


## Run_Jupyter_Notebook_Conda_aws.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              1 star
            
          
                sananand007
                / Run_Jupyter_Notebook_Conda_aws.md
            
            
              Last active
              March 4, 2018 15:32
            
              
                Steps to Run a Jupyter Notebook using AWS Putty Console and Anaconda env with Windows and Chrome
              
          
    How to log in to the Jupyter Notebook through AWS using a Conda environment created on AWS:

Installing Anaconda on Ubuntu :


refer : https://www.digitalocean.com/community/tutorials/how-to-install-the-anaconda-python-distribution-on-ubuntu-16-04

    - Installer: https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh
    - cd /tmp
    - curl -O https://repo.continuum.io/archive/Anaconda3-5.1.0-Linux-x86_64.sh
    - Check the sha256sum: sha256sum Anaconda3-5.1.0-Linux-x86_64.sh


## The Technical Interview Cheat Sheet.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                sananand007
                / The Technical Interview Cheat Sheet.md
            
            
              Created
              March 8, 2017 03:20
                — forked from tsiege/The Technical Interview Cheat Sheet.md
            
              
                This is my technical interview cheat sheet. Feel free to fork it or do whatever you want with it. PLEASE let me know if there are any errors or if anything crucial is missing. I will add more links soon.
              
          
    Studying for a Tech Interview Sucks, so Here's a Cheat Sheet to Help

This list is meant to be both a quick guide and a reference for further research into these topics.  It's basically a summary of that comp sci course you never took or forgot about, so there's no way it can cover everything in depth.  It will also be available as a gist on GitHub for everyone to edit and add to.
Data Structure Basics

### Array
#### Definition:

Stores data elements based on a sequential, most commonly 0-based, index.
Based on tuples from set theory.
	"""
	Logistic Regression
	"""
	from sklearn.linear_model import LogisticRegression
	from sklearn.metrics import (
	    accuracy_score,
	    confusion_matrix,
	    f1_score,
	    roc_curve,
	    precision_score,
	    classification_report,
	)

	# X_train / y_train / X_test / y_test are not defined in this snippet; they
	# must already exist when it runs.
	# A large C means weak regularisation (scikit-learn treats C as the inverse
	# of the regularisation strength).
	lgmodel = LogisticRegression(C=1e5, max_iter=100)
	lgmodel.fit(X_train, y_train)
	ypred = lgmodel.predict(X_test)

	# First the score on the *training* split, then the confusion matrix of the
	# predictions made for the test split.
	train_score = lgmodel.score(X_train, y_train)
	print(train_score)
	test_confusion = confusion_matrix(y_test, ypred)
	print(test_confusion)
	# Turn Fare and Age into ordinal classes using quantile bins (pd.qcut).
	# The bin edges are learned on the training frame only and re-used for the
	# test frame via pd.cut, so a class label covers the same value range in both
	# frames.  (Calling pd.qcut on each frame separately gave train and test
	# different edges for the same label.)
	for _col, _n_bins in (('Fare', 4), ('Age', 5)):
	    _labels = list(range(1, _n_bins + 1))
	    df_train_filt2[_col + 'class'], _train_edges = pd.qcut(
	        df_train_filt2[_col], _n_bins, labels=_labels, retbins=True)
	    # Open both outer edges so test values outside the training range still
	    # land in the first/last class instead of becoming NaN.
	    _edges = [float('-inf'), *_train_edges[1:-1], float('inf')]
	    df_test_filt1[_col + 'class'] = pd.cut(
	        df_test_filt1[_col], bins=_edges, labels=_labels)
	# The raw columns are replaced by their class columns.
	df_train_filt2.drop(['Fare', 'Age'], axis=1, inplace=True)
	df_test_filt1.drop(['Fare', 'Age'], axis=1, inplace=True)

	# Get the encoding done to get rid of string columns that you cannot train
	df_train_filt2=pd.get_dummies(df_train_filt2, columns=\
	# Get the Data and see the dataset
	import os
	import matplotlib.pyplot as plt
	import pandas as pd

	# Collect the full path of every file in the current working directory whose
	# name contains 'csv'.
	path = os.getcwd()
	# os.path.join picks the separator of the host OS; the previous hard-coded
	# "\\" only produced valid paths on Windows.
	filepath = [os.path.join(path, file) for file in os.listdir(path) if 'csv' in file]

	# Load the training set.  The loop body is indented again (it had lost its
	# indentation, which is a syntax error).  Only the file name is tested for
	# "train": testing the whole path would match every CSV whenever a parent
	# directory happens to contain "train".  A separate loop variable keeps
	# `path` pointing at the working directory.
	for csv_path in filepath:
	    if "train" in os.path.basename(csv_path):
	        df_train = pd.read_csv(csv_path)
	# Now to fill up the NA values for Ages
	# NOTE(review): `np`, `plt` and `df_train_filt2` are not defined in this
	# snippet - confirm they are set up before it runs.
	mean_age = df_train_filt2['Age'].mean()
	missing_age = df_train_filt2['Age'].isna()
	listofAgeind_ = list(df_train_filt2[missing_age].index)
	# Draw one replacement per missing row from a normal distribution centred on
	# the mean age (standard deviation 10).  A normal draw can come out below
	# zero, which is not a valid age, so the draws are clipped at 0.
	list_of_ages = np.clip(np.random.normal(mean_age, 10, len(listofAgeind_)), 0, None)

	plt.plot(list_of_ages, linewidth=2, color='g', label='Age')
	plt.show()
	# One mask-based assignment instead of a Python-level loop of per-row .loc
	# writes; values are placed in row order, which is the order of listofAgeind_.
	if len(list_of_ages):
	    df_train_filt2.loc[missing_age, 'Age'] = list_of_ages
	print(df_train_filt2.count())