Skip to content

Instantly share code, notes, and snippets.

@Elsaveram
Elsaveram / multiprocessing.py
Created September 11, 2018 02:10
Multi-processing for Python
import multiprocessing as mp
# Vectorize every document in the corpus in parallel.
# mp.Pool() with no arguments starts as many worker processes as the machine
# reports CPUs. pool.map splits the iterable (here the 'corp' column) into
# chunks, applies the function given as its first argument to each item, and
# returns the results in input order.
# NOTE(review): on platforms that spawn worker processes (e.g. Windows) this
# needs an `if __name__ == "__main__":` guard when run from a script — confirm
# how this snippet is executed.
with mp.Pool() as pool:
    str_business_df['nltk_dict'] = pool.map(nlp.vectorize, str_business_df['corp'])
@Elsaveram
Elsaveram / recommender1.py
Last active September 11, 2018 19:43
TFIDF and Cosine Similarity for restaurant recommendation system
from collections import defaultdict
import nltk
import string
import re
from nltk.corpus import stopwords
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel
# English stop words taken from NLTK's stopwords corpus.
# NOTE(review): presumably requires the corpus to be available locally
# (e.g. via nltk.download('stopwords')) — confirm in the target environment.
stop = stopwords.words('english')
@Elsaveram
Elsaveram / s3setup.py
Created September 3, 2018 22:59
S3 set up and csv download
#AWS credential setup
# pip install awscli
# pip install boto3
# aws configure
# Configure guide: https://docs.aws.amazon.com/cli/latest/userguide/cli-chap-getting-started.html
## Configure > Quick Configure > IAM console > Delete Your Root Access Keys > Manage Security Credentials > Access Keys > Create New Key
import boto3
# S3 service resource handle created through boto3.
# NOTE(review): presumably picks up the credentials stored by `aws configure`
# (see the setup steps above) — confirm for the deployment environment.
s3 = boto3.resource('s3')
@Elsaveram
Elsaveram / coverter.py
Created September 3, 2018 22:53
Convert json to pandas dataframe
import json
import pandas as pd
from glob import glob
import matplotlib.pyplot as plt
#Convert json string to a flat python dictionary
def convert(x):
ob = json.loads(x)
@Elsaveram
Elsaveram / House class.py
Last active September 3, 2018 18:37
Sample of the house class for Kaggle housing prices in Iowa blog post
class House:
    """Training and test housing data held together in one DataFrame.

    Both CSV files are read and stacked into ``self.all``. A boolean ``test``
    column marks the rows whose ``SalePrice`` value is missing.
    """

    def __init__(self, train_data_file, test_data_file):
        """Load both CSV files and concatenate them into ``self.all``.

        Args:
            train_data_file: Path or file-like object passed to ``pd.read_csv``.
            test_data_file: Path or file-like object passed to ``pd.read_csv``.
        """
        train = pd.read_csv(train_data_file)
        test = pd.read_csv(test_data_file)
        # ignore_index gives the stacked frame a fresh 0..n-1 index instead of
        # repeating each file's own row numbers.
        self.all = pd.concat([train, test], ignore_index=True)
        # Rows with no SalePrice are flagged as test rows.
        self.all['test'] = self.all.SalePrice.isnull()

    def train(self):
        """Return the rows of ``self.all`` that are not flagged as test rows."""
        return self.all[~self.all['test']]