Skip to content

Instantly share code, notes, and snippets.

@prakhar21
Last active December 13, 2017 12:30
Show Gist options
  • Save prakhar21/1fe87229f99c74d666e07d066b7933a7 to your computer and use it in GitHub Desktop.
Save prakhar21/1fe87229f99c74d666e07d066b7933a7 to your computer and use it in GitHub Desktop.
Swiggy Account Analysis
#!/usr/bin/env python
"""
@uthor: Prakhar Mishra
date: Dec, 13 2017
desc: Read URL for more information on the same.
"""
import os
import csv
import time
from bs4 import BeautifulSoup
import matplotlib.pyplot as plt
from selenium import webdriver
import re
import math
from collections import Counter
from nltk.util import ngrams
# Login credentials typed into the Swiggy login form by crawl().
# The values below are placeholders; avoid committing real credentials.
PHONENUMBER = "number"
PASSWORD = "password"

# Page the crawler opens first.
URL = "https://www.swiggy.com/"

# CSV file written by crawl() and read back by load().
FILE = 'data.csv'
def crawl():
    '''
    Crawl order data from the website and save it to FILE as CSV.

    Drives Firefox through Selenium: opens URL, logs in with PHONENUMBER and
    PASSWORD (typed one character at a time) and opens the Account page.
    The order-scraping steps are still TODO, so no records are collected
    yet; FILE is only (re)written when there is something to save.

    Returns None.
    '''
    import sys  # local import: only used to choose the csv file mode below

    # Rows to write out; the TODO steps below are expected to fill this in.
    # Defined up front so the write step never references an unbound name.
    data = []

    driver = webdriver.Firefox()
    try:
        driver.get(URL)

        # Login popup
        driver.find_element_by_link_text('Login').click()

        # Login
        # Proper sleep has to be applied for you to not be recognized as bot
        pnumber = driver.find_element_by_id('phoneNumberField')
        for num in PHONENUMBER:
            time.sleep(0.2)
            pnumber.send_keys(num)
        time.sleep(1.5)

        passwd = driver.find_element_by_id('passwordField')
        for pss in PASSWORD:
            time.sleep(0.2)
            passwd.send_keys(pss)
        time.sleep(1.5)

        driver.find_element_by_xpath('/html/body/div/div[1]/div/div/div[2]/login/div/div[4]/div/div/div[2]/form/div[3]/button').click()
        time.sleep(5)

        # Go to my account
        header = driver.find_element_by_xpath('//*[@id="header-menu"]')
        header.click()
        account = driver.find_element_by_link_text('Account')
        account.click()

        # Go to my orders
        # TODO

        # Open full list first till [Load More...] is no more visible
        # TODO

        # Get RestaurantName, DateTime, PaymentMode, GrandTotal, ItemName, Taxes, Address
        # TODO
        # NOTE(review): load() discards the first CSV row as a header, so the
        # rows collected here should presumably start with a header row.
    finally:
        # Always shut the browser down, even when an element lookup fails.
        driver.quit()

    if not data:
        # Nothing was scraped: leave any existing FILE untouched rather
        # than truncating it to an empty file.
        return

    # Write to file
    # csv wants a binary file on Python 2 and a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] >= 3:
        out = open(FILE, 'w', newline='')
    else:
        out = open(FILE, 'wb')
    with out:
        csv.writer(out).writerows(data)
    return
def load():
    '''
    Load the CSV rows stored in FILE into memory.

    The first row is treated as a header and skipped.

    Returns a list of rows, each row being a list of strings. An empty
    file (or one holding only the header) yields an empty list.
    '''
    import sys  # local import: only used to choose the csv file mode below

    # csv wants a binary file on Python 2 and a text file opened with
    # newline='' on Python 3.
    if sys.version_info[0] >= 3:
        f = open(FILE, 'r', newline='')
    else:
        f = open(FILE, 'rb')

    # The with-block closes the file even if parsing raises.
    with f:
        reader = csv.reader(f)
        # Skip the header; the default keeps an empty file from raising
        # StopIteration.
        next(reader, None)
        return list(reader)
def similarity(v1, v2):
    '''
    Cosine similarity between two term-weight mappings.

    v1, v2: mappings from term to numeric weight (for example the Counter
    objects returned by shingles()).

    Returns a float; 0.0 when the product of the two magnitudes is zero.
    '''
    shared_terms = set(v1.keys()) & set(v2.keys())
    dot_product = sum(v1[term] * v2[term] for term in shared_terms)

    squares_v1 = sum(weight ** 2 for weight in v1.values())
    squares_v2 = sum(weight ** 2 for weight in v2.values())
    magnitude = math.sqrt(squares_v1) * math.sqrt(squares_v2)

    # Guard clause: avoid dividing by zero for empty / all-zero vectors.
    if not magnitude:
        return 0.0
    return float(dot_product) / magnitude
def shingles(s):
    '''
    Vectorize the string as a bag of 3-grams.

    s: the sequence to split; for a str the 3-grams are runs of three
    consecutive characters.

    Returns a Counter mapping each joined 3-gram to its number of
    occurrences.
    '''
    return Counter(''.join(gram) for gram in ngrams(s, 3))
def smartsimilarity(data=None, threshold=0.65):
    '''
    Group near-duplicate strings using shingle-based cosine similarity.

    data: iterable of strings to cluster. When omitted, a built-in sample
        of dish names is used, so existing no-argument calls behave as
        before.
    threshold: minimum similarity() score for two strings to be treated
        as neighbours (default 0.65).

    Returns a list of clusters, each a list of strings from data.
    Neighbour lists are de-duplicated but not merged, so a string can
    appear in more than one cluster. Cluster order comes from a set and
    should not be relied upon.
    '''
    if data is None:
        data = ["Poori Chole", "Poori Chola", "Chola Puri", "Rajma Chawal", "Rajma Rice", "Chawal Rajma"]
    data = list(data)  # allow any iterable and index by position below

    data_shingles = [shingles(d) for d in data]

    # Merging - [Level 1]: for every item, collect the indices of all items
    # (itself included) that score at or above the threshold.
    final = {}
    for s1, vec1 in enumerate(data_shingles):
        for s2, vec2 in enumerate(data_shingles):
            # An item is always its own neighbour; this keeps strings with
            # no shingles (self-score 0.0) from vanishing from the result.
            if s1 == s2 or similarity(vec1, vec2) >= threshold:
                final.setdefault(s1, []).append(s2)

    # Merging - [Level 2]: an item with no other neighbour forms its own
    # cluster; otherwise take the neighbour list of each of its neighbours.
    clusters = []
    for k, v in final.items():
        if len(v) == 1:
            clusters.append(v)
        else:
            for neig in v:
                if neig != k:
                    clusters.append(final[neig])

    # Drop exact duplicate clusters (lists are unhashable, hence tuples).
    unique_data = [list(x) for x in set(tuple(x) for x in clusters)]

    # Remap from index to string
    return [[data[i] for i in cluster] for cluster in unique_data]
def top5restaurants(data):
    '''
    Count how often each value of column 0 occurs in the records.

    data: iterable of rows; column 0 is presumably the restaurant name.

    Returns a Counter over every distinct value (not limited to five).
    '''
    return Counter(row[0] for row in data)
def modeOfpayment(data):
    '''
    Count how often each value of column 1 occurs in the records.

    data: iterable of rows; column 1 is presumably the payment mode.

    Returns a Counter mapping each distinct value to its frequency.
    '''
    tally = Counter()
    for row in data:
        tally[row[1]] += 1
    return tally
def timeline(data):
    '''
    Count how often each value of column 2 occurs in the records.

    data: iterable of rows; column 2 is presumably the order date.

    Returns a Counter mapping each distinct value to its frequency.
    '''
    return Counter(row[2] for row in data)
def delivery(data):
    '''
    Count how often each value of column 3 occurs in the records.

    data: iterable of rows; column 3 is presumably the delivery address.

    Returns a Counter mapping each distinct value to its frequency.
    '''
    places = Counter()
    for row in data:
        places[row[3]] += 1
    return places
def extracharges(data):
    '''
    Add up column 4 of every record, converting each value with float().

    data: iterable of rows; column 4 is presumably the extra charges
    (delivery / packing) of an order.

    Returns the sum (0 for no rows).
    '''
    return sum(float(row[4]) for row in data)
def totalamount(data):
    '''
    Add up column 5 of every record, converting each value with float().

    data: iterable of rows; column 5 is presumably the order total.

    Returns the sum (0 for no rows).
    '''
    return sum(float(row[5]) for row in data)
def analyze(data):
    '''
    Run the individual analyses over the loaded records.

    data: rows as returned by load().

    Returns a 5-tuple:
    (clusters, restaurant counts, payment counts, date counts,
    address counts).
    '''
    # NOTE: extracharges(data) and totalamount(data) are not part of the
    # result yet.
    return (
        smartsimilarity(),
        top5restaurants(data),
        modeOfpayment(data),
        timeline(data),
        delivery(data),
    )
def visualize():
    '''
    Placeholder for visualizing the results; currently does nothing.
    '''
    # TODO: plot the analysis results
    return None
def main():
    """
    Script entry point: load the saved records and print the analysis.

    Raises RuntimeError when the data cannot be loaded from file.
    Errors raised by the analysis itself propagate unchanged.
    """
    # NOTE: the crawl() and visualize() stages are currently disabled and
    # are therefore not called here.
    try:
        data = load()
    except Exception as e:
        # A plain string cannot be raised; wrap the cause in a real
        # exception so the failure message actually reaches the user.
        raise RuntimeError("Failed to load data from file to memory: %s" % e)

    # Single-argument print(...) behaves the same on Python 2 and 3.
    print(analyze(data))
# Run the pipeline only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment