Flipkart Billion dollar sale hack
# Author = Nikhil Venkat Sonti | |
# email = nikhilsv92@gmail.com | |
# github ID = shadowfax92 | |
import sys | |
from xml.dom.minidom import _get_StringIO | |
from lxml import html | |
import requests | |
import os | |
import re | |
import time | |
import datetime | |
import csv | |
import urllib2 | |
from StringIO import StringIO | |
import multiprocessing as mp | |
import socket | |
import shutil | |
from email.mime.text import MIMEText | |
from email.mime.multipart import MIMEMultipart | |
from email.mime.application import MIMEApplication | |
import subprocess | |
import time | |
new_item_dict = {} | |
def play_alert(): | |
# play alert on Mac using say command | |
os.system('say "ring ring ring"') | |
def mail_me(subject, content): | |
msg = MIMEMultipart('alternative') | |
msgbody = MIMEText(content) | |
msg["From"] = "X@gmail.com" | |
msg["To"] = "Y@gmail.com" | |
msg["Subject"] = subject | |
msg.attach(msgbody) | |
p = subprocess.Popen(["/usr/sbin/sendmail", "-t"], stdin=subprocess.PIPE,stdout=subprocess.PIPE,stderr=subprocess.PIPE) | |
(out,err) = p.communicate(msg.as_string()) | |
print 'Mailed = ' + content | |
def get_content_from_url_and_store(): | |
try: | |
url = 'http://www.flipkart.com/' | |
page = requests.get("http://www.flipkart.com/") | |
tree = html.parse(StringIO(page.text)).getroot() | |
os.system('clear') | |
print '\nRUNNING FLIPKART PARSER' | |
for i in range(1,8): | |
try: | |
mail_content = "" | |
mail_subject = "" | |
name_1 = "" | |
name_2 = "" | |
xpath_1 = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['+str(i)+']/a/div[4]/text()' | |
xpath_2 = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['+str(i)+']/a/div[2]/div/text()' | |
xpath_3 = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['+str(i)+']/a/div[1]/text()' | |
xpath_4 = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['+str(i)+']/div/div[2]/text()' | |
xpath_5_link = '/html/body/div[1]/div[3]/div/div[9]/div/div[1]/div['+str(i)+']/a/div[5]/div[2]/div' | |
#sold out check | |
try: | |
name_4 = tree.xpath(xpath_4)[0].strip(' \t\n\r') | |
print "sold-out or not = " + name_4 | |
except Exception, e: | |
pass | |
finally: | |
pass | |
try: | |
name_1 = tree.xpath(xpath_1)[0].strip(' \t\n\r') | |
print "name-1 = " + name_1 | |
if re.search(r'(disk|hard|seagate|external|headphone|headset|phone)', name_1, re.IGNORECASE): | |
play_alert() | |
# mail_subject += name_1 + " " | |
# mail_subject += str(name_1) + " " | |
except Exception, e: | |
pass | |
finally: | |
pass | |
try: | |
name_2 = tree.xpath(xpath_2)[0].strip(' \t\n\r') | |
print "name-2 = " + name_2 | |
if re.search(r'(disk|hard|seagate|external|headphone|headset|phone)', name_2, re.IGNORECASE): | |
play_alert() | |
# mail_subject += str(name_2) + " " | |
except Exception, e: | |
pass | |
finally: | |
pass | |
try: | |
name_3 = tree.xpath(xpath_3)[0].strip(' \t\n\r') | |
print "offer percentage/price = " + name_3 | |
except Exception, e: | |
pass | |
finally: | |
pass | |
try: | |
name_5 = tree.xpath(xpath_5_link)[0].get('data-url') | |
link = 'http://www.flipkart.com'+name_5 | |
print "view/shop link = " + link | |
# print "view/shop link = " + name_5[0].strip(' \t\n\r') | |
except Exception, e: | |
pass | |
finally: | |
pass | |
mail_subject = str(name_1) + " " + str(name_2) | |
if mail_subject not in new_item_dict: | |
mail_content += str(name_1) + "\n" | |
mail_content += str(name_2) + "\n" | |
mail_content += str(name_4) + "\n" | |
mail_content += str(name_3) + "\n" | |
mail_content += str(link) + "\n" | |
new_item_dict[mail_subject] = mail_content | |
mail_me(mail_subject, mail_content) | |
play_alert() | |
except Exception, e: | |
print str(e) | |
finally: | |
pass | |
except Exception, e: | |
print 'Something Went Wrong :(' | |
print 'Exception: ', str(e) | |
pass | |
finally: | |
pass | |
def main(): | |
while True: | |
get_content_from_url_and_store() | |
time.sleep(5) | |
if __name__ == '__main__': | |
main() |
This comment has been minimized.
This comment has been minimized.
I guess its 5 Ms. and if its 5 Secs then its too much to loose. |
This comment has been minimized.
This comment has been minimized.
@Aadish26 time . sleep takes arguments as seconds. So, OP was in fact scraping the website every 5 seconds. |
This comment has been minimized.
This comment has been minimized.
I like it ! |
This comment has been minimized.
This comment has been minimized.
agree!, But it might happen that your program sent you a mail and you had opened link after 15 minute or more ,At that time product might be sold out. If you had opened link just in next second and it show status sold out then we can say, yes it was scam |
This comment has been minimized.
This comment has been minimized.
You are a true programmer man... Keep going.. |
This comment has been minimized.
This comment has been minimized.
moving from Gist to Github repo |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
why did you put sleep time as 5 seconds, you could have set it as 1 seconds. Who knows, you might have got what you really waned :-)