Skip to content

Instantly share code, notes, and snippets.

@sroy8091
Created July 7, 2016 10:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sroy8091/73b39453e97ccac27814d809116a8016 to your computer and use it in GitHub Desktop.
Save sroy8091/73b39453e97ccac27814d809116a8016 to your computer and use it in GitHub Desktop.
Script to fetch Hotel names, price and ratings to be inserted into a MySQL table
##Importing Required Libraries
from bs4 import BeautifulSoup
import requests
from __future__ import print_function
from sqlalchemy import *
import MySQLdb as my
import urllib2
##URl of Booking.Com to be fetched
##Default Location : Kolkata
##Date taken 20th June
url="http://www.booking.com/searchresults.html?label=gen173nr-1FCAEoggJCAlhYSDNiBW5vcmVmaGyIAQGYATG4AQ_IAQ_YAQHoAQH4AQKoAgM;sid=4de818f29a8ff3af42baceb8abc8c66e;dcid=1;checkin_monthday=20&checkin_year_month=2016-6&checkout_monthday=21&checkout_year_month=2016-6&class_interval=1&dest_id=-2092511&dest_type=city&dtdisc=0&group_adults=2&group_children=0&hlrd=0&hyb_red=0&inac=0&label_click=undef&nha_red=0&no_rooms=1&offset=0&postcard=0&redirected_from_city=0&redirected_from_landmark=0&redirected_from_region=0&review_score_group=empty&room1=A%2CA&sb_price_type=total&score_min=0&src=index&src_elem=sb&ss=Kolkata%2C%20West%20Bengal%2C%20India&ss_all=0&ss_raw=Kolkata&ssb=empty&sshis=0&"
##Set of Commands Used to Scrape data as follows :-
##hotels:- contain full data
##name:-Contains Name of hotels in Kolkata(minimum 10,max 16)
##score:- Contains Ratings of all the Hotels
##price:- Contains the prices of Hotels as on booking to be done on 20th June,2016
##details:- A list that contains name,rating,price accordingly for all the hotels
page=urllib2.urlopen(url)
soup=BeautifulSoup(page)
head = {"User-Agent":"Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.75 Safari/537.36"}
res = requests.get(url, headers=head)
soup = BeautifulSoup(res.text,"html.parser")
hotels = soup.select("#hotellist_inner div.sr_item.sr_item_new")
details = []
##details is a list that stores the details which fetches the required data##and stores them together to be accessed serially
##name,rating and price of hotels scraped all together
for hotel in hotels:
name = hotel.select("span.sr-hotel__name")
score = hotel.select("span.average.js--hp-scorecard-scoreval")
price = hotel.select("table div.sr-prc--num.sr-prc--final")
for i in range(0,len(name)):
#print name[i]
#print type(name[i].getText().encode("ascii", "ignore"))
details.append(name[i].getText().encode("ascii", "ignore"))
#print score[i].getText()
details.append(score[i].getText().encode("ascii", "ignore"))
for i in range(0,len(price)):
#print price[i].getText()
details.append(price[i].getText().encode("ascii", "ignore"))
##print(details)
##details = map(lambda s: s.strip(), details)
##print(details)
###print(details[0]+details[1]+details[2]+details[3]+details[4])
##Using SQLAlchemy
##engine for mysql database created
engine = create_engine('mysql://harimohan:root@localhost/foo',echo=True)
##MetaData() stores MetaData for the table to be created
metadata = MetaData()
##Hotel table created
hotel = Table('hotel', metadata,
Column('Hotel_Name', String(40), nullable=False),
Column('Hotel_Rating', String(6), nullable=False),
Column('Hotel_Price', String(20), nullable=False)
)
hotel=hotel.create(engine)
##MetaData of Table gathered
hotel = Table('hotel', metadata, autoload=True)
##Insert Query generated from metadata like INSERT INTO ..... Values('')
##Using hotel.insert() method
i = hotel.insert()
##Data inserted into the Table serially from the details list
k=0
if k < range(0,len(details))):
i.execute(Hotel_Name=details[k], Hotel_Rating=details[k+1], Hotel_Price=details[k+2])
k=k+3
##After Completion of Insertion Process all the data is selected like
##SELECT * FROM command using hotel.select() method.....and then using fetchall() method
s = hotel.select()
rs = s.execute()
rs.fetchall()
##Using MySQL DB
db = my.connect(host="127.0.0.1",user="root",passwd="",db="world")
data=details
cursor = db.cursor()
sql = "insert into hotel(Hotel_Name, Hotel_Rating, Hotel_Price) VALUES(%s, %s, %s)"
number_of_rows = cursor.executemany(sql, data)
db.commit()
db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment