Skip to content

Instantly share code, notes, and snippets.

@scrapehero
Last active December 20, 2017 10:47
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save scrapehero/30e9c446a709dbce5d752e18cd21fb25 to your computer and use it in GitHub Desktop.
Save scrapehero/30e9c446a709dbce5d752e18cd21fb25 to your computer and use it in GitHub Desktop.
import requests
import json
import unicodecsv as csv
import argparse
from argparse import RawTextHelpFormatter
def _first_value(record, key):
    """Return the first element of ``record[key]``, or None if absent or empty."""
    values = record.get(key)
    return values[0] if values else None


def _join_words(text, separator):
    """Split ``text`` on whitespace and rejoin the pieces with ``separator``."""
    return separator.join(''.join(text).split())


def parse(location, sort):
    """Look up a location on viator.com and return its tour listings.

    Two HTTP requests are made: one to resolve ``location`` to a destination
    (only the first match returned by the site is used), and one to fetch a
    single page of up to 45 products for that destination, priced in USD.

    Args:
        location: Free-text location name to search for.
        sort: Value sent as the ``criteria.sortBy`` query parameter.

    Returns:
        A list of dicts, one per product, with the keys ``tour_name``,
        ``rating``, ``reviews``, ``location``, ``price``, ``duration``,
        ``description`` and ``url``. A value is None when the corresponding
        field is missing from the response. The list is empty when the
        location search returns no destinations.

    Raises:
        requests.RequestException: on connection problems, timeouts or a
            non-2xx HTTP status.
        ValueError: if a response body is not valid JSON.
        KeyError: if an expected top-level key is missing from a response.
    """
    print("Retrieving Location Details")
    # Pass the term through ``params`` so spaces, '&', '#' and non-ASCII
    # characters in the location name are percent-encoded correctly.
    location_response = requests.get(
        'https://www.viator.com/ajaxSegmentSearch.jspa',
        params={'term': location},
        timeout=30
    )
    location_response.raise_for_status()
    matches = json.loads(location_response.text)['destinations']
    if not matches:
        # Fail soft with a clear message instead of an IndexError on [0].
        print("No destination found for location: %s" % location)
        return []
    destination = matches[0]
    location_id = destination['id']
    # The place-name slug is the same for every tour, so build it once.
    url_place_name = _join_words(destination['fullPlaceNameClean'], '-')

    print("Retrieving destinations")
    listing_response = requests.get(
        'https://www.viator.com/api/product.jspa',
        params=[
            ('destinationID', location_id),
            ('currency', 'USD'),
            ('pageLister.pageSize', 45),
            ('criteria.sortBy', sort),
        ],
        timeout=30
    )
    listing_response.raise_for_status()
    listing = json.loads(listing_response.text)

    destination_list = []
    for dest in listing['pagedList']:
        tour_name = _first_value(dest, 's_entryName')
        destination_id = _first_value(dest, 'sts_productCode')

        # presumably st_durationSort is in minutes, making this hours --
        # TODO confirm against the API.
        raw_duration = _first_value(dest, 'st_durationSort')
        duration = float(raw_duration) / 60 if raw_duration else None

        product_text = _first_value(dest, 's_productText')
        description = _join_words(product_text, ' ') if product_text else None

        # A product URL needs both the tour name and the product code;
        # without them there is nothing meaningful to link to.
        if tour_name is not None and destination_id is not None:
            url = 'https://www.viator.com/tours/%s/%s/d%s-%s' % (
                url_place_name,
                _join_words(tour_name, '-'),
                location_id,
                destination_id,
            )
        else:
            url = None

        destination_list.append({
            'tour_name': tour_name,
            'rating': _first_value(dest, 'st_avgRating_A-SHOP'),
            'reviews': _first_value(dest, 'it_totalReviews_A-SHOP'),
            'location': _first_value(dest, 's_primaryDestName'),
            'price': _first_value(dest, 'priceFrom'),
            'duration': duration,
            'description': description,
            'url': url,
        })
    return destination_list
if __name__ == '__main__':
    # Command-line entry point: scrape tours for one location and write
    # them to viator_data.csv in the current directory.
    parser = argparse.ArgumentParser(formatter_class=RawTextHelpFormatter)
    parser.add_argument('location', help='Location name')
    sortorder_help = (
        "\n"
        "available sort orders are :\n"
        "POPULARITY: Top Seller,\n"
        "REVIEW_AVG_RATING-D: Top rated destinations,\n"
        "PRICE_FROM: Price(Low - High),\n"
        "PRICE_FROM-D: Price(High - Low),\n"
        "DURATION_SORT-DB: Duration(Short - Long),\n"
        "DURATION_SORT-T: Duration(Long - Short)\n"
    )
    # nargs='?' makes the positional optional so the default can actually
    # apply; the default itself must not carry a trailing space.
    parser.add_argument('sort', nargs='?', help=sortorder_help,
                        default='POPULARITY')
    args = parser.parse_args()

    data = parse(args.location, args.sort)

    print("Writing to output file viator_data.csv")
    # unicodecsv writes encoded bytes, so the file must be opened in binary
    # mode (text mode breaks on Python 3 and adds blank rows on Windows).
    with open('viator_data.csv', 'wb') as csvfile:
        fieldnames = ['tour_name', 'rating', 'reviews', 'location', 'price',
                      'duration', 'description', 'url']
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames,
                                quoting=csv.QUOTE_ALL)
        writer.writeheader()
        writer.writerows(data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment