-
-
Save scrapehero/edc9d9dffd24402a9c176862d076db18 to your computer and use it in GitHub Desktop.
from lxml import html, etree | |
import datetime | |
import requests | |
import re | |
import os | |
import sys | |
import unicodecsv as csv | |
import argparse | |
import json | |
# from exceptions import ValueError | |
def parse(location, showdate):
    """Scrape Fandango movie listings for a location and show date.

    The location text is sent to Fandango's autocomplete endpoint and the
    first suggested location is used to query the theaters-with-showtimes
    endpoint. The response is flattened into one row per (theater, movie)
    pair.

    Args:
        location: Zip code or city/state text to search for.
        showdate: Show date string; it is sent as the ``date`` query
            parameter and copied unchanged into every row.

    Returns:
        A list of dicts with the keys ``Theatre_Name``, ``Theatre_Address``,
        ``Movie_Name``, ``Show_Date``, ``Movie_Rating``, ``Star_Rating``,
        ``Duration``, ``Genre`` and ``Location_or_Zipcode``. The list is
        empty (falsy) when a request fails, the response is not JSON, or no
        location/theater is found.
    """
    print("Fetching Locations..")
    searchedLocation = location
    searchedDate = showdate
    movie_listings = []
    # Without a timeout, requests waits indefinitely on a stalled connection.
    request_timeout = 30

    # Cookies for searching theater location
    cookie = {
        'akamai_generated_location': '{"zip":"""","city":"CLIFTON","state":"NJ","county":"PASSAIC","areacode":"""","lat":"40.8800","long":"-74.1446","countrycode":""""}'
    }
    # Headers to get location details from their auto complete query
    location_headers = {
        'referer': 'https://www.fandango.com/',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest'
    }
    # Location autocomplete API endpoint
    location_url = 'https://www.fandango.com/napi/home/autocompleteDesktopSearch/' + searchedLocation
    data = {
        'zipCode': '',
        'city': '',
        'state': '',
        'date': str(searchedDate),
        'page': 1,
        'favTheaterOnly': False,
        'limit': 30,
        'offset': 0,
        'isdesktop': True
    }

    # Retrieving available locations. The endpoint may answer with a
    # non-JSON body (e.g. an HTML error page), which makes .json() raise a
    # ValueError subclass; treat that like any other failed request.
    try:
        location_response = requests.get(
            location_url,
            cookies=cookie,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.exceptions.RequestException, ValueError):
        print("Failed to get location details")
        return movie_listings

    if not isinstance(location_response, dict):
        location_response = {}
    # "or {}" also covers keys that are present but null.
    results_by_type = location_response.get('resultsByType') or {}
    locations = (results_by_type.get('locations') or {}).get('items') or []
    if not locations:
        print("No location found")
        return movie_listings

    # Selecting first location from available locations
    searched_location = locations[0]
    # Fall back to the user's input if the suggestion carries no name.
    location_name = searched_location.get('name') or searchedLocation
    state = searched_location.get('state')
    # Getting city from location name, city is necessary to get theater
    # lists if you are passing location as input
    city = location_name.split(',')[0].strip() if ',' in location_name else None
    if city and state:
        data['city'] = city
        data['state'] = state
    else:
        # city,state is not necessary if you are passing zipcode as input
        data['zipCode'] = location_name

    movie_url = 'https://www.fandango.com/napi/theaterswithshowtimes'
    # Fetching Movie details for search location.
    # NOTE: this request is sent with the same headers as the location
    # lookup, exactly as before.
    print("Fetching movie details")
    try:
        movie_response = requests.get(
            movie_url,
            params=data,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.exceptions.RequestException, ValueError):
        print("Failed to get movie details")
        return movie_listings

    all_theaters = movie_response.get('theaters') if isinstance(movie_response, dict) else None
    if not all_theaters:
        print("No theaters found")
        return movie_listings

    # Iterating through each theater
    for theater in all_theaters:
        theater_name = theater.get('name')
        # Skip missing address parts instead of failing on None + str.
        address_parts = [theater.get(key) for key in ('address1', 'city', 'state', 'zip')]
        theater_address = ' '.join(str(part) for part in address_parts if part)

        all_movies = theater.get('movies')
        if not all_movies:
            print("No movies in %s"%(theater_name))
            continue

        # Iterating through each movie in a theater
        for movie in all_movies:
            # cleaning data
            movie_name = (movie.get('title') or '').strip()
            duration = str(movie.get('runtime'))
            genre = ','.join(' '.join(movie.get('genres') or []).split()).strip()
            movie_rating = movie.get('rating')

            # Walk stars -> totalRating -> stars -> points without raising
            # when any level is absent; a missing value renders as 'None',
            # the same as a null "points" did before.
            star_points = movie
            for key in ('stars', 'totalRating', 'stars', 'points'):
                star_points = star_points.get(key) if isinstance(star_points, dict) else None
            star_rating = str(star_points).strip()

            movie_listings.append({
                "Theatre_Name": theater_name,
                "Theatre_Address": theater_address,
                "Movie_Name": movie_name,
                "Show_Date": searchedDate,
                "Movie_Rating": movie_rating,
                "Star_Rating": star_rating,
                "Duration": duration,
                "Genre": genre,
                "Location_or_Zipcode": searchedLocation
            })

    return movie_listings
if __name__ == "__main__":
    # Usage example: python fandango.py 20001 2017-12-31
    argparser = argparse.ArgumentParser()
    argparser.add_argument('location', type=str, help='theater location (zipcode or city+state)')
    argparser.add_argument('showdate', type=str, help='movie show time')
    args = argparser.parse_args()
    location, showdate = args.location, args.showdate

    # Parse the date once; searchdate stays None when the format is wrong.
    try:
        searchdate = datetime.datetime.strptime(showdate, '%Y-%m-%d').date()
    except ValueError:
        searchdate = None
        print("Invalid showdate, showdate should be YYYY-MM-DD format")

    if searchdate is not None:
        if searchdate < datetime.date.today():
            # Only today or a future date is searched.
            print("Entered date is already passed")
        else:
            scraped_data = parse(location, showdate)
            if not scraped_data:
                print("Your search for %s, in %s does not match any movies" % (location, showdate))
            else:
                print("Writing data to output file")
                output_name = '%s-%s-movie-results.csv' % (location, showdate)
                # Column order of the CSV output.
                fieldnames = [
                    'Theatre_Name', 'Theatre_Address', 'Movie_Name',
                    'Show_Date', 'Location_or_Zipcode', 'Duration',
                    'Genre', 'Movie_Rating', 'Star_Rating',
                ]
                # The file is opened in binary mode for the unicodecsv writer.
                with open(output_name, 'wb') as csvfile:
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
                    writer.writeheader()
                    for row in scraped_data:
                        writer.writerow(row)
I have exactly the same error. Any help would be greatly appreciated.
Traceback:
Traceback (most recent call last):
File "./fandago.py", line 149, in <module>
scraped_data = parse(location, showdate)
File "./fandago.py", line 43, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/user/.virtualenvs/movies/lib/python3.7/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/__init__.py", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/decoder.py", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
This looks very useful, but I'm running into an error. Here is the full traceback:
Traceback (most recent call last):
File "fandango.py", line 148, in <module>
scraped_data = parse(location, showdate)
File "fandango.py", line 42, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/Doug/.pyenv/versions/cinenv/lib/python3.6/site-packages/requests/models.py", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/__init__.py", line 354, in loads
return _default_decoder.decode(s)
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/decoder.py", line 339, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Users/Doug/.pyenv/versions/3.6.0/lib/python3.6/json/decoder.py", line 357, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)
Any idea what I'm running into? Thanks!