Skip to content

Instantly share code, notes, and snippets.

Created February 12, 2018 07:46
Show Gist options
  • Star 3 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save scrapehero/edc9d9dffd24402a9c176862d076db18 to your computer and use it in GitHub Desktop.
Save scrapehero/edc9d9dffd24402a9c176862d076db18 to your computer and use it in GitHub Desktop.
Python 3 Code for scraping movie details from
from lxml import html, etree
import datetime
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
# from exceptions import ValueError
def parse(location, showdate):
    """Fetch theater and movie listings for a location and show date.

    The location text is first resolved through the autocomplete endpoint;
    the first match is then used to query the theater listing endpoint.

    Args:
        location: Zip code or "city, state" text to search for.
        showdate: Show date string, forwarded to the listing endpoint as-is
            (the command-line entry point supplies YYYY-MM-DD).

    Returns:
        A list of dicts, one per (theater, movie) pair, with the keys
        Theatre_Name, Theatre_Address, Movie_Name, Show_Date, Movie_Rating,
        Star_Rating, Duration, Genre and Location_or_Zipcode. The list is
        empty when a request fails or nothing is found, so callers can
        simply test its truthiness.
    """
    print("Fetching Locations..")
    searchedLocation = location
    searchedDate = showdate
    movie_listings = []
    # Seconds to wait on each HTTP request so a stalled server cannot hang the script.
    request_timeout = 30

    # Cookies for searching theater location
    cookie = {
        'akamai_generated_location': '{"zip":"""","city":"CLIFTON","state":"NJ","county":"PASSAIC","areacode":"""","lat":"40.8800","long":"-74.1446","countrycode":""""}'
    }
    # Headers to get location details from their auto complete query
    location_headers = {
        'referer': '',
        'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    # Location autocomplete API endpoint
    # NOTE(review): the endpoint prefix (and the referer above) are empty
    # strings in this copy of the script; supply the real values before running.
    location_url = '' + searchedLocation
    data = {
        'zipCode': '',
        'city': '',
        'state': '',
        'date': str(searchedDate),
        'page': 1,
        'favTheaterOnly': False,
        'limit': 30,
        'offset': 0,
        'isdesktop': True,
    }

    # Retrieving available locations
    try:
        location_response = requests.get(
            location_url,
            cookies=cookie,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.RequestException, ValueError):
        # ValueError covers a body that is not JSON (JSONDecodeError subclasses it).
        print("Failed to get location details")
        return movie_listings

    locations = location_response.get('resultsByType', {}).get('locations', {}).get('items', [])
    if not locations:
        print("No location found")
        return movie_listings

    # Selecting first location from available locations
    searched_location = locations[0]
    location_name = searched_location.get('name') or ''
    state = searched_location.get('state')
    # Getting city from location name, city is necessary to get theater lists
    # if you are passing location as input
    city = location_name.split(',')[0].strip() if ',' in location_name else None
    if city and state:
        data['city'] = city
        data['state'] = state
    else:
        # city,state is not necessary if you are passing zipcode as input
        data['zipCode'] = location_name

    # The original also assembled a separate theater_headers dict (using the
    # location link as referer) but never sent it; the request below keeps
    # using location_headers exactly as before.
    # NOTE(review): the listing endpoint is an empty string in this copy of
    # the script; supply the real URL before running.
    movie_url = ''

    # Fetching Movie details for search location
    print("Fetching movie details")
    try:
        movie_response = requests.get(
            movie_url,
            params=data,
            headers=location_headers,
            timeout=request_timeout,
        ).json()
    except (requests.RequestException, ValueError):
        print("Failed to get movie details")
        return movie_listings

    all_theaters = movie_response.get('theaters')
    if not all_theaters:
        print("No theaters found")
        return movie_listings

    # Iterating through each theater
    for theater in all_theaters:
        theater_name = theater.get('name')
        address_parts = (
            theater.get('address1'),
            theater.get('city'),
            theater.get('state'),
            theater.get('zip'),
        )
        # Missing parts become empty strings instead of raising TypeError.
        theater_address = ' '.join(
            '' if part is None else str(part) for part in address_parts
        )

        all_movies = theater.get('movies')
        if not all_movies:
            print("No movies in %s"%(theater_name))
            continue

        # Iterating through each movie in a theater
        for movie in all_movies:
            # cleaning data
            movie_name = (movie.get('title') or '').strip()
            duration = str(movie.get('runtime'))
            # Genre entries are re-split on whitespace and comma-joined, as before.
            genre = ','.join(' '.join(movie.get('genres') or []).split()).strip()
            movie_rating = movie.get('rating')
            stars = movie.get('stars') or {}
            star_rating = str(stars.get('totalRating'))
            movie_listings.append({
                "Theatre_Name": theater_name,
                "Theatre_Address": theater_address,
                "Movie_Name": movie_name,
                "Show_Date": searchedDate,
                "Movie_Rating": movie_rating,
                "Star_Rating": star_rating,
                "Duration": duration,
                "Genre": genre,
                "Location_or_Zipcode": searchedLocation,
            })

    return movie_listings
if __name__ == "__main__":
    # Command-line entry point.
    # Example arguments: 20001 2017-12-31  (location, then show date)
    argparser = argparse.ArgumentParser()
    argparser.add_argument('location', help='theater location (zipcode or city+state)', type=str)
    argparser.add_argument('showdate', help='movie show time', type=str)
    args = argparser.parse_args()
    location = args.location
    showdate = args.showdate

    # Parse the date once; a ValueError means it is not in YYYY-MM-DD form.
    try:
        searchdate = datetime.datetime.strptime(showdate, '%Y-%m-%d').date()
    except ValueError:
        print("Invalid showdate, showdate should be YYYY-MM-DD format")
        searchdate = None

    if searchdate is not None:
        # Only today's or future show dates are searched.
        if searchdate >= datetime.date.today():
            scraped_data = parse(location, showdate)
            if scraped_data:
                print("Writing data to output file")
                # Binary mode: `csv` here is unicodecsv, which encodes rows itself.
                with open('%s-%s-movie-results.csv' % (location, showdate), 'wb') as csvfile:
                    fieldnames = ['Theatre_Name', 'Theatre_Address', 'Movie_Name',
                                  'Show_Date', 'Location_or_Zipcode', 'Duration', 'Genre', 'Movie_Rating', 'Star_Rating']
                    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, quoting=csv.QUOTE_ALL)
                    writer.writeheader()
                    for data in scraped_data:
                        writer.writerow(data)
            else:
                print("Your search for %s, in %s does not match any movies" % (location, showdate))
        else:
            print("Entered date is already passed")
Copy link

lpuv commented Mar 19, 2019

I have exactly the same error. Any help would be greatly appreciated.
Traceback (most recent call last):
File "./", line 149, in
scraped_data = parse(location, showdate)
File "./", line 43, in parse
location_response = requests.get(location_url, cookies=cookie, headers=location_headers).json()
File "/Users/user/.virtualenvs/movies/lib/python3.7/site-packages/requests/", line 897, in json
return complexjson.loads(self.text, **kwargs)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/", line 348, in loads
return _default_decoder.decode(s)
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/", line 337, in decode
obj, end = self.raw_decode(s, idx=_w(s, 0).end())
File "/Library/Frameworks/Python.framework/Versions/3.7/lib/python3.7/json/", line 355, in raw_decode
raise JSONDecodeError("Expecting value", s, err.value) from None
json.decoder.JSONDecodeError: Expecting value: line 1 column 1 (char 0)

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment