Skip to content

Instantly share code, notes, and snippets.

@mlarocca
Last active August 29, 2015 14:00
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mlarocca/c1cf90848dbc60b42852 to your computer and use it in GitHub Desktop.
Save mlarocca/c1cf90848dbc60b42852 to your computer and use it in GitHub Desktop.
# App Engine (Python 2.7 runtime) configuration for the video search service.
application: pweb-14
version: 1
runtime: python27
api_version: 1
threadsafe: yes

handlers:
# Static assets.
- url: /favicon\.ico
  static_files: favicon.ico
  upload: favicon\.ico

# The landing page is a single static file; 'upload' must be a regular
# expression matching the path of the file to deploy, not the URL.
- url: /
  static_files: static/index.html
  upload: static/index\.html

# Dynamic handlers.
- url: /json.*
  script: main.app

# Every provider-specific prefix is served by the same WSGI application.
- url: /videos.*
  script: videos.app
- url: /youtube.*
  script: videos.app
- url: /dailymotion.*
  script: videos.app
- url: /vimeo.*
  script: videos.app

libraries:
- name: webapp2
  version: "2.5.2"
from video_search_handler import VideoSearchHandler
from google.appengine.api import urlfetch
from urllib import quote_plus
from json import loads as json_loads
from google.appengine.api import memcache
from datetime import datetime
import logging
#Number of results returned when the request does not specify 'max_results'.
DAILYMOTION_DEFAULT_MAX_RESULTS = 10
#Page size requested from the DailyMotion API (sent as the 'limit' parameter).
DAILYMOTION_MAX_RESULTS_PER_PAGE = 50
#Tag stored in the 'source' field of every result, and used as prefix of the memcached keys.
DAILYMOTION_SOURCE_STR = 'd'
#Endpoint used for keyword searches.
DAILYMOTION_VIDEOS_URL = 'https://api.dailymotion.com/videos?'
#Endpoint used for related-videos queries; '%s' is replaced with the id of the reference video.
DAILYMOTION_VIDEOS_RELATED_URL = 'https://api.dailymotion.com/video/%s/related?'
def merge_dicts(old_d, new_d):
    """ Merge two dictionaries in place, giving precedence to the entries of new_d.

        : param old_d : The dictionary whose entries are copied only when their key is missing from new_d.
        : type old_d : Dictionary
        : param new_d : The dictionary that is updated (mutated) and returned.
        : type new_d : Dictionary
        : return : new_d itself, after the missing entries have been added.
    """
    for key, value in old_d.items():
        #setdefault leaves the entries already present in new_d untouched
        new_d.setdefault(key, value)
    return new_d
def result_transform(search_result):
    """ Convert a single raw DailyMotion result into the dictionary format returned to the client.

        : param search_result : The single search result to transform.
        : type search_result : Dictionary
        : return : A Dictionary with the fields 'id', 'url', 'title', 'thumbnail', 'date' and 'source';
                   fields missing from the input are replaced by an empty string.
    """
    transformed = {}
    #Fields copied verbatim from the API response (output name, API name)
    for out_name, api_name in (('id', 'id'), ('url', 'url'), ('title', 'title'), ('thumbnail', 'thumbnail_url')):
        transformed[out_name] = search_result.get(api_name, "")

    #'created_time' is converted from a timestamp to its string representation, when available
    if 'created_time' in search_result:
        transformed['date'] = str(datetime.fromtimestamp(search_result['created_time']))
    else:
        transformed['date'] = ''

    transformed['source'] = DAILYMOTION_SOURCE_STR
    return transformed
class DailyMotionSearchHandler(VideoSearchHandler):
    """ This class handles queries on DailyMotion.

        : param __VALID_ORDER_CRITERIA : The set of the valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """

    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['relevance', 'recent', 'rated', 'visited', 'title', 'random', 'ranking'])
    __DEFAULT_SORTING_CRITERION = 'relevance'

    def __memcache_key(self, q, related_id, country, order=None):
        """ Construct the key to store the results for the current query on memcached.

            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter Country code.
            : type country : String | None
            : param order : The sorting criterion; results sorted differently must not share a cache entry.
            : type order : String | None
            : return : The key to use for memcached.
        """
        key = DAILYMOTION_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        if order:
            key += VideoSearchHandler.ORDER_PREFIX + order
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API

            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in DailyMotionSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return DailyMotionSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_dailymotion_videos(self, keywords, params, related_id, country):
        """ Perform the search on DailyMotion and return the list of videos retrieved.

            Results are downloaded in pages of DAILYMOTION_MAX_RESULTS_PER_PAGE items; every downloaded page
            is stored on memcached (as a dictionary page index -> list of results) and reused by later requests.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : A dictionary of the parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String | None
            : param country : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type country : String | None
            : return : The list of results requested, or None if a call to the API failed
                       (in that case the error response has already been written).
        """
        #Init a memcached client; It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
        memcached_client = memcache.Client()
        order = params.get('order')

        #try to retrieve data from the cache, if present
        key = self.__memcache_key(keywords, related_id, country, order)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}    #If the query wasn't present in cache, just init the object to an empty container.

        #Computes the (1-based) indices of the first and last result to be returned
        first_result = params.get('first_result', 1)
        max_results = params.get('max_results')
        if max_results is None:
            max_results = DAILYMOTION_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results to retrieve from DailyMotion
        first_page = (first_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE

        #Prepare the mandatory parameters for the DailyMotion API
        search_params = {
            'search': quote_plus(keywords),
            'fields': "id,url,title,thumbnail_url,created_time",
            'limit': DAILYMOTION_MAX_RESULTS_PER_PAGE
        }
        #Handles the optional parameters
        if country is not None:
            search_params['country'] = country
        if order is not None:
            search_params['sort'] = order

        #DailyMotion requires different API calls for related and normal searches
        if related_id is not None:
            dm_url = DAILYMOTION_VIDEOS_RELATED_URL % str(related_id)    #safe conversion
            #'sort' and 'search' params are not allowed by dailymotion on related queries;
            #pop() is used so that a missing parameter can't raise a KeyError
            search_params.pop('sort', None)
            search_params.pop('search', None)
        else:
            dm_url = DAILYMOTION_VIDEOS_URL

        #Start the iteration to retrieve all the pages
        page = first_page
        results = []
        while page <= last_page:
            if page in cached:
                #This page of the results is already cached.
                results += cached[page]
                page += 1
                continue

            #else: the results must be downloaded from DailyMotion (whose pages are 1-based)
            search_params['page'] = page + 1
            url = dm_url + '&'.join(["%s=%s" % (k, str(v)) for k, v in search_params.items()])
            search_response = urlfetch.fetch(url)

            if search_response.status_code != 200:
                #Couldn't retrieve the data: for consistency, the whole call is stopped.
                self.return_error(search_response.status_code)
                #Attempt to store what had been retrieved so far on memcached anyway.
                self.store_to_memcached(cached, key, merge_dicts, memcached_client)
                return None

            search_response = json_loads(search_response.content)
            search_results = [result_transform(item) for item in search_response.get("list", [])]
            cached[page] = search_results
            results += search_results

            if not search_response.get('has_more', False):
                #The response from DailyMotion states that this is the last page of results.
                break
            page += 1

        #Attempt to store the result on memcached.
        self.store_to_memcached(cached, key, merge_dicts, memcached_client)

        #Adjust the indices to compensate for pages that we could avoid to download.
        skipped = first_page * DAILYMOTION_MAX_RESULTS_PER_PAGE
        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - skipped - 1:last_result - skipped]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and returns the list of videos retrieved.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        res = self.__search_dailymotion_videos(keywords, params=self.extract_params(), related_id=related_id, country=regionCode)
        if res is not None:
            self.return_results(res)
import webapp2
import json
class VideoSearchHandler(webapp2.RequestHandler):
    """ Base class for search handlers specific to video provider (YouTube, DailyMotion, Vimeo...)

        : param RELATED_PREFIX : Common prefix used for memcached keys identifying queries for videos related to a specific (video's) id.
        : type RELATED_PREFIX : String
        : param COUNTRY_PREFIX : Common prefix used for memcached keys for queries restricted to a single country/region code.
        : type COUNTRY_PREFIX : String
        : param ORDER_PREFIX : Common prefix used for memcached keys for queries with an explicit sorting criterion.
        : type ORDER_PREFIX : String
        : param MEMCACHED_KEY_MAX_LEN : Maximum acceptable length for a memcached key.
        : type MEMCACHED_KEY_MAX_LEN : Integer
        : param MEMCACHED_MAX_CAS_RETRIES : Maximum number of merge-and-retry attempts made by store_to_memcached.
        : type MEMCACHED_MAX_CAS_RETRIES : Integer
    """

    RELATED_PREFIX = "_ID_"
    COUNTRY_PREFIX = "_CY_"
    ORDER_PREFIX = "_O_"
    MEMCACHED_KEY_MAX_LEN = 255
    MEMCACHED_MAX_CAS_RETRIES = 10

    def get_from_memcached(self, key, memcached_client):
        """ Attempt to retrieve a value from memcached

            : param key : The memcached key to be retrieved.
            : type key : String
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class).
                                       It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
            : type memcached_client : Object
            : return : the value retrieved from the cache, or None, if the key is not in cache.
        """
        return memcached_client.gets(key)

    def store_to_memcached(self, value, key, merge_values, memcached_client):
        """ Attempt to store a value on memcached. If the key is MEMCACHED_KEY_MAX_LEN characters or longer, the value isn't stored.
            If the value stored for key has been updated after value has been retrieved, the two values are merged
            according to a function passed by the caller, and the store is retried (at most MEMCACHED_MAX_CAS_RETRIES times).

            : param value : The value to be stored on memcached.
            : param key : The memcached key to be stored.
            : type key : String
            : param merge_values : The function that will be used to merge values, in case of race conditions;
                                   it's called as merge_values(local_value, value_currently_on_memcached).
            : type merge_values : Function
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class)
            : type memcached_client : Object
            : return : True iff the key-value pair is successfully stored.
        """
        #key might be too long: in that case, it would be hashed, and we can't risk that 2 different searches hash to the same value:
        #it's better not to cache content in that case
        if len(key) >= VideoSearchHandler.MEMCACHED_KEY_MAX_LEN:
            return False

        if memcached_client.cas(key, value):
            #The value has been stored successfully
            return True

        #cas failed: either the key isn't stored at all, or there has been a race condition.
        #The number of attempts is bounded, so that a heavily contended key can't block the request forever.
        for _ in range(VideoSearchHandler.MEMCACHED_MAX_CAS_RETRIES):
            latest = memcached_client.gets(key)
            if latest is None:
                #If the key is not stored in memcached (or it has been evicted meanwhile), cas will always return false
                return memcached_client.set(key, value)
            #A newer version is stored on memcached: how to merge the two versions is left to the caller.
            #The merged value is kept across iterations, so that local data is never lost on a retry.
            value = merge_values(value, latest)
            if memcached_client.cas(key, value):
                return True
        return False

    def validate_order(self, criterion):
        """ Validate the sorting criterion extracted from the request. Called by extract_params;
            each provider-specific subclass must override it with the criteria accepted by its API.

            : param criterion : The order criterion to validate.
            : type criterion : String | None
            : return : The validated criterion.
            : raise NotImplementedError : Always, on the base class.
        """
        raise NotImplementedError("validate_order must be implemented by subclasses")

    def extract_positive_int(self, param_name, default_value):
        """ Retrieve a parameter from the request, and check that the value retrieved
            is a positive integer.

            : param param_name : The name of the parameter to retrieve.
            : type param_name : String
            : param default_value : The default value that will be returned if the parameter is not
                                    present in the request, or if its value is not valid.
            : type default_value : Integer (positive) | None
            : return : The validated value, or the default value, if validation fails.
        """
        #If the parameter isn't set, the default value is what gets converted
        raw_value = self.request.get(param_name, default_value=default_value)
        try:
            parsed = int(raw_value)
        except (TypeError, ValueError):
            #TypeError: the value is None; ValueError: the value is a non-numeric string
            return default_value
        return parsed if parsed > 0 else default_value

    def extract_params(self):
        """ Retrieve the parameters we need from the request, and return a dictionary with them.

            : return : A dictionary containing name-value pairs for all the parameters needed to query the video providers.
        """
        return {
            'first_result': self.extract_positive_int('first_result', 1),
            'max_results': self.extract_positive_int('max_results', None),
            'order': self.validate_order(self.request.get('order', default_value=None))
        }

    def return_error(self, error_code, error_message=""):
        """ Respond to the HTTP request with an error code, and a message.

            : param error_code : The HTTP error code to be returned (should be a 4XY code).
            : type error_code : Integer
            : param error_message : The error message to display with the response.
            : type error_message : String
        """
        self.response.clear()
        self.response.set_status(error_code, error_message)
        self.response.out.write(error_message)

    def return_results(self, results):
        """ Respond to the HTTP request with a JSON response.

            : param results : The results to be returned (any JSON-serializable value).
        """
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(results))
import webapp2
from apiclient.errors import HttpError
from youtube import YouTubeSearchHandler
from dailymotion import DailyMotionSearchHandler
from vimeo import VimeoSearchHandler
from google.appengine.api import memcache
from google.appengine.api.urlfetch import fetch
import logging
class SimpleVideoSearchHandler(YouTubeSearchHandler):
    """ Keyword search handler; the search itself is inherited from YouTubeSearchHandler. """

    def get(self, keywords):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedVideoSearchHandler(YouTubeSearchHandler):
    """ Related-videos handler; the search itself is inherited from YouTubeSearchHandler. """

    def get(self, related_id):
        """ Handle a GET request.

            : param related_id : The id of the reference video, captured from the request path.
            : type related_id : String
        """
        reference_video = related_id
        self.search_videos(related_id=reference_video)
class VideoSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Keyword search restricted to a region; the search itself is inherited from YouTubeSearchHandler. """

    def get(self, keywords, regionCode):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
            : param regionCode : The region code captured from the request path; it's upper-cased before searching.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
class SimpleYouTubeSearchHandler(YouTubeSearchHandler):
    """ Keyword search on YouTube. """

    def get(self, keywords):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedYouTubeSearchHandler(YouTubeSearchHandler):
    """ Search on YouTube for the videos related to a given one. """

    def get(self, related_id):
        """ Handle a GET request.

            : param related_id : The id of the reference video, captured from the request path.
            : type related_id : String
        """
        reference_video = related_id
        self.search_videos(related_id=reference_video)
class YouTubeSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Keyword search on YouTube, restricted to a region. """

    def get(self, keywords, regionCode):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
            : param regionCode : The region code captured from the request path; it's upper-cased before searching.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
class SimpleDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Keyword search on DailyMotion. """

    def get(self, keywords):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Search on DailyMotion for the videos related to a given one. """

    def get(self, related_id):
        """ Handle a GET request.

            : param related_id : The id of the reference video, captured from the request path.
            : type related_id : String
        """
        reference_video = related_id
        self.search_videos(related_id=reference_video)
class DailyMotionSearchHandlerWithRegion(DailyMotionSearchHandler):
    """ Keyword search on DailyMotion, restricted to a region. """

    def get(self, keywords, regionCode):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
            : param regionCode : The region code captured from the request path; it's upper-cased before searching.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
class SimpleVimeoSearchHandler(VimeoSearchHandler):
    """ Keyword search on Vimeo. """

    def get(self, keywords):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedVimeoSearchHandler(VimeoSearchHandler):
    """ Search on Vimeo for the videos related to a given one. """

    def get(self, related_id):
        """ Handle a GET request.

            : param related_id : The id of the reference video, captured from the request path.
            : type related_id : String
        """
        reference_video = related_id
        self.search_videos(related_id=reference_video)
class VimeoSearchHandlerWithRegion(VimeoSearchHandler):
    """ Keyword search on Vimeo, restricted to a region. """

    def get(self, keywords, regionCode):
        """ Handle a GET request.

            : param keywords : The search terms captured from the request path.
            : type keywords : String
            : param regionCode : The region code captured from the request path; it's upper-cased before searching.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
def _build_routes():
    """ Build the routing table: every provider prefix exposes the same three routes
        (keyword search, keyword search restricted to a country, related videos).

        : return : A list of (URL pattern, handler class) pairs, in matching order.
    """
    providers = (
        ('videos', SimpleVideoSearchHandler, VideoSearchHandlerWithRegion, RelatedVideoSearchHandler),
        ('youtube', SimpleYouTubeSearchHandler, YouTubeSearchHandlerWithRegion, RelatedYouTubeSearchHandler),
        ('dailymotion', SimpleDailyMotionSearchHandler, DailyMotionSearchHandlerWithRegion, RelatedDailyMotionSearchHandler),
        ('vimeo', SimpleVimeoSearchHandler, VimeoSearchHandlerWithRegion, RelatedVimeoSearchHandler),
    )
    routes = []
    for prefix, simple_handler, region_handler, related_handler in providers:
        routes.append(('/%s/([^/]+)/?' % prefix, simple_handler))
        routes.append(('/%s/([^/]+)/countries/([^/]+)/?' % prefix, region_handler))
        routes.append(('/%s/related/([^/]+)/?' % prefix, related_handler))
    return routes


#NOTE(review): debug=True is kept as in the original; confirm whether it should stay enabled in production.
app = webapp2.WSGIApplication(_build_routes(), debug=True)
from video_search_handler import VideoSearchHandler
from google.appengine.api import urlfetch
from urllib import quote_plus, urlencode
from json import loads as json_loads
from google.appengine.api import memcache
from datetime import datetime
import base64
import logging
#Number of results returned when the request does not specify 'max_results'.
VIMEO_DEFAULT_MAX_RESULTS = 5
#Page size requested from the Vimeo API (sent as the 'per_page' parameter).
VIMEO_MAX_RESULTS_PER_PAGE = 50
#Tag stored in the 'source' field of every result, and used as prefix of the memcached keys.
VIMEO_SOURCE_STR = 'v'
#Endpoint used for keyword searches.
VIMEO_VIDEOS_URL = 'https://api.vimeo.com/videos?'
#Endpoint used for related-videos queries; '%s' is replaced with the id of the reference video.
VIMEO_VIDEOS_RELATED_URL = 'https://api.vimeo.com/videos/%s/related?filter=related&'
#Credentials of the Vimeo app, used to request an access token: placeholders, to be replaced with real values.
VIMEO_CLIENT_ID = "REPLACE_ME"
VIMEO_CLIENT_SECRET = "REPLACE_ME"
#Base headers for the calls to the Vimeo API; the 'Authorization' value is a placeholder
#that must be replaced by a 'bearer <token>' value before a request is sent.
VIMEO_OAUTH_HEADERS = {
'Accept': 'application/vnd.vimeo.*+json;version=3.0',
'Authorization': 'REPLACE_ME'
}
def merge_dicts(old_d, new_d):
    """ Add to new_d every entry of old_d whose key new_d doesn't contain yet.

        : param old_d : The dictionary providing the entries to add; it's not modified.
        : type old_d : Dictionary
        : param new_d : The dictionary to complete; it's modified in place and its own entries always win.
        : type new_d : Dictionary
        : return : new_d itself.
    """
    for key in old_d:
        if key in new_d:
            #new_d already has its own value for this key: keep it
            continue
        new_d[key] = old_d[key]
    return new_d
def result_transform(search_result):
    """ Convert a single raw Vimeo result into the dictionary format returned to the client.

        : param search_result : The single search result to transform.
        : type search_result : Dictionary
        : return : A Dictionary with the fields 'id', 'url', 'title', 'thumbnail', 'date' and 'source'.
    """
    #'pictures' might be missing or null; anything that is not a sequence of pictures is ignored,
    #so that an unexpected payload produces an empty thumbnail instead of an exception
    pictures = search_result.get('pictures') or []
    if not isinstance(pictures, (list, tuple)):
        pictures = []

    if len(pictures) == 0:
        thumbnail = ""
    elif len(pictures) < 3:
        thumbnail = pictures[0].get("link", "")
    else:
        #the 3rd to last picture is a medium sized thumbnail, the last picture is usually the smallest
        thumbnail = pictures[-3].get("link", "")

    #The uri has the form "/videos/<id>": the id is its third component
    uri_parts = search_result.get('uri', "/ / ").split("/")
    video_id = uri_parts[2] if len(uri_parts) > 2 else ""

    #Vimeo stores the title of a video in the 'name' field; the description is used only as a fallback
    title = search_result.get('name') or search_result.get('description') or ""

    return {
        'id': video_id,
        'url': search_result.get('link', ""),
        'title': title,
        'thumbnail': thumbnail,
        'date': search_result.get('created_time', ""),
        'source': VIMEO_SOURCE_STR
    }
class VimeoSearchHandler(VideoSearchHandler):
    """ This class handles queries on Vimeo.

        : param __VALID_ORDER_CRITERIA : The set of the valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """

    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['relevant', 'date', 'likes', 'plays', 'comments'])
    __DEFAULT_SORTING_CRITERION = 'relevant'

    def get_access_token(self, cid, secret, api_url='https://api.vimeo.com/oauth/authorize/client'):
        """ Request an access token from Vimeo using the "client_credentials" grant:
            the client id and secret are sent (base64-encoded) in a basic Authorization header.

            : param cid : The client ID for the current app.
            : type cid : String
            : param secret : The client secret for the current app.
            : type secret : String
            : param api_url : The URL of the API endpoint used for unauthenticated authorization.
            : type api_url : String
            : return : The decoded JSON body of the response (search_videos reads its 'access_token' entry).
            : raise ValueError : If the response status is not 200; the status code is the argument of the exception.
        """
        encoded = base64.b64encode("%s:%s" % (cid, secret))
        payload = {
            "grant_type": "client_credentials",
            "scope": "public create"
        }
        headers = {
            "Accept": "application/vnd.vimeo.*+json; version=3.0",
            "Authorization": "basic %s" % encoded
        }
        response = urlfetch.fetch(api_url,
                                  method="POST",
                                  headers=headers,
                                  payload=urlencode(payload))
        if response.status_code != 200:
            raise ValueError(response.status_code)
        return json_loads(response.content)

    def __memcache_key(self, q, related_id, country, order):
        """ Construct the key to store the results for the current query on memcached.

            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter Country code.
            : type country : String | None
            : param order : The sorting criterion of the query.
            : type order : String | None
            : return : The key to use for memcached.
        """
        key = VIMEO_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        if order:
            key += VideoSearchHandler.ORDER_PREFIX + order
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API

            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in VimeoSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return VimeoSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_vimeo_videos(self, keywords, params, related_id, country, headers=None):
        """ Perform the search on Vimeo and return the list of videos retrieved.

            Results are downloaded in pages of VIMEO_MAX_RESULTS_PER_PAGE items; every downloaded page
            is stored on memcached (as a dictionary page index -> list of results) and reused by later requests.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : A dictionary of the parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String | None
            : param country : The international 2-letter code of a Country; it's only used for the cache key, it's not forwarded to Vimeo.
            : type country : String | None
            : param headers : The HTTP headers (including the authorization) to send with the API calls; by default, VIMEO_OAUTH_HEADERS.
            : type headers : Dictionary | None
            : return : The list of results requested, or None if a call to the API failed
                       (in that case the error response has already been written).
        """
        if headers is None:
            headers = VIMEO_OAUTH_HEADERS

        #Init a memcached client; It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
        memcached_client = memcache.Client()
        order = params.get('order')

        #try to retrieve data from the cache, if present
        key = self.__memcache_key(keywords, related_id, country, order)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}    #If the query wasn't present in cache, just init the object to an empty container.

        #Computes the (1-based) indices of the first and last result to be returned
        first_result = params.get('first_result', 1)
        max_results = params.get('max_results')
        if max_results is None:
            max_results = VIMEO_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results to retrieve from Vimeo
        first_page = (first_result - 1) // VIMEO_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // VIMEO_MAX_RESULTS_PER_PAGE

        #Prepare the mandatory parameters for the Vimeo API
        search_params = {
            'query': quote_plus(keywords),
            'per_page': VIMEO_MAX_RESULTS_PER_PAGE
        }
        #Handles the optional parameters
        if order is not None:
            search_params['sort'] = order

        if related_id is None:
            api_url = VIMEO_VIDEOS_URL
        else:
            api_url = VIMEO_VIDEOS_RELATED_URL % str(related_id)    #safe conversion

        #Start the iteration to retrieve all the pages
        page = first_page
        results = []
        while page <= last_page:
            if page in cached:
                #This page of the results is already cached.
                results += cached[page]
                page += 1
                continue

            #else: the results must be downloaded from Vimeo (whose pages are 1-based)
            search_params['page'] = page + 1
            url = api_url + '&'.join(["%s=%s" % (k, str(v)) for k, v in search_params.items()])
            search_response = urlfetch.fetch(url, method="GET", headers=headers)

            if search_response.status_code != 200:
                #Couldn't retrieve the data: for consistency, the whole call is stopped.
                self.return_error(search_response.status_code)
                #Attempt to store what had been retrieved so far on memcached anyway.
                self.store_to_memcached(cached, key, merge_dicts, memcached_client)
                return None

            search_response = json_loads(search_response.content)
            search_results = [result_transform(item) for item in search_response.get("data", [])]
            cached[page] = search_results
            results += search_results

            #A missing or null 'next' link means that this is the last page of results.
            paging = search_response.get('paging') or {}
            if paging.get('next') is None:
                break
            page += 1

        #Attempt to store the result on memcached.
        self.store_to_memcached(cached, key, merge_dicts, memcached_client)

        #Adjust the indices to compensate for pages that we could avoid to download.
        skipped = first_page * VIMEO_MAX_RESULTS_PER_PAGE
        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - skipped - 1:last_result - skipped]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and returns the list of videos retrieved.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        try:
            auth_token = self.get_access_token(cid=VIMEO_CLIENT_ID, secret=VIMEO_CLIENT_SECRET)
        except ValueError as e:
            #get_access_token raises ValueError(status_code) when the token request is rejected
            logging.warning("VIMEO AUTHORIZATION ERROR %s", e)
            status = e.args[0] if e.args and isinstance(e.args[0], int) else 502
            self.return_error(status)
            return

        #The headers are built per request: the module-level dictionary is shared among threads,
        #so it is not modified. The token is a secret, hence it's never logged.
        headers = dict(VIMEO_OAUTH_HEADERS)
        headers['Authorization'] = 'bearer %s' % auth_token['access_token']

        res = self.__search_vimeo_videos(keywords, params=self.extract_params(), related_id=related_id, country=regionCode, headers=headers)
        if res is not None:
            self.return_results(res)
from video_search_handler import VideoSearchHandler
from apiclient.discovery import build
from apiclient.errors import HttpError
from google.appengine.api import memcache
import logging
#API key passed to the YouTube client: a placeholder, to be replaced with a real key.
DEVELOPER_KEY = "REPLACE_ME"
#Name and version of the service requested to the API client builder.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
#Number of results returned when the request does not specify 'max_results'.
YOUTUBE_DEFAULT_MAX_RESULTS = 5
#Page size requested from the YouTube API (sent as the 'maxResults' parameter).
YOUTUBE_MAX_RESULTS_PER_PAGE = 50
#Prefix that, followed by the id of a video, gives the URL of the video.
YOUTUBE_VIDEO_URL_PREFIX = "https://www.youtube.com/watch?v="
#Tag stored in the 'source' field of every result, and used as prefix of the memcached keys.
YOUTUBE_SOURCE_STR = 'y'
#Memcached key under which the page tokens (page index -> token) are stored.
PAGE_TOKES_KEYS = 'youtube_page_tokens'
def merge_dicts(old_d, new_d):
    """ Complete new_d with the entries of old_d stored under keys that new_d lacks.

        : param old_d : The dictionary providing the entries to add; it's left untouched.
        : type old_d : Dictionary
        : param new_d : The dictionary updated in place; the entries it already has are preserved.
        : type new_d : Dictionary
        : return : new_d itself.
    """
    missing_keys = [key for key in old_d if key not in new_d]
    for key in missing_keys:
        new_d[key] = old_d[key]
    return new_d
class YouTubeSearchHandler(VideoSearchHandler):
    """ This class handles queries on YouTube.

        : param __VALID_ORDER_CRITERIA : The set of the valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """

    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['date', 'rating', 'relevance', 'title', 'videoCount', 'viewCount'])
    __DEFAULT_SORTING_CRITERION = 'relevance'

    def __memcache_key(self, q, related_id, country, order=None):
        """ Construct the key to store the results for the current query on memcached.

            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter Country code.
            : type country : String | None
            : param order : The sorting criterion; results sorted differently must not share a cache entry.
            : type order : String | None
            : return : The key to use for memcached.
        """
        key = YOUTUBE_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        if order:
            key += VideoSearchHandler.ORDER_PREFIX + order
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API

            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in YouTubeSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return YouTubeSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_youtube_videos(self, keywords, params, related_id, regionCode):
        """ Perform the search on YouTube and return the list of videos retrieved.

            Results are downloaded in pages of YOUTUBE_MAX_RESULTS_PER_PAGE items. YouTube pages can only
            be requested through the token returned with the previous page: the tokens collected are
            stored on memcached (page index -> token, under the PAGE_TOKES_KEYS key) next to the pages of results.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : A dictionary of the parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String | None
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String | None
            : return : The list of results requested, or None if a call to the API failed
                       (in that case the error response has already been written).
        """
        youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
        memcached_client = memcache.Client()

        def result_transform(search_result):
            #Retain only the fields of a search result that are returned to the client.
            video_id = search_result['id']['videoId']
            snippet = search_result['snippet']
            return {
                'id': video_id,
                'url': YOUTUBE_VIDEO_URL_PREFIX + video_id,
                'title': snippet['title'],
                'thumbnail': snippet['thumbnails']['default']['url'],
                'date': snippet['publishedAt'],
                'source': YOUTUBE_SOURCE_STR
            }

        order = params.get('order')

        #try to retrieve data from the cache, if present
        key = self.__memcache_key(keywords, related_id, regionCode, order)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}

        #Computes the (1-based) indices of the first and last result to be returned
        first_result = params.get('first_result', 1)
        max_results = params.get('max_results')
        if max_results is None:
            max_results = YOUTUBE_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results needed
        first_page = (first_result - 1) // YOUTUBE_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // YOUTUBE_MAX_RESULTS_PER_PAGE

        page_tokens = self.get_from_memcached(PAGE_TOKES_KEYS, memcached_client)
        if page_tokens is None:
            page_tokens = {
                0: None    #The first page doesn't need a token
            }

        #prepare the parameters for the list method
        search_params = {
            'q': keywords,
            'part': "id,snippet",
            'type': 'video',
            'maxResults': YOUTUBE_MAX_RESULTS_PER_PAGE
        }
        if related_id is not None:
            search_params['relatedToVideoId'] = related_id
        if regionCode is not None:
            search_params['regionCode'] = regionCode
        if order is not None:
            search_params['order'] = order

        #Start from the first page needed, or from the closest previous page whose token is known
        page = first_page
        while page > 0 and page not in page_tokens:
            page -= 1

        results = []
        while page <= last_page:
            #A cached page can be reused only if it isn't needed to discover the token of the next page
            if page in cached and (page == last_page or (page + 1) in page_tokens):
                page_results = cached[page]
                has_next = True
            else:
                if page > 0:
                    #Invariant: page in page_tokens
                    search_params['pageToken'] = page_tokens[page]
                # Call the search.list method to retrieve results matching the keywords.
                try:
                    search_response = youtube.search().list(
                        **search_params    #unpack the dictionary to a list of named parameters
                    ).execute()
                except HttpError as e:
                    logging.warning("YOUTUBE API ERROR " + str(e))
                    self.return_error(400, str(e))
                    return None

                page_results = [result_transform(item) for item in search_response.get("items", [])]
                cached[page] = page_results

                #The token for the next page comes with the response (not with the tokens already known)
                next_token = search_response.get('nextPageToken')
                has_next = next_token is not None
                if has_next:
                    page_tokens[page + 1] = next_token

            #Pages before first_page are only visited to collect the tokens: their results are not returned
            if page >= first_page:
                results += page_results

            if not has_next:
                #there is not a next page to download
                break
            page += 1

        self.store_to_memcached(cached, key, merge_dicts, memcached_client)
        self.store_to_memcached(page_tokens, PAGE_TOKES_KEYS, merge_dicts, memcached_client)

        #Adjust the indices to compensate for the pages that precede first_page.
        skipped = first_page * YOUTUBE_MAX_RESULTS_PER_PAGE
        return results[first_result - skipped - 1:last_result - skipped]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and returns the list of videos retrieved.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        res = self.__search_youtube_videos(keywords, params=self.extract_params(), related_id=related_id, regionCode=regionCode)
        if res is not None:
            self.return_results(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment