-
-
Save mlarocca/c1cf90848dbc60b42852 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# App Engine configuration for the pweb-14 video search service (Python 2.7 runtime).
application: pweb-14
version: 1
runtime: python27
api_version: 1
threadsafe: yes

handlers:
# Site icon, served as a static file.
- url: /favicon\.ico
  static_files: favicon.ico
  upload: favicon\.ico

# Landing page.
- url: /
  static_files: static/index.html
  # 'upload' is a regular expression over file paths (not URLs): it has to
  # name the file referenced by 'static_files' above.
  upload: static/index\.html

- url: /json.*
  script: main.app

# All the search endpoints are routed by the WSGI application in videos.py.
- url: /videos.*
  script: videos.app

- url: /youtube.*
  script: videos.app

- url: /dailymotion.*
  script: videos.app

- url: /vimeo.*
  script: videos.app

libraries:
- name: webapp2
  version: "2.5.2"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from video_search_handler import VideoSearchHandler | |
from google.appengine.api import urlfetch | |
from urllib import quote_plus | |
from json import loads as json_loads | |
from google.appengine.api import memcache | |
from datetime import datetime | |
import logging | |
#Number of results returned when the request does not provide 'max_results'.
DAILYMOTION_DEFAULT_MAX_RESULTS = 10
#Page size asked to the API (sent as 'limit'); also used to map result indices to page indices.
DAILYMOTION_MAX_RESULTS_PER_PAGE = 50
#Value of the 'source' field of every result, and prefix of the memcached keys.
DAILYMOTION_SOURCE_STR = 'd'
#Endpoint for keyword searches: the query string is appended to it.
DAILYMOTION_VIDEOS_URL = 'https://api.dailymotion.com/videos?'
#Endpoint for related-videos queries: '%s' is replaced by the id of the reference video.
DAILYMOTION_VIDEOS_RELATED_URL = 'https://api.dailymotion.com/video/%s/related?'
def merge_dicts(old_d, new_d):
    """ Copy into new_d every entry of old_d whose key is not already in new_d.

        new_d is modified in place, and its entries always win over the ones in old_d.

        : param old_d : The dictionary providing the fallback entries (left untouched).
        : type old_d : Dictionary
        : param new_d : The dictionary to complete.
        : type new_d : Dictionary
        : return : new_d itself, after the merge.
    """
    for key, value in old_d.items():
        #setdefault only writes when the key is missing, so existing entries are preserved
        new_d.setdefault(key, value)
    return new_d
def result_transform(search_result):
    """ Convert a single item of a DailyMotion response into the result format returned by this service.

        : param search_result : The single search result to convert.
        : type search_result : Dictionary
        : return : A Dictionary with the keys 'id', 'url', 'title', 'thumbnail', 'date' and 'source';
                   fields missing from search_result are set to the empty string.
    """
    if 'created_time' in search_result:
        #the value is handed to datetime.fromtimestamp, then rendered as a string
        created = str(datetime.fromtimestamp(search_result['created_time']))
    else:
        created = ''

    transformed = {
        'date': created,
        'source': DAILYMOTION_SOURCE_STR
    }
    #(output key, key in the API item) pairs for the fields copied verbatim
    copied_fields = (('id', 'id'), ('url', 'url'), ('title', 'title'), ('thumbnail', 'thumbnail_url'))
    for out_key, api_key in copied_fields:
        transformed[out_key] = search_result.get(api_key, "")
    return transformed
class DailyMotionSearchHandler(VideoSearchHandler):
    """ This class handles queries on DailyMotion.

        : param __VALID_ORDER_CRITERIA : The valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """
    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['relevance', 'recent', 'rated', 'visited', 'title', 'random', 'ranking'])
    __DEFAULT_SORTING_CRITERION = 'relevance'

    def __memcache_key(self, q, related_id, country, order=None):
        """ Construct the key to store the results for the current query on memcached.

            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter County code.
            : type country : String | None
            : param order : The sorting criterion of the query: differently sorted queries must not share cached pages.
            : type order : String | None
            : return : The key to use for memcached.
        """
        key = DAILYMOTION_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        if order:
            key += VideoSearchHandler.ORDER_PREFIX + order
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API

            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in DailyMotionSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return DailyMotionSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_dailymotion_videos(self, keywords, params, related_id, country):
        """ Perform the search on DailyMotion and return the list of videos retrieved.

            Results are downloaded (and cached on memcached) one page at a time; only the pages
            covering the requested range that are not in cache are fetched.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : A list of the parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param country : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type country : String
            : return : The list of results requested, or None if a call to the API failed
                       (in that case the error response has already been written).
        """
        #Init a memcached client; It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
        memcached_client = memcache.Client()

        #try to retrieve data from the cache, if present
        order = params.get('order', None)
        key = self.__memcache_key(keywords, related_id, country, order)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}     #If the query wasn't present in cache, just init the object to an empty container.

        #Computes the index of the first result to be returned (by default, 1)
        first_result = params.get('first_result', 1)

        #Computes the index of the last result to be returned, based on the number of results requested by the user.
        max_results = params.get('max_results')
        if max_results is None:
            max_results = DAILYMOTION_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results to retrieve from DailyMotion
        first_page = (first_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE

        #Prepare the mandatory parameters for the DailyMotion API
        search_params = {
            'search': quote_plus(keywords),
            'fields': "id,url,title,thumbnail_url,created_time",
            'limit': DAILYMOTION_MAX_RESULTS_PER_PAGE
        }

        #Handles the optional parameters
        if country is not None:
            search_params['country'] = country
        if order is not None:
            search_params['sort'] = order

        #DailyMotion requires different API calls for related and normal searches
        if related_id is not None:
            dm_url = DAILYMOTION_VIDEOS_RELATED_URL % str(related_id)   #safe conversion
            #'sort' and 'search' params are not allowed by dailymotion on related queries;
            #pop with a default, so that a parameter that was never set can't raise a KeyError
            search_params.pop('sort', None)
            search_params.pop('search', None)
        else:
            dm_url = DAILYMOTION_VIDEOS_URL

        #Start the iteration to retrieve all the pages
        page = first_page
        results = []
        while page <= last_page:
            if page in cached:
                #This page of the results is already cached.
                results += cached[page]
                page += 1
                continue

            #else: the results must be downloaded from DailyMotion (our page index is 0-based, the one sent is page + 1)
            search_params['page'] = page + 1
            #Build the URL for the API call
            url = dm_url + '&'.join(["%s=%s" % (k, str(v)) for k, v in search_params.items()])
            #Fetches the data from the API call
            search_response = urlfetch.fetch(url)

            if search_response.status_code != 200:
                #Couldn't retrieve the data: for consistency, the whole call is stopped.
                self.return_error(search_response.status_code)
                #Attempt to store what had been retrieved so far on memcached anyway.
                self.store_to_memcached(cached, key, merge_dicts, memcached_client)
                return None

            payload = json_loads(search_response.content)
            search_results = [result_transform(item) for item in payload.get("list", [])]
            cached[page] = search_results
            results += search_results

            if not payload.get('has_more', False):
                #The response from DailyMotion states that this is the last page of results.
                break
            page += 1

        #Attempt to store the result on memcached.
        self.store_to_memcached(cached, key, merge_dicts, memcached_client)

        #Adjust the indices to compensate for pages that we could avoid to download.
        skipped = first_page * DAILYMOTION_MAX_RESULTS_PER_PAGE
        first_result -= skipped
        last_result -= skipped

        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - 1:last_result]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and returns the list of videos retrieved.

            The results are written to the response as JSON; nothing is written here if the search failed.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        res = self.__search_dailymotion_videos(keywords, params=self.extract_params(), related_id=related_id, country=regionCode)
        if res is not None:
            self.return_results(res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import webapp2 | |
import json | |
class VideoSearchHandler(webapp2.RequestHandler):
    """ Base class for search handlers specific to video provider (YouTube, DailyMotion, Vimeo...)

        Subclasses must provide a validate_order(criterion) method: extract_params relies on it.

        : param RELATED_PREFIX : Common prefix used for memcached keys identifying queries for videos related to a specific (video's) id.
        : type RELATED_PREFIX : String
        : param COUNTRY_PREFIX : Common prefix used for memcached keys for queries restricted to a single country/region code.
        : type COUNTRY_PREFIX : String
        : param ORDER_PREFIX : Common prefix used for memcached keys for queries with an explicit sorting criterion.
        : type ORDER_PREFIX : String
        : param MEMCACHED_KEY_MAX_LEN : Maximum acceptable length for a memcached key.
        : type MEMCACHED_KEY_MAX_LEN : Integer
    """
    RELATED_PREFIX = "_ID_"
    COUNTRY_PREFIX = "_CY_"
    ORDER_PREFIX = "_O_"
    #NOTE(review): confirm this value against the key-size limit of the memcache service in use.
    MEMCACHED_KEY_MAX_LEN = 255
    #Upper bound on the merge-and-retry attempts in store_to_memcached, so that a request can't spin forever.
    _MEMCACHED_MAX_CAS_ATTEMPTS = 10

    def get_from_memcached(self, key, memcached_client):
        """ Attempt to retrieve a value from memcached

            : param key : The memcached key to be retrieved.
            : type key : String
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class).
                                       It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
            : type memcached_client : Object
            : return : the value retrieved from the cache, or None, if the key is not in cache.
        """
        return memcached_client.gets(key)

    def store_to_memcached(self, value, key, merge_values, memcached_client):
        """ Attempt to store a value on memcached. If the key is MEMCACHED_KEY_MAX_LEN characters or longer, the value isn't stored.
            If the value stored for key has been updated after value has been retrieved, the two values are merged
            according to a function passed by the caller.

            : param value : The value to be stored on memcached.
            : param key : The memcached key to be retrieved.
            : type key : String
            : param merge_values : The function that will be used to merge values, in case of race conditions;
                                   it is called as merge_values(value, stored_value).
            : type merge_values : Function
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class)
            : type memcached_client : Object
            : return : True iff the key-value pair is successfully stored.
        """
        #key might be too long: in that case, it would hashed, and we can't risk that 2 different searches hashes to the same value:
        #it's better not to cache content in that case
        if len(key) >= VideoSearchHandler.MEMCACHED_KEY_MAX_LEN:
            return False

        if memcached_client.cas(key, value):
            #The value has been stored successfully
            return True

        for _ in range(VideoSearchHandler._MEMCACHED_MAX_CAS_ATTEMPTS):
            #Retrieve the latest value stored for the given key
            stored = memcached_client.gets(key)
            if stored is None:
                #If the key is not stored in memcached (never set, or removed in the meantime),
                #cas will always return false, and there is nothing to merge with.
                return memcached_client.set(key, value)

            #cas returned false and the key is set: there must be a newer version stored on memcached;
            #how to merge the two versions, is left to the caller
            value = merge_values(value, stored)
            if memcached_client.cas(key, value):
                #The value has been stored successfully
                return True

        #Too much contention on this key: give up, caching is best-effort only.
        return False

    def extract_positive_int(self, param_name, default_value):
        """ Retrieve a parameter from the request, and check that the value retrieved
            is a positive integer.

            : param param_name : The name of the parameter to retrieve.
            : type param_name : String
            : param default_value : The default value that will be returned if the parameter is not
                                    present in the request, or if its value is not valid.
            : type default_value : Integer (positive) | None
            : return : The validated value, or the default value, if validation fails.
        """
        #If the parameter isn't set, the default value is what we get here
        raw = self.request.get(param_name, default_value=default_value)
        try:
            v = int(raw)
        except (TypeError, ValueError):
            #TypeError: raw is None (no value and no default); ValueError: raw is not a number (f.i. "abc" or "")
            return default_value
        return v if v > 0 else default_value

    def extract_params(self):
        """ Retrieve the parameters we need from the request, and return a dictionary with them.

            : return : A dictionary containing name-value pairs for all the parameters needed to query the video providers:
                       'first_result', 'max_results' (None if not set or not valid) and 'order' (validated by the subclass).
        """
        return {
            'first_result': self.extract_positive_int('first_result', 1),
            'max_results': self.extract_positive_int('max_results', None),
            'order': self.validate_order(self.request.get('order', default_value=None))
        }

    def return_error(self, error_code, error_message=""):
        """ Respond to the HTTP request with an error code, and a mesage.
            Anything already written to the response is discarded.

            : param error_code : The HTTP error code to be returned (should be a 4XY code).
            : type error_code : Integer
            : param error_message : The error message to display with the response.
            : type error_message : String
        """
        self.response.clear()
        self.response.set_status(error_code, error_message)
        self.response.out.write(error_message)

    def return_results(self, results):
        """ Respond to the HTTP request with a JSON response.

            : param results : The results to be returned (anything json.dumps can serialize).
            : type results : Dictionary | List
        """
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(results))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import webapp2 | |
from apiclient.errors import HttpError | |
from youtube import YouTubeSearchHandler | |
from dailymotion import DailyMotionSearchHandler | |
from vimeo import VimeoSearchHandler | |
from google.appengine.api import memcache | |
from google.appengine.api.urlfetch import fetch | |
import logging | |
class SimpleVideoSearchHandler(YouTubeSearchHandler):
    """ Keyword search for the generic '/videos/<keywords>' route; the search performed is the one inherited from YouTubeSearchHandler. """
    def get(self, keywords):
        #keywords is the group captured from the URL by the route's regular expression
        self.search_videos(keywords)
class RelatedVideoSearchHandler(YouTubeSearchHandler):
    """ Related-videos search for the generic '/videos/related/<id>' route; the search performed is the one inherited from YouTubeSearchHandler. """
    def get(self, related_id):
        #related_id is the group captured from the URL: the id of the reference video
        self.search_videos(related_id=related_id)
class VideoSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Keyword search restricted to a country, for the generic '/videos/<keywords>/countries/<code>' route; inherits the search from YouTubeSearchHandler. """
    def get(self, keywords, regionCode):
        #the country code is upper-cased before being passed on
        self.search_videos(keywords, regionCode=regionCode.upper())
class SimpleYouTubeSearchHandler(YouTubeSearchHandler):
    """ Keyword search on YouTube, for the '/youtube/<keywords>' route. """
    def get(self, keywords):
        #keywords is the group captured from the URL by the route's regular expression
        self.search_videos(keywords)
class RelatedYouTubeSearchHandler(YouTubeSearchHandler):
    """ Related-videos search on YouTube, for the '/youtube/related/<id>' route. """
    def get(self, related_id):
        #related_id is the group captured from the URL: the id of the reference video
        self.search_videos(related_id=related_id)
class YouTubeSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Keyword search on YouTube restricted to a country, for the '/youtube/<keywords>/countries/<code>' route. """
    def get(self, keywords, regionCode):
        #the country code is upper-cased before being passed on
        self.search_videos(keywords, regionCode=regionCode.upper())
class SimpleDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Keyword search on DailyMotion, for the '/dailymotion/<keywords>' route. """
    def get(self, keywords):
        #keywords is the group captured from the URL by the route's regular expression
        self.search_videos(keywords)
class RelatedDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Related-videos search on DailyMotion, for the '/dailymotion/related/<id>' route. """
    def get(self, related_id):
        #related_id is the group captured from the URL: the id of the reference video
        self.search_videos(related_id=related_id)
class DailyMotionSearchHandlerWithRegion(DailyMotionSearchHandler):
    """ Keyword search on DailyMotion restricted to a country, for the '/dailymotion/<keywords>/countries/<code>' route. """
    def get(self, keywords, regionCode):
        #the country code is upper-cased before being passed on
        self.search_videos(keywords, regionCode=regionCode.upper())
class SimpleVimeoSearchHandler(VimeoSearchHandler):
    """ Keyword search on Vimeo, for the '/vimeo/<keywords>' route. """
    def get(self, keywords):
        #keywords is the group captured from the URL by the route's regular expression
        self.search_videos(keywords)
class RelatedVimeoSearchHandler(VimeoSearchHandler):
    """ Related-videos search on Vimeo, for the '/vimeo/related/<id>' route. """
    def get(self, related_id):
        #related_id is the group captured from the URL: the id of the reference video
        self.search_videos(related_id=related_id)
class VimeoSearchHandlerWithRegion(VimeoSearchHandler):
    """ Keyword search on Vimeo with a country code, for the '/vimeo/<keywords>/countries/<code>' route. """
    def get(self, keywords, regionCode):
        #the country code is upper-cased before being passed on
        self.search_videos(keywords, regionCode=regionCode.upper())
#Routing table of the service: (URL pattern, handler class) pairs. For every provider prefix
#there is a keyword search, a keyword search restricted to a country, and a related-videos search;
#the groups captured by each pattern are passed to the handler's get method.
_ROUTES = [
    #generic endpoints
    ('/videos/([^/]+)/?', SimpleVideoSearchHandler),
    ('/videos/([^/]+)/countries/([^/]+)/?', VideoSearchHandlerWithRegion),
    ('/videos/related/([^/]+)/?', RelatedVideoSearchHandler),
    #YouTube
    ('/youtube/([^/]+)/?', SimpleYouTubeSearchHandler),
    ('/youtube/([^/]+)/countries/([^/]+)/?', YouTubeSearchHandlerWithRegion),
    ('/youtube/related/([^/]+)/?', RelatedYouTubeSearchHandler),
    #DailyMotion
    ('/dailymotion/([^/]+)/?', SimpleDailyMotionSearchHandler),
    ('/dailymotion/([^/]+)/countries/([^/]+)/?', DailyMotionSearchHandlerWithRegion),
    ('/dailymotion/related/([^/]+)/?', RelatedDailyMotionSearchHandler),
    #Vimeo
    ('/vimeo/([^/]+)/?', SimpleVimeoSearchHandler),
    ('/vimeo/([^/]+)/countries/([^/]+)/?', VimeoSearchHandlerWithRegion),
    ('/vimeo/related/([^/]+)/?', RelatedVimeoSearchHandler)
]

#The WSGI application referenced as 'videos.app' by app.yaml.
app = webapp2.WSGIApplication(_ROUTES, debug=True)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from video_search_handler import VideoSearchHandler | |
from google.appengine.api import urlfetch | |
from urllib import quote_plus, urlencode | |
from json import loads as json_loads | |
from google.appengine.api import memcache | |
from datetime import datetime | |
import base64 | |
import logging | |
#Number of results returned when the request does not provide 'max_results'.
VIMEO_DEFAULT_MAX_RESULTS = 5
#Page size asked to the API (sent as 'per_page'); also used to map result indices to page indices.
VIMEO_MAX_RESULTS_PER_PAGE = 50
#Value of the 'source' field of every result, and prefix of the memcached keys.
VIMEO_SOURCE_STR = 'v'
#Endpoint for keyword searches: the query string is appended to it.
VIMEO_VIDEOS_URL = 'https://api.vimeo.com/videos?'
#Endpoint for related-videos queries: '%s' is replaced by the id of the reference video.
VIMEO_VIDEOS_RELATED_URL = 'https://api.vimeo.com/videos/%s/related?filter=related&'
#Credentials of the application: placeholders, to be replaced with real values.
VIMEO_CLIENT_ID = "REPLACE_ME"
VIMEO_CLIENT_SECRET = "REPLACE_ME"
#Headers sent with every search request. The 'Authorization' entry is a placeholder:
#VimeoSearchHandler.search_videos overwrites it with a bearer token before searching.
VIMEO_OAUTH_HEADERS = {
    'Accept': 'application/vnd.vimeo.*+json;version=3.0',
    'Authorization': 'REPLACE_ME'
}
def merge_dicts(old_d, new_d):
    """ Complete new_d with the entries of old_d whose keys new_d doesn't have yet.

        The merge happens in place on new_d; on shared keys, the value in new_d is kept.

        : param old_d : The dictionary providing the fallback entries (left untouched).
        : type old_d : Dictionary
        : param new_d : The dictionary to complete.
        : type new_d : Dictionary
        : return : new_d itself, after the merge.
    """
    for key, value in old_d.items():
        #only missing keys are written
        new_d.setdefault(key, value)
    return new_d
def result_transform(search_result):
    """ Convert a single item of a Vimeo response into the result format returned by this service.

        : param search_result : The single search result to convert.
        : type search_result : Dictionary
        : return : A Dictionary with the keys 'id', 'url', 'title', 'thumbnail', 'date' and 'source';
                   fields that can't be extracted from search_result are set to the empty string.
    """
    #'pictures' is treated as a list of dictionaries, each with a 'link' entry
    pictures = search_result.get('pictures', [])
    if not pictures:
        #missing, empty or null
        thumbnail = ""
    elif len(pictures) < 3:
        thumbnail = pictures[0].get("link", "")
    else:
        thumbnail = pictures[-3].get("link", "")     #the 3rd to last picture is a medium sized thumbnail,
                                                     #the last picture is usually the smallest

    #The id is the third '/'-separated segment of the uri (f.i. '/videos/<id>');
    #a missing or shorter uri yields an empty id instead of raising an IndexError.
    uri_parts = search_result.get('uri', "").split("/")
    video_id = uri_parts[2] if len(uri_parts) > 2 else ""

    return {
        'id': video_id,
        'url': search_result.get('link', ""),
        #NOTE(review): the title is taken from 'description'; confirm this is the intended field of Vimeo's video object.
        'title': search_result.get('description', ""),
        'thumbnail': thumbnail,
        #like every other field, a missing creation time must not make the whole search fail
        'date': search_result.get('created_time', ""),
        'source': VIMEO_SOURCE_STR
    }
class VimeoSearchHandler(VideoSearchHandler):
    """ This class handles queries on Vimeo.

        : param __VALID_ORDER_CRITERIA : The valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """
    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['relevant', 'date', 'likes', 'plays', 'comments'])
    __DEFAULT_SORTING_CRITERION = 'relevant'

    def get_access_token(self, cid, secret, api_url='https://api.vimeo.com/oauth/authorize/client'):
        """ Request an access token for the application, using the OAuth client-credentials grant.

            The client id and secret are sent, base64-encoded, in a basic Authorization header
            with a POST request to api_url.

            : param cid : The client ID for the current app.
            : type cid : String
            : param secret : The client secret for the current app.
            : type secret : String
            : param api_url : The URL of the endpoint issuing the tokens.
            : type api_url : String
            : return : The decoded JSON body of the response (the token is under its 'access_token' key).
            : raise ValueError : If the response status is not 200; the status code is the exception's argument.
        """
        encoded = base64.b64encode("%s:%s" % (cid, secret))
        payload = {
            "grant_type": "client_credentials",
            "scope": "public create"
        }
        headers = {
            "Accept": "application/vnd.vimeo.*+json; version=3.0",
            "Authorization": "basic %s" % encoded
        }
        response = urlfetch.fetch(api_url,
                                  method="POST",
                                  headers=headers,
                                  payload=urlencode(payload))
        if response.status_code != 200:
            raise ValueError(response.status_code)
        return json_loads(response.content)

    def __memcache_key(self, q, related_id, country, order):
        """ Construct the key to store the results for the current query on memcached.

            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter County code.
            : type country : String | None
            : param order : The sorting criterion of the query.
            : type order : String | None
            : return : The key to use for memcached.
        """
        key = VIMEO_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        if order:
            key += VideoSearchHandler.ORDER_PREFIX + order
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API

            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in VimeoSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return VimeoSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_vimeo_videos(self, keywords, params, related_id, country):
        """ Perform the search on Vimeo and return the list of videos retrieved.

            Results are downloaded (and cached on memcached) one page at a time; only the pages
            covering the requested range that are not in cache are fetched.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : A list of the parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param country : The international code (as a 2-letter string) for the Country of the query.
                              It is only used for the memcached key: it is not sent to Vimeo.
            : type country : String
            : return : The list of results requested, or None if a call to the API failed
                       (in that case the error response has already been written).
        """
        #Init a memcached client; It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
        memcached_client = memcache.Client()

        #try to retrieve data from the cache, if present
        order = params.get('order', None)
        key = self.__memcache_key(keywords, related_id, country, order)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}     #If the query wasn't present in cache, just init the object to an empty container.

        #Computes the index of the first result to be returned (by default, 1)
        first_result = params.get('first_result', 1)

        #Computes the index of the last result to be returned, based on the number of results requested by the user.
        max_results = params.get('max_results')
        if max_results is None:
            max_results = VIMEO_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results to retrieve from Vimeo
        first_page = (first_result - 1) // VIMEO_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // VIMEO_MAX_RESULTS_PER_PAGE

        #Prepare the mandatory parameters for the Vimeo API
        search_params = {
            'query': quote_plus(keywords),
            'per_page': VIMEO_MAX_RESULTS_PER_PAGE
        }

        #Handles the optional parameters
        if order is not None:
            search_params['sort'] = order

        if related_id is None:
            api_url = VIMEO_VIDEOS_URL
        else:
            api_url = VIMEO_VIDEOS_RELATED_URL % str(related_id)    #safe conversion

        #Start the iteration to retrieve all the pages
        page = first_page
        results = []
        while page <= last_page:
            if page in cached:
                #This page of the results is already cached.
                results += cached[page]
                page += 1
                continue

            #else: the results must be downloaded from Vimeo (our page index is 0-based, the one sent is page + 1)
            search_params['page'] = page + 1
            #Build the URL for the API call
            url = api_url + '&'.join(["%s=%s" % (k, str(v)) for k, v in search_params.items()])
            #Fetches the data from the API call
            search_response = urlfetch.fetch(url, method="GET", headers=VIMEO_OAUTH_HEADERS)

            if search_response.status_code != 200:
                #Couldn't retrieve the data: for consistency, the whole call is stopped.
                self.return_error(search_response.status_code)
                #Attempt to store what had been retrieved so far on memcached anyway.
                self.store_to_memcached(cached, key, merge_dicts, memcached_client)
                return None

            payload = json_loads(search_response.content)
            search_results = [result_transform(item) for item in payload.get("data", [])]
            cached[page] = search_results
            results += search_results

            #A missing or null 'next' link (or no 'paging' section at all) means this is the last page of results.
            paging = payload.get('paging') or {}
            if paging.get('next') is None:
                break
            page += 1

        #Attempt to store the result on memcached.
        self.store_to_memcached(cached, key, merge_dicts, memcached_client)

        #Adjust the indices to compensate for pages that we could avoid to download.
        skipped = first_page * VIMEO_MAX_RESULTS_PER_PAGE
        first_result -= skipped
        last_result -= skipped

        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - 1:last_result]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and returns the list of videos retrieved.

            A new access token is requested to Vimeo before searching. The results are written
            to the response as JSON; an error response is written if the token can't be obtained.

            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        try:
            auth_token = self.get_access_token(cid=VIMEO_CLIENT_ID, secret=VIMEO_CLIENT_SECRET)
            access_token = auth_token['access_token']
        except (ValueError, KeyError, TypeError) as e:
            #get_access_token raises ValueError(status_code) on a non-200 response: that status is propagated,
            #as done for failed searches. Any other failure (body not valid JSON, no token in it) is reported as 502.
            status = e.args[0] if e.args and isinstance(e.args[0], int) else 502
            logging.error("Could not obtain a Vimeo access token (status %s)", status)
            self.return_error(status)
            return

        #The token is a credential: it must never be written to the logs.
        VIMEO_OAUTH_HEADERS['Authorization'] = 'bearer %s' % access_token

        res = self.__search_vimeo_videos(keywords, params=self.extract_params(), related_id=related_id, country=regionCode)
        if res is not None:
            self.return_results(res)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from video_search_handler import VideoSearchHandler | |
from apiclient.discovery import build | |
from apiclient.errors import HttpError | |
from google.appengine.api import memcache | |
import logging | |
#API key passed as developerKey when the YouTube client is built: placeholder, to be replaced with a real key.
DEVELOPER_KEY = "REPLACE_ME"
#Name and version of the service passed to apiclient's build().
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
#Number of results returned when the request does not provide 'max_results'.
YOUTUBE_DEFAULT_MAX_RESULTS = 5
#Page size asked to the API (sent as 'maxResults'); also used to map result indices to page indices.
YOUTUBE_MAX_RESULTS_PER_PAGE = 50
#The URL of a result is this prefix followed by the video id.
YOUTUBE_VIDEO_URL_PREFIX = "https://www.youtube.com/watch?v="
#Value of the 'source' field of every result, and prefix of the memcached keys.
YOUTUBE_SOURCE_STR = 'y'
#memcached key under which the page tokens are stored.
PAGE_TOKES_KEYS = 'youtube_page_tokens'
def merge_dicts(old_d, new_d):
    """ Add to new_d the entries of old_d that new_d is missing.

        new_d is updated in place, and keeps its own value for the keys it shares with old_d.

        : param old_d : The dictionary providing the fallback entries (left untouched).
        : type old_d : Dictionary
        : param new_d : The dictionary to complete.
        : type new_d : Dictionary
        : return : new_d itself, after the merge.
    """
    for key, value in old_d.items():
        #keys already in new_d are left as they are
        new_d.setdefault(key, value)
    return new_d
class YouTubeSearchHandler(VideoSearchHandler): | |
""" This class handles queries on YouTube. | |
: param __VALID_ORDER_CRITERIA : A list of the valid values for the order criterion. | |
: type __VALID_ORDER_CRITERIA : Array | |
: param __DEFAULT_SORTING_CRITERION : The default value for the order parameter. | |
: type __DEFAULT_SORTING_CRITERION : String | |
""" | |
#list all the possible valid values for the order | |
__VALID_ORDER_CRITERIA = set(['date', 'rating', 'relevance', 'title', 'videoCount', 'viewCount']) | |
__DEFAULT_SORTING_CRITERION = 'relevance' | |
def __memcache_key(self, q, related_id, country, order=None):
    """ Construct the key to store the results for the current query on memcached.

        : param q : The keywords for the query
        : type q : String
        : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
        : type related_id : String | None
        : param country : For queries restricted to particular Countries, the international 2-letter County code.
        : type country : String | None
        : param order : The sorting criterion of the query: differently sorted queries must not share cached pages.
                        The search method of this class passes it as fourth argument.
        : type order : String | None
        : return : The key to use for memcached.
    """
    key = YOUTUBE_SOURCE_STR + '_' + q
    if related_id:
        key += VideoSearchHandler.RELATED_PREFIX + related_id
    if country:
        key += VideoSearchHandler.COUNTRY_PREFIX + country
    if order:
        key += VideoSearchHandler.ORDER_PREFIX + order
    return key
def validate_order(self, criterion):
    """ Check the sorting criterion passed against the ones accepted for YouTube queries.

        : param criterion : The order criterion to validate.
        : type criterion : String
        : return : criterion itself, if it is a valid one; the default sorting criterion, otherwise.
    """
    if criterion in YouTubeSearchHandler.__VALID_ORDER_CRITERIA:
        return criterion
    #anything else (None included) falls back to the default
    return YouTubeSearchHandler.__DEFAULT_SORTING_CRITERION
def __search_youtube_videos(self, keywords, params, related_id, regionCode): | |
youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY) | |
memcached_client = memcache.Client() | |
# Filter results to retaun only matching videos, and filter out channels and playlists. | |
result_transform = lambda search_result: { | |
'id': search_result['id']['videoId'], | |
'url': YOUTUBE_VIDEO_URL_PREFIX + search_result['id']['videoId'], | |
'title': search_result['snippet']['title'], | |
'thumbnail': search_result['snippet']['thumbnails']['default']['url'], | |
'date': search_result['snippet']['publishedAt'], | |
'source': YOUTUBE_SOURCE_STR | |
} | |
#try to retrieve data from the cache, if present | |
key = self.__memcache_key(keywords, related_id, regionCode, params.get('order', None)) | |
cached = self.get_from_memcached(key, memcached_client) | |
if cached is None: | |
cached = {} | |
first_result = params['first_result'] if 'first_result' in params else 1 | |
if params['max_results'] is None: | |
params['max_results'] = YOUTUBE_DEFAULT_MAX_RESULTS | |
last_result = first_result + params['max_results'] - 1 | |
#else: | |
first_page = int((first_result - 1) / YOUTUBE_MAX_RESULTS_PER_PAGE) #floor division | |
last_page = int((last_result - 1) / YOUTUBE_MAX_RESULTS_PER_PAGE) #floor division | |
page_tokens = self.get_from_memcached(PAGE_TOKES_KEYS, memcached_client) | |
if page_tokens is None: | |
page_tokens = { | |
0: None #placeHolder so that max(page_tokens) won't trigger a ValueError | |
} | |
#prepare the parameters for the list method | |
search_params = { | |
'q':keywords, | |
'part':"id,snippet", | |
'type':'video', | |
'maxResults': YOUTUBE_MAX_RESULTS_PER_PAGE | |
} | |
if not related_id is None: | |
search_params['relatedToVideoId'] = related_id | |
if not regionCode is None: | |
search_params['regionCode'] = regionCode | |
if not params['order'] is None: | |
search_params['order'] = params['order'] | |
page = min(max(page_tokens), first_page) | |
#search_params['maxResults'] = YOUTUBE_MAX_RESULTS_PER_PAGE | |
results = [] | |
while page <= last_page: | |
if page in cached: | |
#this page of the results is already cached | |
results += cached[page] | |
page += 1 | |
continue | |
if page > 0: | |
#which page should be retrieved? | |
#Invariant: page in page_tokens | |
search_params['pageToken'] = page_tokens[page] | |
# Call the search.list method to retrieve results matching the keywords. | |
try: | |
search_response = youtube.search().list( | |
**search_params #unpack the dictionary to a list of named parameters | |
).execute() | |
except HttpError as e: | |
logging.warning("YOUTUBE API ERROR " + str(e)) | |
self.return_error(400, e) | |
return None | |
search_results = map(result_transform, search_response.get("items", [])) | |
cached[page] = search_results | |
results += search_results | |
page += 1 | |
if 'nextPageToken' in page_tokens: | |
page_tokens[page] = search_response['nextPageToken'] | |
else: | |
#there is not a next page to download | |
break | |
self.store_to_memcached(cached, key, merge_dicts, memcached_client) | |
self.store_to_memcached(page_tokens, PAGE_TOKES_KEYS, merge_dicts, memcached_client) | |
first_result -= first_page * YOUTUBE_MAX_RESULTS_PER_PAGE | |
last_result -= first_page * YOUTUBE_MAX_RESULTS_PER_PAGE | |
return results[first_result - 1:last_result] | |
def search_videos(self, keywords="", related_id=None, regionCode=None): | |
""" Main method: search the provider and returns the list of videos retrieved. | |
: param keywords : The search terms for the query. | |
: type keywords : String | |
: param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video. | |
: type related_id : String | |
: param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from. | |
: type regionCode : String | |
""" | |
res = self.__search_youtube_videos(keywords, params=self.extract_params(), related_id=related_id, regionCode=regionCode) | |
if not res is None: | |
self.return_results(res) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment