Skip to content

Instantly share code, notes, and snippets.

@mlarocca mlarocca/app.yaml Secret
Last active Aug 29, 2015

Embed
What would you like to do?
# App Engine configuration for the video search application.
application: pweb-14
version: 1
runtime: python27
api_version: 1
threadsafe: yes

handlers:
# Static favicon.
- url: /favicon\.ico
  static_files: favicon.ico
  upload: favicon\.ico

# Landing page. `upload` is a regular expression over file paths and must
# match the file named by `static_files`, otherwise the file is not uploaded.
- url: /
  static_files: static/index.html
  upload: static/index\.html

# Every search endpoint is served by the WSGI application in videos.py.
- url: /videos.*
  script: videos.app
- url: /youtube.*
  script: videos.app
- url: /dailymotion.*
  script: videos.app

libraries:
- name: webapp2
  version: "2.5.2"
from video_search_handler import VideoSearchHandler
from google.appengine.api import urlfetch
from urllib import quote_plus
from json import loads as json_loads
from google.appengine.api import memcache
from datetime import datetime
import logging
#Number of results returned when the request does not specify 'max_results'.
DAILYMOTION_DEFAULT_MAX_RESULTS = 10
#Page size: sent as the 'limit' parameter and used to map result indices to page indices.
DAILYMOTION_MAX_RESULTS_PER_PAGE = 50
#Marker for this provider: used as the 'source' field of every result and as the prefix of memcached keys.
DAILYMOTION_SOURCE_STR = 'd'
#Endpoint for keyword searches; the query string is appended to it.
DAILYMOTION_VIDEOS_URL = 'https://api.dailymotion.com/videos?'
#Endpoint for related-videos searches; %s is replaced with the id of the reference video.
DAILYMOTION_VIDEOS_RELATED_URL = 'https://api.dailymotion.com/video/%s/related?'
def merge_dicts(old_d, new_d):
    """ Copy into new_d every entry of old_d whose key is missing from new_d.
        Entries already present in new_d take precedence and are left untouched.
        : param old_d : The dictionary providing the fallback entries (not modified).
        : type old_d : Dictionary
        : param new_d : The dictionary to complete (modified in place).
        : type new_d : Dictionary
        : return : new_d itself, after the merge.
    """
    for key in old_d:
        new_d.setdefault(key, old_d[key])
    return new_d
def result_transform(search_result):
    """ Convert a single item returned by DailyMotion into the format used for our responses.
        Missing textual fields are replaced by empty strings; the creation timestamp,
        when present, is converted to its string representation.
        : param search_result : The single search result to transform.
        : type search_result : Dictionary
        : return : A Dictionary with the fields 'id', 'url', 'title', 'thumbnail', 'date' and 'source'.
    """
    field = search_result.get
    if 'created_time' in search_result:
        created = str(datetime.fromtimestamp(search_result['created_time']))
    else:
        created = ''
    return {
        'id': field('id', ""),
        'url': field('url', ""),
        'title': field('title', ""),
        'thumbnail': field('thumbnail_url', ""),
        'date': created,
        'source': DAILYMOTION_SOURCE_STR
    }
class DailyMotionSearchHandler(VideoSearchHandler):
    """ This class handles queries on DailyMotion.
        : param __VALID_ORDER_CRITERIA : The set of valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """
    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['relevance', 'recent', 'rated', 'visited', 'title', 'random', 'ranking'])
    __DEFAULT_SORTING_CRITERION = 'relevance'

    def __memcache_key(self, q, related_id, country):
        """ Construct the key to store the results for the current query on memcached.
            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter Country code.
            : type country : String | None
            : return : The key to use for memcached.
        """
        key = DAILYMOTION_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        return key

    def __build_query(self, search_params):
        """ Serialize the parameters for an API call into a URL query string.
            Every value is escaped, so that values coming from the request
            cannot add or override parameters of the API call.
            : param search_params : The name-value pairs to serialize.
            : type search_params : Dictionary
            : return : The query string (without the leading '?').
        """
        pairs = []
        for name, value in search_params.items():
            if not isinstance(value, str):
                #Text that is not a plain string is sent as UTF-8; anything else (f.i. integers) goes through str()
                value = value.encode('utf-8') if hasattr(value, 'encode') else str(value)
            pairs.append("%s=%s" % (name, quote_plus(value)))
        return '&'.join(pairs)

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API
            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in DailyMotionSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return DailyMotionSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_dailymotion_videos(self, keywords, params, related_id, country):
        """ Perform the search on DailyMotion and return the list of videos retrieved.
            Pages of results already stored on memcached are reused; the others are downloaded and cached.
            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : The parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String | None
            : param country : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type country : String | None
            : return : The list of results requested, or None if an API call failed (in which case the error response has already been written).
        """
        #Init a memcached client; It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
        memcached_client = memcache.Client()

        #try to retrieve data from the cache, if present
        key = self.__memcache_key(keywords, related_id, country)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}     #If the query wasn't present in cache, just init the object to an empty container.

        #Computes the (1-based) indices of the first and last result to be returned
        first_result = params.get('first_result', 1)
        max_results = params.get('max_results')
        if max_results is None:
            max_results = DAILYMOTION_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results to retrieve from DailyMotion
        first_page = (first_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // DAILYMOTION_MAX_RESULTS_PER_PAGE

        #Prepare the mandatory parameters for the DailyMotion API (values are escaped when the URL is built)
        search_params = {
            'fields': "id,url,title,thumbnail_url,created_time",
            'limit': DAILYMOTION_MAX_RESULTS_PER_PAGE
        }
        if country is not None:
            search_params['country'] = country

        #DailyMotion requires different API calls for related and normal searches
        if related_id is not None:
            #'sort' and 'search' are not allowed by dailymotion on related queries, so they are never added here.
            #The id is escaped because it ends up in the path of the URL.
            dm_url = DAILYMOTION_VIDEOS_RELATED_URL % quote_plus(str(related_id))
        else:
            dm_url = DAILYMOTION_VIDEOS_URL
            search_params['search'] = keywords
            order = params.get('order')
            if order is not None:
                search_params['sort'] = order

        #Start the iteration to retrieve all the pages
        results = []
        page = first_page
        while page <= last_page:
            if page in cached:
                #This page of the results is already cached.
                results += cached[page]
                page += 1
                continue

            #else: the results must be downloaded from DailyMotion (whose pages are 1-based)
            search_params['page'] = page + 1
            search_response = urlfetch.fetch(dm_url + self.__build_query(search_params))

            if search_response.status_code != 200:
                #Couldn't retrieve the data: for consistency, the whole call is stopped.
                self.return_error(search_response.status_code)
                #Attempt to store what had been retrieved so far on memcached anyway.
                self.store_to_memcached(cached, key, merge_dicts, memcached_client)
                return None

            payload = json_loads(search_response.content)
            page_results = [result_transform(item) for item in payload.get("list", [])]
            cached[page] = page_results
            results += page_results

            if not payload.get('has_more', False):
                #The response from DailyMotion states that this is the last page of results.
                break
            page += 1

        #Attempt to store the result on memcached.
        self.store_to_memcached(cached, key, merge_dicts, memcached_client)

        #Adjust the indices to compensate for the pages before first_page, that are not in results.
        skipped = first_page * DAILYMOTION_MAX_RESULTS_PER_PAGE
        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - 1 - skipped:last_result - skipped]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and write the list of videos retrieved to the response.
            Nothing is written here when the search fails (the error response is produced by the search itself).
            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        res = self.__search_dailymotion_videos(keywords, params=self.extract_params(), related_id=related_id, country=regionCode)
        if res is not None:
            self.return_results(res)
import webapp2
import json
class VideoSearchHandler(webapp2.RequestHandler):
    """ Base class for search handlers specific to video provider (YouTube, DailyMotion, Vimeo...)
        Subclasses must provide a validate_order(criterion) method, which is called by extract_params.
        : param RELATED_PREFIX : Common prefix used for memcached keys identifying queries for videos related to a specific (video's) id.
        : type RELATED_PREFIX : String
        : param COUNTRY_PREFIX : Common prefix used for memcached keys for queries restricted to a single country/region code.
        : type COUNTRY_PREFIX : String
        : param MEMCACHED_KEY_MAX_LEN : Maximum acceptable length for a memcached key.
        : type MEMCACHED_KEY_MAX_LEN : Integer
    """
    RELATED_PREFIX = "_ID_"
    COUNTRY_PREFIX = "_CY_"
    MEMCACHED_KEY_MAX_LEN = 255
    #Upper bound on the merge-and-retry rounds in store_to_memcached, so that a contended key can't block a request forever.
    __MAX_CAS_ATTEMPTS = 10

    def get_from_memcached(self, key, memcached_client):
        """ Attempt to retrieve a value from memcached
            : param key : The memcached key to be retrieved.
            : type key : String
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class).
                                       It's important that each request instantiate a different memcached client, otherwise the gets-cas pattern won't work.
            : type memcached_client : Object
            : return : the value retrieved from the cache, or None, if the key is not in cache.
        """
        return memcached_client.gets(key)

    def store_to_memcached(self, value, key, merge_values, memcached_client):
        """ Attempt to store a value on memcached. If the key is MEMCACHED_KEY_MAX_LEN characters or longer, the value isn't stored.
            If the value stored for key has been updated after value has been retrieved, the two values are merged
            according to a function passed by the caller, and the store is attempted again (a bounded number of times).
            : param value : The value to be stored on memcached.
            : param key : The memcached key under which the value is stored.
            : type key : String
            : param merge_values : The function that will be used to merge values, in case of race conditions.
                                   It is called as merge_values(value, stored_value) and must return the merged value.
            : type merge_values : Function
            : param memcached_client : the memcached client instance to use (so that the same instance can be used throughout a single class)
            : type memcached_client : Object
            : return : True iff the key-value pair is successfully stored.
        """
        #key might be too long: in that case, it would be hashed, and we can't risk that 2 different searches hash to the same value:
        #it's better not to cache content in that case
        if len(key) >= VideoSearchHandler.MEMCACHED_KEY_MAX_LEN:
            return False

        if memcached_client.cas(key, value):
            #The value has been stored successfully
            return True

        for _ in range(VideoSearchHandler.__MAX_CAS_ATTEMPTS):
            #Retrieve the latest value stored for the given key
            stored_value = memcached_client.gets(key)
            if stored_value is None:
                #If the key is not stored in memcached (never set, or evicted in the meantime), cas will always return false
                return memcached_client.set(key, value)
            #There has been a race condition: a newer version is stored on memcached.
            #How to merge the two versions is left to the caller; the merged value is kept across
            #retries, so that the data passed by the caller is never dropped.
            value = merge_values(value, stored_value)
            if memcached_client.cas(key, value):
                return True

        #Too much contention on this key: give up, the value will be cached by a later request.
        return False

    def extract_positive_int(self, param_name, default_value):
        """ Retrieve a parameter from the request, and check that the value retrieved
            is a positive integer.
            : param param_name : The name of the parameter to retrieve.
            : type param_name : String
            : param default_value : The default value that will be returned if the parameter is not
                                    present in the request, or if its value is not valid.
            : type default_value : Integer (positive) | None
            : return : The validated value, or the default value, if validation fails.
        """
        try:
            #Try to get the value from the request (if it isn't set, the default value will be stored in v)
            v = int(self.request.get(param_name, default_value=default_value))
        except (TypeError, ValueError):
            #TypeError: the value is None (parameter not set, and no default);
            #ValueError: the parameter is set, but it isn't the representation of an integer.
            return default_value
        return v if v > 0 else default_value

    def extract_params(self):
        """ Retrieve the parameters we need from the request, and return a dictionary with them.
            The 'order' parameter is validated through the validate_order method of the concrete handler.
            : return : A dictionary containing name-value pairs for all the parameters needed to query the video providers.
        """
        return {
            'first_result': self.extract_positive_int('first_result', 1),
            'max_results': self.extract_positive_int('max_results', None),
            'order': self.validate_order(self.request.get('order', default_value=None))
        }

    def return_error(self, error_code, error_message=""):
        """ Respond to the HTTP request with an error code, and a message.
            Anything previously written to the response is discarded.
            : param error_code : The HTTP error code to be returned (should be a 4XY code).
            : type error_code : Integer
            : param error_message : The error message to display with the response.
            : type error_message : String
        """
        self.response.clear()
        self.response.set_status(error_code, error_message)
        self.response.out.write(error_message)

    def return_results(self, results):
        """ Respond to the HTTP request with a JSON response.
            : param results : The results to be returned (must be serializable to JSON).
            : type results : List | Dictionary
        """
        self.response.headers['Content-Type'] = 'application/json'
        self.response.out.write(json.dumps(results))
#!/usr/bin/env python
#
# Copyright 2007 Google Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
import webapp2
from apiclient.errors import HttpError
from youtube import YouTubeSearchHandler
from dailymotion import DailyMotionSearchHandler
from google.appengine.api import memcache
class SimpleVideoSearchHandler(YouTubeSearchHandler):
    """ Handler for the generic keyword search route; the search is delegated to YouTubeSearchHandler.
    """
    def get(self, keywords):
        """ Answer a GET request by searching for the keywords captured from the URL.
            : param keywords : The search terms for the query.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedVideoSearchHandler(YouTubeSearchHandler):
    """ Handler for the generic related-videos route; the search is delegated to YouTubeSearchHandler.
    """
    def get(self, related_id):
        """ Answer a GET request by searching for the videos related to the one captured from the URL.
            : param related_id : The ID of the video whose related videos are requested.
            : type related_id : String
        """
        video_id = related_id
        self.search_videos(related_id=video_id)
class VideoSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Handler for the generic keyword search restricted to a Country; the search is delegated to YouTubeSearchHandler.
    """
    def get(self, keywords, regionCode):
        """ Answer a GET request by searching for the keywords within the given Country.
            : param keywords : The search terms for the query.
            : type keywords : String
            : param regionCode : The Country code captured from the URL; it is converted to upper case before the search.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
class SimpleYouTubeSearchHandler(YouTubeSearchHandler):
    """ Handler for the keyword search route on YouTube.
    """
    def get(self, keywords):
        """ Answer a GET request by searching YouTube for the keywords captured from the URL.
            : param keywords : The search terms for the query.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedYouTubeSearchHandler(YouTubeSearchHandler):
    """ Handler for the related-videos route on YouTube.
    """
    def get(self, related_id):
        """ Answer a GET request by searching YouTube for the videos related to the one captured from the URL.
            : param related_id : The ID of the video whose related videos are requested.
            : type related_id : String
        """
        video_id = related_id
        self.search_videos(related_id=video_id)
class YouTubeSearchHandlerWithRegion(YouTubeSearchHandler):
    """ Handler for the keyword search route on YouTube, restricted to a Country.
    """
    def get(self, keywords, regionCode):
        """ Answer a GET request by searching YouTube for the keywords within the given Country.
            : param keywords : The search terms for the query.
            : type keywords : String
            : param regionCode : The Country code captured from the URL; it is converted to upper case before the search.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
class SimpleDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Handler for the keyword search route on DailyMotion.
    """
    def get(self, keywords):
        """ Answer a GET request by searching DailyMotion for the keywords captured from the URL.
            : param keywords : The search terms for the query.
            : type keywords : String
        """
        self.search_videos(keywords=keywords)
class RelatedDailyMotionSearchHandler(DailyMotionSearchHandler):
    """ Handler for the related-videos route on DailyMotion.
    """
    def get(self, related_id):
        """ Answer a GET request by searching DailyMotion for the videos related to the one captured from the URL.
            : param related_id : The ID of the video whose related videos are requested.
            : type related_id : String
        """
        video_id = related_id
        self.search_videos(related_id=video_id)
class DailyMotionSearchHandlerWithRegion(DailyMotionSearchHandler):
    """ Handler for the keyword search route on DailyMotion, restricted to a Country.
    """
    def get(self, keywords, regionCode):
        """ Answer a GET request by searching DailyMotion for the keywords within the given Country.
            : param keywords : The search terms for the query.
            : type keywords : String
            : param regionCode : The Country code captured from the URL; it is converted to upper case before the search.
            : type regionCode : String
        """
        region = regionCode.upper()
        self.search_videos(keywords, regionCode=region)
#WSGI application: maps each URL pattern to its handler. The groups captured by a
#pattern are the arguments passed to the get method of the handler.
#NOTE(review): debug=True presumably turns on webapp2's debug mode - confirm it is meant to stay on in production.
app = webapp2.WSGIApplication([
    #Generic routes (their handlers extend YouTubeSearchHandler)
    ('/videos/([^/]+)/?', SimpleVideoSearchHandler),
    ('/videos/([^/]+)/countries/([^/]+)/?', VideoSearchHandlerWithRegion),
    ('/videos/related/([^/]+)/?', RelatedVideoSearchHandler),
    #YouTube routes
    ('/youtube/([^/]+)/?', SimpleYouTubeSearchHandler),
    ('/youtube/([^/]+)/countries/([^/]+)/?', YouTubeSearchHandlerWithRegion),
    ('/youtube/related/([^/]+)/?', RelatedYouTubeSearchHandler),
    #DailyMotion routes
    ('/dailymotion/([^/]+)/?', SimpleDailyMotionSearchHandler),
    ('/dailymotion/([^/]+)/countries/([^/]+)/?', DailyMotionSearchHandlerWithRegion),
    ('/dailymotion/related/([^/]+)/?', RelatedDailyMotionSearchHandler)
], debug=True)
from video_search_handler import VideoSearchHandler
from apiclient.discovery import build
from apiclient.errors import HttpError
from google.appengine.api import memcache
import logging
#API key passed to the YouTube client; the value here is a placeholder to be replaced with a real key.
DEVELOPER_KEY = "REPLACE_ME"
#Name and version of the service requested when the API client is built.
YOUTUBE_API_SERVICE_NAME = "youtube"
YOUTUBE_API_VERSION = "v3"
#Number of results returned when the request does not specify 'max_results'.
YOUTUBE_DEFAULT_MAX_RESULTS = 5
#Page size: sent as the 'maxResults' parameter and used to map result indices to page indices.
YOUTUBE_MAX_RESULTS_PER_PAGE = 50
#Prefix to which the id of a video is appended to obtain its URL.
YOUTUBE_VIDEO_URL_PREFIX = "https://www.youtube.com/watch?v="
#Marker for this provider: used as the 'source' field of every result and as the prefix of memcached keys.
YOUTUBE_SOURCE_STR = 'y'
#Memcached key under which the page tokens are stored.
#("TOKES" is a typo for "TOKENS"; the name is left as is because it is referenced elsewhere in the module.)
PAGE_TOKES_KEYS = 'youtube_page_tokens'
def merge_dicts(old_d, new_d):
    """ Complete new_d with the entries of old_d whose keys it lacks.
        On keys present in both dictionaries, the value in new_d wins.
        : param old_d : The dictionary providing the fallback entries (not modified).
        : type old_d : Dictionary
        : param new_d : The dictionary to complete (modified in place).
        : type new_d : Dictionary
        : return : new_d itself, after the merge.
    """
    missing_keys = [key for key in old_d if key not in new_d]
    for key in missing_keys:
        new_d[key] = old_d[key]
    return new_d
class YouTubeSearchHandler(VideoSearchHandler):
    """ This class handles queries on YouTube.
        : param __VALID_ORDER_CRITERIA : The set of valid values for the order criterion.
        : type __VALID_ORDER_CRITERIA : Set
        : param __DEFAULT_SORTING_CRITERION : The default value for the order parameter.
        : type __DEFAULT_SORTING_CRITERION : String
    """
    #list all the possible valid values for the order
    __VALID_ORDER_CRITERIA = set(['date', 'rating', 'relevance', 'title', 'videoCount', 'viewCount'])
    __DEFAULT_SORTING_CRITERION = 'relevance'

    def __memcache_key(self, q, related_id, country):
        """ Construct the key to store the results for the current query on memcached.
            : param q : The keywords for the query
            : type q : String
            : param related_id : For related queries, the id of the video whose related results needs to be retrieved.
            : type related_id : String | None
            : param country : For queries restricted to particular Countries, the international 2-letter Country code.
            : type country : String | None
            : return : The key to use for memcached.
        """
        key = YOUTUBE_SOURCE_STR + '_' + q
        if related_id:
            key += VideoSearchHandler.RELATED_PREFIX + related_id
        if country:
            key += VideoSearchHandler.COUNTRY_PREFIX + country
        return key

    def validate_order(self, criterion):
        """ Validate the criterion passed, by verifying it is among the ones acceptable by the API
            : param criterion : The order criterion to validate.
            : type criterion : String
            : return : The value passed, if it is validated, or the default sorting criterion, otherwise.
        """
        if criterion in YouTubeSearchHandler.__VALID_ORDER_CRITERIA:
            return criterion
        return YouTubeSearchHandler.__DEFAULT_SORTING_CRITERION

    def __search_youtube_videos(self, keywords, params, related_id, regionCode):
        """ Perform the search on YouTube and return the list of videos retrieved.
            Pages of results already stored on memcached are reused; the others are downloaded and cached.
            A page after the first one can only be requested through the token returned with the
            previous page, so the tokens retrieved are cached as well.
            : param keywords : The search terms for the query.
            : type keywords : String
            : param params : The parameters in the request that are relevant for our search, like 'order', 'first_result', 'max_results'.
            : type params : Dictionary
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String | None
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String | None
            : return : The list of results requested, or None if an API call failed (in which case the error response has already been written).
        """
        youtube = build(YOUTUBE_API_SERVICE_NAME, YOUTUBE_API_VERSION, developerKey=DEVELOPER_KEY)
        #Each request instantiates its own memcached client, as needed by the gets-cas pattern.
        memcached_client = memcache.Client()

        def result_transform(search_result):
            #Convert a single item returned by YouTube into the format used for our responses.
            snippet = search_result['snippet']
            video_id = search_result['id']['videoId']
            return {
                'id': video_id,
                'url': YOUTUBE_VIDEO_URL_PREFIX + video_id,
                'title': snippet['title'],
                'thumbnail': snippet['thumbnails']['default']['url'],
                'date': snippet['publishedAt'],
                'source': YOUTUBE_SOURCE_STR
            }

        #try to retrieve data from the cache, if present
        key = self.__memcache_key(keywords, related_id, regionCode)
        cached = self.get_from_memcached(key, memcached_client)
        if cached is None:
            cached = {}

        #Computes the (1-based) indices of the first and last result to be returned
        first_result = params.get('first_result', 1)
        max_results = params.get('max_results')
        if max_results is None:
            max_results = YOUTUBE_DEFAULT_MAX_RESULTS
        last_result = first_result + max_results - 1

        #Computes the (0-based) index of the first and last page of results needed
        first_page = (first_result - 1) // YOUTUBE_MAX_RESULTS_PER_PAGE
        last_page = (last_result - 1) // YOUTUBE_MAX_RESULTS_PER_PAGE

        page_tokens = self.get_from_memcached(PAGE_TOKES_KEYS, memcached_client)
        if not page_tokens:
            page_tokens = {
                0: None     #placeHolder so that max(page_tokens) won't trigger a ValueError
            }

        #prepare the parameters for the list method; type 'video' leaves channels and playlists out of the results
        search_params = {
            'q': keywords,
            'part': "id,snippet",
            'type': 'video',
            'maxResults': YOUTUBE_MAX_RESULTS_PER_PAGE
        }
        if related_id is not None:
            search_params['relatedToVideoId'] = related_id
        if regionCode is not None:
            search_params['regionCode'] = regionCode
        order = params.get('order')
        if order is not None:
            search_params['order'] = order

        #Start from the first page requested, if its token is known; otherwise from the
        #last page whose token is known, walking forward from there to collect the missing tokens.
        page = min(max(page_tokens), first_page)
        while page > 0 and page not in page_tokens:
            page -= 1

        results = []
        while page <= last_page:
            #A cached page can be skipped only if that doesn't leave us without the token for the following page.
            if page in cached and (page == last_page or (page + 1) in page_tokens):
                if page >= first_page:
                    results += cached[page]
                page += 1
                continue

            if page > 0:
                #Invariant: page in page_tokens
                search_params['pageToken'] = page_tokens[page]

            # Call the search.list method to retrieve results matching the keywords.
            try:
                search_response = youtube.search().list(
                    **search_params     #unpack the dictionary to a list of named parameters
                ).execute()
            except HttpError as e:
                logging.warning("YOUTUBE API ERROR " + str(e))
                self.return_error(400, str(e))
                return None

            page_results = [result_transform(item) for item in search_response.get("items", [])]
            cached[page] = page_results
            #Pages before first_page are downloaded only to get to the token of the following
            #page: they must stay out of results, or the indices used for the final slice would be off.
            if page >= first_page:
                results += page_results

            next_token = search_response.get('nextPageToken')
            if next_token is None:
                #there is not a next page to download
                break
            page += 1
            page_tokens[page] = next_token

        self.store_to_memcached(cached, key, merge_dicts, memcached_client)
        self.store_to_memcached(page_tokens, PAGE_TOKES_KEYS, merge_dicts, memcached_client)

        #Adjust the indices to compensate for the pages before first_page, that are not in results.
        skipped = first_page * YOUTUBE_MAX_RESULTS_PER_PAGE
        #Return only the subset of the retrieved results requested by the caller.
        return results[first_result - 1 - skipped:last_result - skipped]

    def search_videos(self, keywords="", related_id=None, regionCode=None):
        """ Main method: search the provider and write the list of videos retrieved to the response.
            Nothing is written here when the search fails (the error response is produced by the search itself).
            : param keywords : The search terms for the query.
            : type keywords : String
            : param related_id : If the query is about retrieving results related to a single video, this parameter will contain the ID of that video.
            : type related_id : String
            : param regionCode : The international code (as a 2-letter string) for the Country from where the results should come from.
            : type regionCode : String
        """
        res = self.__search_youtube_videos(keywords, params=self.extract_params(), related_id=related_id, regionCode=regionCode)
        if res is not None:
            self.return_results(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.