Skip to content

Instantly share code, notes, and snippets.

@codersquid
Created August 23, 2015 02:49
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save codersquid/e7b5237c397b0fcd6662 to your computer and use it in GitHub Desktop.
Save codersquid/e7b5237c397b0fcd6662 to your computer and use it in GitHub Desktop.
PyCon ZA 2014 scratchpad
#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
from collections import defaultdict
from steve.util import (
#get_from_config,
get_project_config,
save_json_files,
#load_json_files,
)
import json
import internetarchive
def files_by_format(item):
    """Group the files of ``item`` by their ``format`` attribute.

    ``item`` must provide ``iter_files()``; every yielded file object must
    have a ``format`` attribute.

    Returns a ``defaultdict(list)`` mapping each format to the files that
    carry it, in iteration order.  Because it is a defaultdict, indexing a
    missing format with ``[]`` inserts and returns an empty list; use
    ``.get()`` to probe without inserting.
    """
    grouped = defaultdict(list)
    for file_obj in item.iter_files():
        grouped[file_obj.format].append(file_obj)
    return grouped
def get_format_url(files_lookup, fmt):
    """Return the URL of the first file registered under ``fmt``.

    ``files_lookup`` is a mapping from format name to a list of file objects
    with a ``url`` attribute (the shape produced by ``files_by_format``).

    Returns ``''`` when the format is absent or its list is empty.
    """
    # .get() rather than [] so a defaultdict lookup does not grow new keys.
    candidates = files_lookup.get(fmt)
    if not candidates:
        return ''
    return candidates[0].url
def subject2tags(metadata):
    """Turn the ``subject`` metadata field into a list of tags.

    ``subject`` may be a single ``;``-separated string or a list/tuple of
    such strings (a repeated metadata field).  Each tag is stripped of
    surrounding whitespace and empty tags are dropped.

    Returns ``[]`` when the field is missing or empty.
    """
    subject = metadata.get('subject')
    if not subject:
        return []

    # A repeated field arrives as a list; calling .split on it would raise.
    if isinstance(subject, (list, tuple)):
        entries = subject
    else:
        entries = [subject]

    tags = []
    for entry in entries:
        for piece in entry.split(';'):
            tag = piece.strip()
            if tag:
                tags.append(tag)
    return tags
def creator2speakers(metadata):
    """Return the ``creator`` metadata field as a flat list of speakers.

    A single creator becomes a one-element list.  A list/tuple of creators
    (a repeated metadata field) is returned as a flat list rather than being
    nested inside another list.  Empty entries are dropped.

    Returns ``[]`` when the field is missing or empty.
    """
    creator = metadata.get('creator')
    if not creator:
        return []
    if isinstance(creator, (list, tuple)):
        return [name for name in creator if name]
    return [creator]
def language2language(metadata):
    """Return the raw ``language`` value from ``metadata``.

    Raises ``KeyError`` when ``metadata`` has no ``language`` entry.
    """
    # TODO: need to lookup 3 letter codes; the value is passed through as-is.
    return metadata['language']
def item2source_url(item):
    """Return the archive.org details-page URL for ``item``.

    Reads ``item.protocol`` and ``item.identifier``.  The protocol is placed
    directly in front of ``//``, so it needs to carry its own trailing colon
    (e.g. ``'https:'``) for the result to be a well-formed URL.
    """
    template = '{}//archive.org/details/{}'
    return template.format(item.protocol, item.identifier)
def item2video(item, category, language):
    """Build a video record (a plain dict) from an Internet Archive item.

    ``item`` must expose ``exists``, ``metadata`` and whatever the helper
    functions read (``iter_files()``, ``protocol``, ``identifier``).
    ``category`` and ``language`` are copied into the record unchanged.

    Returns ``{}`` when ``item.exists`` is false.
    Raises ``KeyError`` when the item's metadata has no ``title``.
    """
    if not item.exists:
        return {}

    md = item.metadata
    # The same description text feeds both the description and the summary.
    description = md.get('description', '')
    file_lookup = files_by_format(item)

    return {
        'category': category,
        # NOTE(review): the meaning of state 2 is defined by the consumer of
        # these records, not here -- confirm before changing.
        'state': 2,
        'title': md['title'],
        'description': description,
        'summary': description,
        'tags': subject2tags(md),
        'speakers': creator2speakers(md),
        'language': language,
        'copyright_text': md.get('licenseurl', ''),
        'recorded': md.get('date', ''),
        'whiteboard': 'ia scrape',
        'source_url': item2source_url(item),
        'thumbnail_url': get_format_url(file_lookup, 'Thumbnail'),
        'video_ogv_url': get_format_url(file_lookup, 'Ogg Video'),
        'video_ogv_download_only': False,
        'video_mp4_url': get_format_url(file_lookup, 'MPEG4'),
        'video_mp4_download_only': False,
        # No WebM or FLV formats are looked up, so those URLs stay empty.
        'video_webm_download_only': False,
        'video_webm_url': '',
        'video_flv_download_only': False,
        'video_flv_url': '',
    }
def main():
    """Search archive.org for ``subject:pyconza2014`` and save one JSON file per item.

    Every existing item found by the search is converted with ``item2video``
    and handed to ``save_json_files`` as a ``('json/<identifier>.json', video)``
    pair, together with the project config.
    """
    cfg = get_project_config()
    search = internetarchive.search_items('subject:pyconza2014')
    # Collect the identifiers first so the search is fully consumed before
    # the per-item lookups start.
    identifiers = [result['identifier'] for result in search]

    videos = []
    for identifier in identifiers:
        item = internetarchive.Item(identifier)
        if not item.exists:
            # item2video would only return {} here, which was never saved.
            continue
        video = item2video(item, 'PyCon ZA 2014', 'English')
        videos.append(('json/{}.json'.format(identifier), video))

    save_json_files(cfg, videos)


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment