Skip to content

Instantly share code, notes, and snippets.

@mikemccabe
Created September 13, 2012 07:29
Show Gist options
  • Save mikemccabe/3712588 to your computer and use it in GitHub Desktop.
Save mikemccabe/3712588 to your computer and use it in GitHub Desktop.
import json
import urllib
import collections
def get_iterable(x):
    """Return ``x`` as something safe to loop over.

    A tuple or list is returned unchanged (the same object); any other
    value is wrapped in a one-element tuple.
    """
    # Only tuples and lists count as "already iterable" here, so a bare
    # string is wrapped as a single value instead of being walked
    # character by character.
    if isinstance(x, (tuple, list)):
        return x
    return (x,)
def iter_contains_prefix(iter, prefix):
    """Return True if any string in ``iter`` starts with ``prefix``.

    Stops at the first match; returns False for an empty iterable.
    """
    return any(candidate.startswith(prefix) for candidate in iter)
def get_url(url):
    """Fetch ``url`` and return the raw response body.

    Args:
        url: The URL to retrieve.

    Returns:
        Whatever the response object's ``read()`` returns (the full body).

    Raises:
        Any error raised by ``urlopen`` or by reading the response.
    """
    # ``urlopen`` lives in ``urllib.request`` on Python 3 and directly in
    # ``urllib`` on Python 2; importing locally keeps this function usable
    # on both without touching the module-level imports.
    try:
        from urllib.request import urlopen
    except ImportError:
        from urllib import urlopen

    f = urlopen(url)
    try:
        return f.read()
    finally:
        # Close the connection even when read() fails part-way through.
        f.close()
def get_meta(item_iter):
    """Yield the parsed JSON metadata record for each item in ``item_iter``.

    Each item is appended to the archive.org metadata URL, fetched with
    ``get_url`` and decoded with ``json.loads``.  The running count is
    printed before every 100th fetch as a progress indicator.
    """
    for count, identifier in enumerate(item_iter, 1):
        if count % 100 == 0:
            print(count)
        raw = get_url("http://archive.org/metadata/" + identifier)
        yield json.loads(raw)
def item_iterator(filename='items'):
    """Yield item identifiers, one per line, from a text file.

    Args:
        filename: Path of the file to read.  Defaults to ``'items'`` in the
            current working directory.

    Yields:
        Each non-blank line with surrounding whitespace stripped.

    Raises:
        IOError/OSError: if the file cannot be opened (raised on first
            iteration, since this is a generator).
    """
    # ``with`` guarantees the handle is closed once the generator is
    # exhausted or closed, instead of leaking it.
    with open(filename) as f:
        for line in f:
            item = line.strip()
            # A blank line is not an identifier; passing it on would make
            # the caller request metadata for an empty item name.
            if item:
                yield item
# Build the lazy pipeline: ``ies`` yields the stripped lines of the "items"
# file and ``ems`` turns each one into its parsed metadata record.  Both are
# generators, so no file or network access happens until ``ems`` is iterated.
ies = item_iterator()
ems = get_meta(ies)
def file_has_audio(file):
    """Return True if ``file`` describes an original audio file.

    Args:
        file: A dict describing one file of an item.  Only its ``'source'``
            and ``'format'`` keys are inspected.

    Returns:
        True when ``source`` is ``'original'`` and ``format`` is one of the
        recognised audio formats; False otherwise, including when either
        key is missing.
    """
    audio_formats = ('VBR MP3', 'AIFF', '24bit Flac', 'Flac',
                     'Apple Lossless Audio', 'Advanced Audio Coding')
    # .get() on both keys: an entry lacking 'format' is simply "not audio"
    # rather than a KeyError that aborts the whole run.
    return (file.get('source') == 'original' and
            file.get('format') in audio_formats)
# Report every item that has at least one original audio file tagged with an
# AcoustID but carries no MusicBrainz release identifier of its own.
for md in ems:
    # Recomputed from scratch for each item, so the flag can neither be
    # unbound (first item without audio) nor left over from a previous
    # file or item.  Records without a 'files' list count as "no audio".
    found_accoustid = any(
        iter_contains_prefix(
            get_iterable(audio_file.get('external-identifier', [])),
            'urn:acoustid')
        for audio_file in md.get('files', [])
        if file_has_audio(audio_file)
    )
    if not found_accoustid:
        continue

    # Item-level fields live under md['metadata'] (the same place the
    # identifier below is read from) and the key is hyphenated, matching
    # the file-level lookup above.
    # NOTE(review): confirm against the archive.org Metadata API docs.
    mdexts = md.get('metadata', {}).get('external-identifier', [])
    found_mb = iter_contains_prefix(get_iterable(mdexts), 'urn:mb_release')
    if not found_mb:
        print(md['metadata']['identifier'] +
              u' has accoustid but no mb_release')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment