Skip to content

Instantly share code, notes, and snippets.

@quad
Created May 2, 2012 12:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save quad/2576135 to your computer and use it in GitHub Desktop.
Save quad/2576135 to your computer and use it in GitHub Desktop.
Blonde, brunette, redhead...
#
# The hair colors of Australian fashion models
#
import json
import re
import urllib.parse
import urllib.request
# URL template for the English Wikipedia MediaWiki API endpoint; ``%s`` is
# replaced with the URL-encoded query string.  HTTPS is used directly so the
# request is not sent in clear text and does not incur an HTTP->HTTPS redirect.
API = 'https://en.wikipedia.org/w/api.php?%s'
# Matches a ``haircolor = value`` infobox field.  The value runs up to the
# next field separator (``|``), the end of the template (``}``), or the end
# of the text, so a field that is the last one in the infobox is still found.
_HAIR_COLOR_RE = re.compile(r'haircolor\s*=\s*([^|}]*)')


def hair_color(revision):
    """Extract the ``haircolor`` infobox value from a page's wikitext.

    Args:
        revision: Wikitext of a page revision, as a string.

    Returns:
        The text assigned to ``haircolor`` with surrounding whitespace
        removed, or ``''`` when the field is absent or empty.
    """
    match = _HAIR_COLOR_RE.search(revision)
    if match is None:
        return ''
    return match.group(1).strip()
def aussie_female_models():
    """Return ``(title, hair colour)`` pairs for Australian female models.

    Asks the MediaWiki API at ``API`` for the main-namespace members of
    ``Category:Australian female models`` together with the wikitext of each
    page's lead section (``rvsection=0``), and runs ``hair_color`` on that
    wikitext.

    The API may spread the revision text over several responses; each
    response's ``continue`` values are sent back with the next request until
    the API stops returning them, and the per-page results are merged.

    Returns:
        A list of ``(title, color)`` tuples, one per page, in the order the
        pages were first returned.  ``color`` is ``''`` when ``hair_color``
        finds no value or when no revision text was returned for the page.

    Raises:
        urllib.error.URLError: If a request to the API fails.
    """
    params = {
        'action': 'query',
        'prop': 'revisions',
        'format': 'json',
        'rvprop': 'content',
        'rvsection': '0',
        'generator': 'categorymembers',
        'gcmtitle': 'Category:Australian female models',
        'gcmnamespace': '0',
        'gcmlimit': '500',
    }
    # Wikimedia asks API clients to identify themselves; the default urllib
    # User-Agent is generic and may be refused.
    headers = {
        'User-Agent': 'aussie-model-hair-colors/1.0 '
                      '(https://gist.github.com/quad/2576135)',
    }

    titles = {}      # pageid -> page title, in first-seen order
    wikitext = {}    # pageid -> lead-section wikitext, once received
    continuation = {}
    while True:
        request_params = dict(params)
        request_params.update(continuation)
        query = urllib.parse.urlencode(request_params)
        request = urllib.request.Request(API % query, headers=headers)
        # The context manager closes the connection even if decoding fails.
        with urllib.request.urlopen(request) as response:
            data = json.loads(response.read().decode('utf-8'))

        pages = data.get('query', {}).get('pages', {})
        for pageid, info in pages.items():
            titles.setdefault(pageid, info['title'])
            # A page can appear in one batch without its revision text and
            # receive it in a later batch.
            revisions = info.get('revisions')
            if revisions:
                wikitext[pageid] = revisions[0]['*']

        continuation = data.get('continue')
        if not continuation:
            break

    return [(title, hair_color(wikitext.get(pageid, '')))
            for pageid, title in titles.items()]
def main():
    """Print one tab-separated ``title<TAB>hair colour`` line per model."""
    for entry in aussie_female_models():
        title, shade = entry
        line = "%s\t%s" % (title, shade)
        print(line)
# Run the report only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment