Skip to content

Instantly share code, notes, and snippets.

@rummelonp
Created February 29, 2012 07:12
Show Gist options
  • Save rummelonp/1938799 to your computer and use it in GitHub Desktop.
Save rummelonp/1938799 to your computer and use it in GitHub Desktop.
いかにしておっぱい画像をダウンロードするか〜2012 for Python
# -*- coding: utf-8 -*-
import os.path, re, hashlib, urllib, json
# Image-search downloader: pages through Bing image search results for a
# fixed query and stores every .jpg result under `dir`, using the SHA-224
# hex digest of the image URL as the file name so that re-runs skip images
# that are already on disk.

# Value sent as the 'Appid' request parameter. Left empty here --
# presumably it has to be set to a valid application id before running.
appid = ''
# JSON endpoint the search requests are sent to.
uri = 'http://api.bing.net/json.aspx'
# Directory that receives the downloaded images.
dir = './data'
# Number of result pages fetched so far; each page asks for 50 images.
page_count = 0
# Number of .jpg candidates seen so far; only used for progress output.
download_count = 0

# open() does not create missing directories, so make sure the target
# directory exists before the first image is written.
if not os.path.isdir(dir):
    os.makedirs(dir)

while True:
    offset = page_count * 50
    query = urllib.urlencode({
        'Appid': appid,
        'Version': '2.2',
        'Market': 'ja-JP',
        'Sources': 'Image',
        'Image.Count': 50,
        'Image.Offset': offset,
        'Adult': 'off',
        'Query': 'おっぱい',
    })
    api_res = urllib.urlopen(uri + '?' + query)
    try:
        ref = json.loads(api_res.read())
    finally:
        api_res.close()

    # A response without an image result list ends the paging. Only the
    # lookup errors are caught so that Ctrl-C is not swallowed here.
    try:
        results = ref['SearchResponse']['Image']['Results']
    except (KeyError, TypeError):
        break
    # An empty page would otherwise keep the loop requesting ever larger
    # offsets forever.
    if not results:
        break

    for entry in results:
        media_url = entry['MediaUrl']
        # Only .jpg URLs are wanted. The previous check used
        # re.match('\.jpg$'), which is anchored at the start of the string
        # and therefore never matched, so nothing was filtered out.
        if not media_url.endswith('.jpg'):
            continue
        download_count += 1
        # json.loads yields unicode strings; encode explicitly so that a
        # non-ASCII URL does not raise UnicodeEncodeError while hashing.
        digest = hashlib.sha224(media_url.encode('utf-8')).hexdigest()
        filepath = dir + '/' + digest + '.jpg'
        if os.path.exists(filepath):
            continue
        print(str(download_count) + ": Download... " + media_url)
        # Best effort: any failure to open one image just skips it.
        # `except Exception` (not a bare except) keeps KeyboardInterrupt
        # and SystemExit working.
        try:
            res = urllib.urlopen(media_url)
        except Exception:
            continue
        try:
            # A missing Content-Type header counts as "not an image".
            content_type = res.info().get('Content-Type') or ''
            if content_type.startswith('image'):
                # Read the body before creating the file so that a failed
                # read does not leave an empty file that later runs would
                # treat as already downloaded.
                data = res.read()
                # Binary mode: the payload is image bytes, not text.
                with open(filepath, 'wb') as f:
                    f.write(data)
        finally:
            res.close()
    page_count += 1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment