Skip to content

Instantly share code, notes, and snippets.

@mursts
Created September 15, 2015 15:34
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mursts/1709f01572365a98d057 to your computer and use it in GitHub Desktop.
Save mursts/1709f01572365a98d057 to your computer and use it in GitHub Desktop.
BING APIサーチを使って画像をダウンロードします
#!/usr/bin/env python
# coding: utf-8
import asyncio
import aiohttp
import requests
import json
import os
import click
from hashlib import md5
# Bing image search endpoint (Azure DataMarket, v1 API).
ENDPOINT = 'https://api.datamarket.azure.com/Bing/Search/v1/Image'

# Directory the downloaded images are written to: ``~/image``.
# expanduser() is used instead of os.environ['HOME'] so that importing the
# module does not raise KeyError on platforms where HOME is not set.
SAVE_PATH = os.path.join(os.path.expanduser('~'), 'image')

# Maximum number of downloads allowed to run concurrently.
semaphore = asyncio.Semaphore(4)

# Number of bytes requested from the response stream per read.
CHUNK_SIZE = 1024
@asyncio.coroutine
def fetch_and_save(url):
    """Download ``url`` and write the body to a file under ``SAVE_PATH``.

    The file is named after the MD5 hex digest of the URL, followed by the
    extension found in the URL's path component. Responses whose status is
    not 200 are skipped without writing anything.

    Args:
        url: URL of the resource to download.
    """
    from urllib.parse import urlsplit

    # Use only the path component for the extension; splitting the raw URL
    # would drag query strings / fragments (or the host's TLD) into the name.
    ext = os.path.splitext(urlsplit(url).path)[1]
    digest = md5(url.encode('utf-8')).hexdigest()
    file_name = os.path.join(SAVE_PATH, digest + ext)

    r = yield from aiohttp.get(url)
    try:
        if r.status != 200:
            return
        # Make sure the target directory exists before opening the file.
        os.makedirs(SAVE_PATH, exist_ok=True)
        with open(file_name, 'wb') as f:
            # Stream the body in CHUNK_SIZE pieces; an empty read means EOF.
            while True:
                chunk = yield from r.content.read(CHUNK_SIZE)
                if not chunk:
                    break
                f.write(chunk)
    finally:
        # Always give the connection back, including on the early return
        # above and when reading or writing fails.
        r.close()
@asyncio.coroutine
def get(url):
    """Run ``fetch_and_save(url)`` while holding one slot of ``semaphore``.

    Args:
        url: URL passed through to ``fetch_and_save``.
    """
    # Wait for a free slot, then release it again however the download ends.
    yield from semaphore.acquire()
    try:
        yield from fetch_and_save(url)
    finally:
        semaphore.release()
# Let click accept -h in addition to --help.
CONTEXT_SETTINGS = dict(help_option_names=['-h', '--help'])


# Command-line entry point: query the Bing image search API for ``word`` and
# download every returned ``MediaUrl`` through ``get``.
#
#   adult -- True when -a/--adult is given; sends the 'Off' adult filter
#            instead of 'Moderate'.
#   word  -- search term, sent quoted as the ``Query`` parameter.
#
# Requires the BING_ACCESS_KEY environment variable; prints a hint and returns
# when it is missing. HTTP errors from the search request propagate via
# ``raise_for_status``.
#
# NOTE: documented with comments rather than a docstring on purpose, because
# click would print a docstring as the command's --help text.
@click.command(context_settings=CONTEXT_SETTINGS)
@click.option('-a', '--adult', flag_value=True, default=False, help='Adult filter OFF')
@click.argument('word')
def main(adult, word):
    if 'BING_ACCESS_KEY' not in os.environ:
        print('BINGのアクセスキーを設定してください')
        print(' ex) export BING_ACCESS_KEY="Your access key"')
        return

    access_key = os.environ['BING_ACCESS_KEY']
    adult_filter = 'Off' if adult else 'Moderate'

    # The API expects string values wrapped in single quotes.
    params = {'Query': "'{}'".format(word),
              'Market': "'ja-JP'",
              'Adult': "'{}'".format(adult_filter),
              '$format': 'json'}

    # The access key is sent as the basic-auth password with an empty user.
    r = requests.get(ENDPOINT, params=params, auth=('', access_key))
    r.raise_for_status()

    urls = [x['MediaUrl'] for x in json.loads(r.text)['d']['results']]
    if not urls:
        # asyncio.wait() raises ValueError for an empty collection, so there
        # is nothing to schedule when the search returned no results.
        return

    tasks = [get(u) for u in urls]
    # Obtain the loop before entering the try block so the finally clause
    # never refers to an unbound name.
    loop = asyncio.get_event_loop()
    try:
        loop.run_until_complete(asyncio.wait(tasks))
    finally:
        loop.close()


if __name__ == '__main__':
    main()
aiohttp
requests
click
@mursts
Copy link
Author

mursts commented Sep 15, 2015

  • Python3.4
  • Python3.3 # pip install asyncio

実行方法

$ pip install -r requirements.txt
$ export BING_ACCESS_KEY="Your access key"
$ ./image_downloader.py -h

BINGのアクセスキー

https://datamarket.azure.com/dataset/bing/search

ログインして、マイアカウントのプライマリ アカウント キーから取得する

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment