Skip to content

Instantly share code, notes, and snippets.

@ljmccarthy
Last active March 16, 2018 17:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ljmccarthy/1329fec1a2f693f62af3301a58f75c0a to your computer and use it in GitHub Desktop.
Save ljmccarthy/1329fec1a2f693f62af3301a58f75c0a to your computer and use it in GitHub Desktop.
import json
import os
import requests
import sys
import urllib.parse
def google_search(**params):
    """Run one search through SerpApi and return the organic result links.

    Args:
        **params: Query parameters forwarded to the search endpoint
            (the caller in this file passes ``q``, ``num`` and ``start``).
            ``source`` and ``output`` are always overridden with
            ``'python'`` and ``'json'``.

    Returns:
        A list of the ``link`` values of the organic results. The list is
        empty when the HTTP status is not 200 or when the payload carries
        no organic results.

    Raises:
        ValueError: If a 200 response body is not valid JSON
            (``json.JSONDecodeError`` is a subclass of ``ValueError``).
    """
    params = dict(params, source='python', output='json')
    response = requests.get('https://serpapi.com/search', params)
    if response.status_code != 200:
        return []
    data = json.loads(response.text)
    # A 200 payload is not guaranteed to contain 'organic_results'
    # (presumably the case for "no results"/error payloads -- confirm against
    # the SerpApi docs). Treat a missing key as "no results" instead of
    # raising KeyError, so pagination callers can detect the end cleanly.
    organic_results = data.get('organic_results') or []
    return [result['link'] for result in organic_results if 'link' in result]
def google_search_all(q):
    """Collect the result URLs of every page of a search.

    Pages of up to 100 results are requested through ``google_search``,
    advancing the ``start`` offset by the number of results received.

    Args:
        q: The search query string.

    Returns:
        A set of all distinct result URLs that were found.
    """
    urls = set()
    offset = 0
    while True:
        results = google_search(q=q, num=100, start=offset)
        known = len(urls)
        urls.update(results)
        # Stop as soon as a page contributes nothing new. This covers the
        # empty page that ends normal pagination, and also prevents an
        # endless request loop if the backend keeps answering with results
        # that were already collected.
        if len(urls) == known:
            return urls
        offset += len(results)
def download(url, filename):
    """Stream the resource at ``url`` into the local file ``filename``.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the file to create (overwritten if it exists).

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
            Nothing is written in that case, so an error page is never
            stored under the target name.
        Exception: Any error raised while transferring or writing is
            re-raised after the partially written file has been removed.
    """
    response = requests.get(url, stream=True)
    try:
        # Check the status before touching the filesystem.
        response.raise_for_status()
        try:
            with open(filename, 'wb') as fout:
                for chunk in response.iter_content(None):
                    fout.write(chunk)
        except BaseException:
            # Includes KeyboardInterrupt: never leave a truncated file
            # behind, because callers treat an existing file as a finished
            # download. The original exception is always re-raised.
            try:
                os.remove(filename)
            except OSError:
                pass
            raise
    finally:
        # With stream=True the connection stays open until it is released.
        response.close()
def _filename_from_url(url):
    """Return the local PDF filename for ``url``, or None if it has none."""
    _, slash, tail = url.rpartition('/')
    if not slash:
        return None
    # Percent-decoding can reintroduce path separators (e.g. %2F); keep only
    # the final path component so the file is always created in the current
    # directory.
    filename = os.path.basename(urllib.parse.unquote(tail))
    if not filename:
        return None
    if not filename.endswith('.pdf'):
        filename = filename + '.pdf'
    return filename


def main():
    """Download every PDF manual found by the search into the current directory.

    Files that already exist locally are skipped. A download that fails with
    a ``requests`` error is reported on stderr and does not stop the batch.
    """
    urls = google_search_all('site:www.motherboards.org/files/manuals filetype:pdf')
    for url in sorted(urls):
        filename = _filename_from_url(url)
        if filename is None:
            continue
        if os.path.isfile(filename):
            print('Already downloaded {}'.format(url))
            continue
        print('Downloading {}...'.format(url))
        try:
            download(url, filename)
        except requests.RequestException as exc:
            # The file did not exist before this attempt, so anything left
            # behind is incomplete; remove it so a later run retries it.
            try:
                os.remove(filename)
            except OSError:
                pass
            print('Failed to download {}: {}'.format(url, exc), file=sys.stderr)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        sys.exit(1)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment