Last active
March 16, 2018 17:21
-
-
Save ljmccarthy/1329fec1a2f693f62af3301a58f75c0a to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import json | |
import os | |
import requests | |
import sys | |
import urllib.parse | |
def google_search(**params):
    """Run one search through the SerpApi endpoint and return result links.

    Args:
        **params: Query-string parameters forwarded to
            ``https://serpapi.com/search`` (for example ``q``, ``num``,
            ``start``). ``source`` and ``output`` are always overridden with
            ``'python'`` and ``'json'``.

    Returns:
        A list with the ``link`` value of every entry in the response's
        ``organic_results``. The list is empty when the HTTP status is not
        200, when the body is not a JSON object, or when the payload has no
        ``organic_results`` key.

    Raises:
        requests.RequestException: On connection failures or timeouts.
    """
    params = dict(params, source='python', output='json')
    # A timeout keeps a stalled connection from hanging the script forever.
    response = requests.get('https://serpapi.com/search', params=params,
                            timeout=30)
    if response.status_code != 200:
        return []
    try:
        data = json.loads(response.text)
    except ValueError:
        # Treat an unparsable body like any other failed request.
        return []
    if not isinstance(data, dict):
        return []
    # A payload without 'organic_results' (e.g. an error payload, or a page
    # requested past the last result) means "no results", not a crash;
    # callers paginate until they receive an empty list.
    results = data.get('organic_results') or []
    return [
        result['link']
        for result in results
        if isinstance(result, dict) and 'link' in result
    ]
def google_search_all(q):
    """Collect the links from every result page for the query ``q``.

    Pages of up to 100 results are requested through ``google_search``,
    advancing the ``start`` offset by the number of links each page returned,
    until a page comes back empty.

    Args:
        q: The search query string.

    Returns:
        A set containing every link gathered across all pages.
    """
    found = set()
    position = 0
    page = google_search(q=q, num=100, start=position)
    while page:
        found.update(page)
        position += len(page)
        page = google_search(q=q, num=100, start=position)
    return found
def download(url, filename):
    """Stream the resource at ``url`` into the local file ``filename``.

    The file is only created once the server has answered with a successful
    status, and it is deleted again if the transfer is interrupted for any
    reason, so a file that exists after this call is a complete download.

    Args:
        url: Address of the resource to fetch.
        filename: Path of the file to write (overwritten if it exists).

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
        requests.RequestException: On connection failures or timeouts.
        OSError: If the destination file cannot be opened or written.
    """
    # The context manager releases the streamed connection on every exit path.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail before touching the disk so an error page is never stored
        # under the target name.
        response.raise_for_status()
        with open(filename, 'wb') as fout:
            try:
                for chunk in response.iter_content(None):
                    fout.write(chunk)
            except BaseException:
                # Covers Ctrl-C as well as network and disk errors: remove the
                # truncated file so it is not mistaken for a finished one.
                # Close first; an open file cannot be deleted on Windows.
                fout.close()
                try:
                    os.remove(filename)
                except OSError:
                    pass
                raise
def filename_from_url(url):
    """Derive a safe local PDF filename from ``url``.

    Only the last segment of the URL *path* is used, so query strings and
    fragments never end up in the name. The segment is percent-decoded and
    then reduced to its basename, because decoding can reintroduce path
    separators (``%2F``) that would otherwise let the name point outside the
    current directory.

    Args:
        url: The URL of the document.

    Returns:
        The filename, with ``.pdf`` appended unless it already ends in
        ``.pdf`` (compared case-insensitively), or ``None`` when the URL has
        no usable final path segment.
    """
    path = urllib.parse.urlsplit(url).path
    name = urllib.parse.unquote(path.rsplit('/', 1)[-1])
    # Normalise backslashes so the basename is also taken on POSIX systems.
    name = os.path.basename(name.replace('\\', '/'))
    if name in ('', '.', '..'):
        return None
    if not name.lower().endswith('.pdf'):
        name += '.pdf'
    return name


def main():
    """Download every PDF manual found by the search into the current directory.

    Files that already exist locally are skipped. A failed download is
    reported on stderr and does not stop the remaining downloads.
    """
    urls = google_search_all('site:www.motherboards.org/files/manuals filetype:pdf')
    for url in sorted(urls):
        filename = filename_from_url(url)
        if filename is None:
            continue
        if os.path.isfile(filename):
            print('Already downloaded {}'.format(url))
            continue
        print('Downloading {}...'.format(url))
        try:
            download(url, filename)
        except requests.RequestException as exc:
            # One unreachable or broken URL should not abort the whole batch.
            print('Failed to download {}: {}'.format(url, exc), file=sys.stderr)


if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Exit quietly with a failure status instead of printing a traceback.
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment