Skip to content

Instantly share code, notes, and snippets.

@yeiichi
Last active July 31, 2020 08:05
Show Gist options
  • Save yeiichi/401115f869fcfc8b5102f2c8fa36eea1 to your computer and use it in GitHub Desktop.
Save yeiichi/401115f869fcfc8b5102f2c8fa36eea1 to your computer and use it in GitHub Desktop.
Download a Google Results page.
import os
from urllib.parse import quote_plus, urlunsplit
import requests
import re
PROJECT_ROOT_PATH = '.'
class GoogleResultsPage:
    '''Build a Google search URL and download the matching results page.

    The constructor takes the query text and the number of results per
    page and assembles the search URL once; ``page_fetcher`` performs the
    HTTP GET and returns the response body as text.
    '''

    # Browser-like User-Agent sent with the request.  Built with implicit
    # string concatenation so the product tokens are separated by exactly
    # one space; a backslash continuation inside a single literal either
    # glues the tokens together or embeds the next line's indentation in
    # the header value.
    _USER_AGENT = (
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_6) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/84.0.4147.105 Safari/537.36'
    )

    # Timeout tuple handed to requests.get (connect, read), in seconds.
    _TIMEOUT = (3.05, 27)

    def __init__(self, query, rslts_num):
        '''Store the inputs and pre-compute the search URL.

        Args:
            query: Search text; URL-encoded with ``quote_plus``.
            rslts_num: Results per page (int or str); converted with
                ``str`` and URL-encoded so that stray ``&`` or ``=``
                characters cannot inject extra query parameters.
        '''
        self.__qry = query
        self.__num = rslts_num
        query_string = 'q={}&num={}'.format(
            quote_plus(self.__qry), quote_plus(str(self.__num)))
        self.__sstr = urlunsplit(
            ('https', 'www.google.com', '/search', query_string, ''))

    def page_fetcher(self):
        '''Fetch the results page and return the response body as text.

        The HTTP status code is not inspected, so the body of an error
        response is returned unchanged.  Exceptions raised by
        ``requests.get`` (e.g. on timeout) propagate to the caller.
        '''
        response = requests.get(
            self.__sstr,
            headers={'user-agent': self._USER_AGENT},
            timeout=self._TIMEOUT)
        return response.text
################################
# Output to a file.
def html_to_file(html_text, search_query=None):
    '''Write *html_text* to an ``.html`` file named after the search query.

    The file is created inside ``PROJECT_ROOT_PATH``.  Its name is the
    query with path separators, punctuation that is awkward in file names,
    and whitespace each replaced by ``_``, followed by ``.html``.

    Args:
        html_text: Text to write to the file.
        search_query: Text used to derive the file name.  When omitted,
            the module-level ``query`` set by the script entry point is
            used, which keeps existing one-argument calls working.
    '''
    if search_query is None:
        # Fallback for callers that rely on the global set under
        # ``if __name__ == '__main__'``.
        search_query = query
    # Backslash is included so the name can never contain a Windows path
    # separator; \s covers ordinary and full-width spaces alike.
    output_file_name = re.sub(r'[\\/.:;*?"<>|\s]', r'_', search_query) + '.html'
    output_file_path = os.path.join(PROJECT_ROOT_PATH, output_file_name)
    # Explicit UTF-8 so non-ASCII page content does not depend on the
    # platform's default encoding.
    with open(output_file_path, 'w', encoding='utf-8') as f:
        f.write(html_text)
    print('Done! ', end='')
    print('File path:', output_file_path)
if __name__ == '__main__':
    # Interactive entry point: prompt for the search parameters, download
    # the results page, and save it to disk.
    # ``query`` must stay a module-level name: html_to_file reads it to
    # build the output file name.
    query = input('Query? >> ')
    rslts_num = input('Results per page (upto 100)? >> ')
    results_page = GoogleResultsPage(query, rslts_num)
    html_text = results_page.page_fetcher()
    html_to_file(html_text)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment