Skip to content

Instantly share code, notes, and snippets.

@Isopach
Created December 15, 2021 01:58
Show Gist options
  • Save Isopach/a62d20af72ce1421a20e8f8c53ca1205 to your computer and use it in GitHub Desktop.
Search Google and get a summary of the results
# Adapted from googlesearch-python
def findTerm():
    """Search Google for each hard-coded query and save the top hit.

    For every query in the list, fetches one Japanese-language result via
    ``search`` and appends its snippet text to ``google_results.txt``
    (UTF-8), one per line, echoing each snippet to stdout as it goes.
    """
    queries = ['term1', 'term2', 'etc']
    # ``with`` guarantees the file is closed even if a search raises;
    # the original opened/closed manually and leaked the handle on error.
    with open("google_results.txt", "w", encoding='utf-8') as out:
        for query in queries:
            res = search(query, num_results=1, lang="ja")
            # Guard against an empty result page — the original ``res[0]``
            # raised IndexError whenever Google returned no snippets.
            if not res:
                continue
            print(res[0])
            out.write(res[0])
            out.write("\n")
def search(term, num_results=10, lang="en", proxy=None):
    """Scrape Google web search and return result snippet texts.

    Parameters:
        term: the query string (any characters; properly URL-encoded).
        num_results: how many results to request from Google.
        lang: Google interface/result language code (``hl`` parameter).
        proxy: optional proxy URL; routed as an https proxy when it
            starts with "https", otherwise as an http proxy.

    Returns:
        list[str]: the text of each two-line snippet ``div`` found in the
        result page. May be empty if Google changes its markup or blocks
        the request.

    Raises:
        requests.HTTPError: if Google answers with a non-2xx status.
    """
    from urllib.parse import quote_plus

    from bs4 import BeautifulSoup
    from requests import get

    usr_agent = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/61.0.3163.100 Safari/537.36'}

    def fetch_results(search_term, number_results, language_code):
        # Download the raw result page HTML for one query.
        # quote_plus percent-encodes ALL reserved characters; the original
        # ``replace(' ', '+')`` corrupted queries containing '&', '#', '+',
        # '?' or non-ASCII text.
        escaped_search_term = quote_plus(search_term)
        google_url = 'https://www.google.com/search?q={}&num={}&hl={}'.format(
            escaped_search_term, number_results + 1, language_code)
        proxies = None
        if proxy:
            if proxy.startswith("https"):
                proxies = {"https": proxy}
            else:
                proxies = {"http": proxy}
        response = get(google_url, headers=usr_agent, proxies=proxies)
        response.raise_for_status()
        return response.text

    def parse_results(raw_html):
        # Yield the text of each snippet div; the inline style attribute is
        # how Google marks two-line result summaries (fragile — markup may
        # change without notice).
        soup = BeautifulSoup(raw_html, 'html.parser')
        result_block = soup.find_all('div', attrs={'style': '-webkit-line-clamp:2'})
        for result in result_block:
            yield result.text

    html = fetch_results(term, num_results, lang)
    return list(parse_results(html))
# Run the search only when executed as a script, not when imported.
if __name__ == "__main__":
    findTerm()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment