Instantly share code, notes, and snippets.

What would you like to do?
import requests
from bs4 import BeautifulSoup
import time
# HTTP headers passed with the search request in fetch_results. The User-Agent
# string identifies the client as desktop Chrome 61 on 64-bit Windows 10.
# NOTE(review): presumably used so the request is served the regular browser
# results page rather than being treated as a script -- confirm.
USER_AGENT = {'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36'}
def fetch_results(search_term, number_results, language_code):
    """Fetch the raw Google results page for a search term.

    Args:
        search_term: Query text; must be a ``str``.
        number_results: Number of results to request (sent as ``num``);
            must be an ``int``.
        language_code: Interface language code sent as ``hl`` (e.g. ``"en"``).

    Returns:
        A ``(search_term, html)`` tuple: the unmodified search term and the
        response body as text.

    Raises:
        AssertionError: If ``search_term`` is not a string or
            ``number_results`` is not an integer.
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: For connection problems and timeouts.
    """
    # Raised explicitly (instead of via ``assert``) so the validation is not
    # stripped when Python runs with -O; the exception type is unchanged.
    if not isinstance(search_term, str):
        raise AssertionError('Search term must be a string')
    if not isinstance(number_results, int):
        raise AssertionError('Number of results must be an integer')

    # Let requests build the query string: it URL-encodes every value
    # (spaces become '+', and characters such as '&' or '#' are escaped
    # instead of corrupting the query).
    response = requests.get(
        'https://www.google.com/search',
        params={'q': search_term, 'num': number_results, 'hl': language_code},
        headers=USER_AGENT,
        timeout=10,  # without a timeout a stalled connection blocks forever
    )
    # Turn 4xx/5xx answers (e.g. being blocked) into requests.HTTPError so
    # callers can tell them apart from a successful page.
    response.raise_for_status()
    return search_term, response.text
def parse_results(html, keyword):
    """Extract ranked search results from a results-page HTML document.

    Every ``<div class="g">`` is treated as one candidate result. A candidate
    is kept only when it contains both an ``<a href=...>`` and an
    ``<h3 class="r">``, and the link target is not ``'#'``.

    Args:
        html: Markup of the results page.
        keyword: Search term recorded on every returned entry.

    Returns:
        A list of dicts with the keys ``keyword``, ``rank``, ``title``,
        ``description`` and ``link``. ``rank`` starts at 1 and only advances
        for entries that are kept; ``description`` is ``None`` when the
        result has no ``<span class="st">``.
    """
    document = BeautifulSoup(html, 'html.parser')
    entries = []
    position = 1

    for block in document.find_all('div', attrs={'class': 'g'}):
        anchor = block.find('a', href=True)
        heading = block.find('h3', attrs={'class': 'r'})
        snippet = block.find('span', attrs={'class': 'st'})

        # Blocks without both a link and a title are not regular results.
        if not (anchor and heading):
            continue

        href = anchor['href']
        if href == '#':
            continue

        entries.append({
            'keyword': keyword,
            'rank': position,
            'title': heading.get_text(),
            # Some results carry no snippet; keep the missing value as-is.
            'description': snippet.get_text() if snippet else snippet,
            'link': href,
        })
        position += 1

    return entries
def scrape_google(search_term, number_results, language_code):
    """Fetch and parse one Google results page.

    Args:
        search_term: Query text; must be a ``str``.
        number_results: Number of results to request; must be an ``int``.
        language_code: Interface language code (e.g. ``"en"``).

    Returns:
        The list of result dicts produced by ``parse_results``.

    Raises:
        Exception: With a descriptive message when the arguments are
            invalid, when the server returns an HTTP error, or when any
            other request failure occurs. The original exception is attached
            as ``__cause__``.
    """
    try:
        keyword, html = fetch_results(search_term, number_results, language_code)
        return parse_results(html, keyword)
    except AssertionError as err:
        raise Exception("Incorrect arguments parsed to function") from err
    # HTTPError is a subclass of RequestException, so it must be handled first.
    except requests.HTTPError as err:
        raise Exception("You appear to have been blocked by Google") from err
    except requests.RequestException as err:
        raise Exception("Appears to be an issue with your connection") from err
if __name__ == '__main__':
    # Demo run: scrape each keyword and collect every result in one list.
    keywords = ['edmund martin', 'python', 'google scraping']
    data = []
    for keyword in keywords:
        try:
            results = scrape_google(keyword, 100, "en")
            data.extend(results)
        except Exception as e:
            # Top-level boundary: report the failure and move on to the
            # next keyword instead of aborting the whole run.
            print(e)
        finally:
            # Pause between queries so consecutive requests are spaced out.
            time.sleep(10)
    print(data)

This comment has been minimized.

meckin commented Nov 9, 2017

Have you seen an increase in blocking depending on the language requested?


This comment has been minimized.

Lowell130 commented Jan 7, 2018

Hello, how can I add these:
metatags = soup.find_all('meta',attrs={'name':'generator'})


This comment has been minimized.

cabbage-dealer commented Mar 2, 2018

When I try to use it for the keyword "Commbank", I get this error:
'NoneType' object has no attribute 'get_text'


This comment has been minimized.

rajrsingh commented Jun 8, 2018

On line 36 don't you want to include the link in the dictionary you append to found_results?


This comment has been minimized.


EdmundMartin commented Jun 11, 2018

@rajrsingh Thanks for pointing this out. I have made the changes.

@cabbage-dealer The code on the blog was updated to fix this error. This happens when a result doesn't have a standard description. I have also updated the code to avoid this issue.


This comment has been minimized.

duchonghoang commented Jun 21, 2018

I keep getting all the elements printed in reverse order (link, description, title, rank, keyword) instead of (keyword, rank, title, description, link), and sometimes they are in no particular order. I Googled this, and it's because the elements are contained in a set rather than a list.
Is there a way to print the elements in a specific order?


This comment has been minimized.

dripti commented Jul 4, 2018

How can I get results for different countries?


This comment has been minimized.

CodeOctal commented Oct 30, 2018

Hi, when I run this code in Spyder I get the result [ ].
What does this mean?
Can you please help me out? I am new to this domain.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment