Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Last active May 13, 2021 10:46
Show Gist options
  • Save dimitryzub/688d4d6990d8a8f6e50764efb66e69c5 to your computer and use it in GitHub Desktop.
Save dimitryzub/688d4d6990d8a8f6e50764efb66e69c5 to your computer and use it in GitHub Desktop.
medium_scrape_google_shopping_ads
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup
# Scrape titles and links of Google Shopping ads shown for a search query.

# Browser-like User-Agent (the default user-agent from the requests library
# is 'python-requests').
# https://github.com/psf/requests/blob/589c4547338b592b1fb77c65663d8aa6fbb7e38b/requests/utils.py#L808-L814
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
}

# Search query (plain ASCII). It is sent through ``params`` so that requests
# URL-encodes it; the URL itself must therefore not carry its own ``q=``.
params = {'q': 'coffee buy'}

# Endpoint without a query string: requests appends ``params`` to it.
SEARCH_URL = 'https://www.google.com/search'

# Base URL that the hrefs found in ad containers are resolved against.
AD_LINK_BASE = 'https://www.googleadservices.com/pagead'

# Exact class string of the <div> that wraps each shopping-ad title.
AD_TITLE_CLASS = 'RnJeZd top pla-unit-title'

# Seconds to wait for the server before giving up instead of hanging forever.
REQUEST_TIMEOUT = 30


def build_ad_link(href):
    """Return the absolute ad URL for an ``href`` taken from an ad container.

    The href is resolved against ``AD_LINK_BASE`` with ``urljoin``, so a
    relative reference replaces the last path segment of the base, while an
    already absolute URL is returned unchanged.
    """
    return urllib.parse.urljoin(AD_LINK_BASE, href)


def fetch_html(query_params=None):
    """Download the search results page and return its HTML as text.

    ``query_params`` defaults to the module-level ``params``.
    Raises ``requests.HTTPError`` on a 4xx/5xx answer rather than handing an
    error page to the parser.
    """
    if query_params is None:
        query_params = params
    response = requests.get(
        SEARCH_URL,
        headers=headers,
        params=query_params,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    return response.text


def extract_ads(soup):
    """Yield ``(title, ad_link)`` for every ad title container in ``soup``.

    Containers that hold no ``<a>`` tag, or whose ``<a>`` has no ``href``,
    are skipped.
    """
    for container in soup.find_all('div', class_=AD_TITLE_CLASS):
        anchor = container.find('a')
        if anchor is None:
            continue
        href = anchor.get('href')
        if not href:
            continue
        yield container.text, build_ad_link(href)


def main():
    """Fetch the results page and print each ad title with its link."""
    soup = BeautifulSoup(fetch_html(), 'lxml')
    for title, ad_link in extract_ads(soup):
        # Each title and link on its own line, ads separated by a blank line
        print(f'{title}\n{ad_link}\n')


if __name__ == '__main__':
    main()
# Output
'''
Jot Ultra Coffee Triple | Ultra Concentrated
https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABABGgJ5bQ&sig=AOD64_0x-PlrWek-JFlDTSo7E9Z7YhUOjg&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCED4&adurl=
MUD\WTR | A Healthier Coffee Alternative, 30 servings
https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABAJGgJ5bQ&sig=AOD64_3gltZJ6kPrxic5o8yUO5cuJrHXnw&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCEEg&adurl=
Jot Ultra Coffee Double | 2 bottles = 28 cups
https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABAHGgJ5bQ&sig=AOD64_3hD0JWZSLr8NUgoTW5K0HMzdFvng&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCEE4&adurl=
'''
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment