Last active
May 13, 2021 10:46
-
-
Save dimitryzub/688d4d6990d8a8f6e50764efb66e69c5 to your computer and use it in GitHub Desktop.
medium_scrape_google_shopping_ads
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
"""Print the title and ad link of every Google Shopping ad found for a search query.

The script sends one GET request to Google Search, parses the returned HTML
with BeautifulSoup (lxml parser) and prints each ad title followed by its
absolute link, one ad per paragraph.
"""
import urllib.parse

import lxml  # noqa: F401 -- not referenced directly; it is the parser named in BeautifulSoup(html, 'lxml')
import requests
from bs4 import BeautifulSoup

# Adding User-agent (default user-agent from requests library is 'python-requests')
# https://github.com/psf/requests/blob/589c4547338b592b1fb77c65663d8aa6fbb7e38b/requests/utils.py#L808-L814
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
    ),
}

# Search query (spelled with plain ASCII letters).
params = {'q': 'coffee buy'}

# Getting HTML response.
# The query string is built from `params` only; hard-coding '?q=' in the URL as
# well would send an extra, empty `q` parameter alongside the real one.
response = requests.get(
    'https://www.google.com/search',
    headers=headers,
    params=params,
    timeout=30,  # without a timeout the call can block forever
)
# Fail loudly on an HTTP error status instead of silently parsing an error page.
response.raise_for_status()
html = response.text

# Getting HTML code from BeautifulSoup
soup = BeautifulSoup(html, 'lxml')

# Beginning of the link that every scraped href is joined onto.
ad_link_base = 'https://www.googleadservices.com/pagead'

# Looking for the containers that hold the ad title and link.
for container in soup.find_all('div', class_='RnJeZd top pla-unit-title'):
    # Scraping title
    title = container.text

    # Scraping end of the link; skip containers that carry no usable anchor
    # rather than crashing on a missing <a> tag or href attribute.
    anchor = container.find('a')
    if anchor is None or not anchor.get('href'):
        continue

    # Combining (joining) the base URL with the scraped href. Judging by the
    # sample output below, the href is host-relative ('/aclk?...'), so only the
    # scheme and host of the base end up in the result.
    ad_link = urllib.parse.urljoin(ad_link_base, anchor['href'])

    # Printing each title and link on a new line
    print(f'{title}\n{ad_link}\n')

# Output
#
# Jot Ultra Coffee Triple | Ultra Concentrated
# https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABABGgJ5bQ&sig=AOD64_0x-PlrWek-JFlDTSo7E9Z7YhUOjg&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCED4&adurl=
# MUD\WTR | A Healthier Coffee Alternative, 30 servings
# https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABAJGgJ5bQ&sig=AOD64_3gltZJ6kPrxic5o8yUO5cuJrHXnw&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCEEg&adurl=
# Jot Ultra Coffee Double | 2 bottles = 28 cups
# https://www.googleadservices.com/aclk?sa=l&ai=DChcSEwiP0dmfvcbwAhX48OMHHYyRBuoYABAHGgJ5bQ&sig=AOD64_3hD0JWZSLr8NUgoTW5K0HMzdFvng&ctype=5&q=&ved=2ahUKEwjhr9GfvcbwAhXHQs0KHQCbCAUQww96BAgCEE4&adurl=
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment