dimitryzub/bs4_scrape_google_ads.py

## bs4_scrape_google_ads.py
import requests, lxml, urllib.parse
from bs4 import BeautifulSoup

# Send a desktop browser User-Agent so the request looks like a real user visit
headers = {
    "User-Agent":
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
}

# Search query; requests URL-encodes it and appends it to the URL as ?q=...
params = {'q': 'coffee buy'}

# Fetch the results page. The base URL must not carry its own 'q=':
# combined with params, 'search?q=' produced '?q=&q=coffee+buy'.
response = requests.get('https://www.google.com/search',
                        headers=headers,
                        params=params,
                        timeout=30)  # without a timeout the call can block forever
# Fail loudly on an HTTP error status instead of silently parsing an error page
response.raise_for_status()
html = response.text

# Parse the returned HTML with the lxml parser
soup = BeautifulSoup(html, 'lxml')

# Iterate over every <span> whose class attribute equals this exact string;
# these are the containers that hold the ad link
for container in soup.find_all('span', class_='Zu0yb LWAWHf qzEoUe'):
    # The span holds no text other than the link, so .text is the link itself
    ad_link = container.text
    print(ad_link)

# Example output
'''
https://www.coffeeam.com/
https://www.sfbaycoffee.com/
https://www.onyxcoffeelab.com/
https://www.enjoybettercoffee.com/
https://www.klatchroasting.com/
https://www.pachamamacoffee.com/
https://www.bulletproof.com/
'''
	import requests, lxml, urllib.parse
	from bs4 import BeautifulSoup

	# Send a desktop browser User-Agent so the request looks like a real user visit
	headers = {
		"User-Agent":
		"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/72.0.3538.102 Safari/537.36 Edge/18.19582"
	}

	# Search query; requests URL-encodes it and appends it to the URL as ?q=...
	params = {'q': 'coffee buy'}

	# Fetch the results page. The base URL must not carry its own 'q=':
	# combined with params, 'search?q=' produced '?q=&q=coffee+buy'.
	response = requests.get('https://www.google.com/search',
		headers=headers,
		params=params,
		timeout=30)  # without a timeout the call can block forever
	# Fail loudly on an HTTP error status instead of silently parsing an error page
	response.raise_for_status()
	html = response.text

	# Parse the returned HTML with the lxml parser
	soup = BeautifulSoup(html, 'lxml')

	# Iterate over every <span> whose class attribute equals this exact string;
	# these are the containers that hold the ad link
	for container in soup.find_all('span', class_='Zu0yb LWAWHf qzEoUe'):
		# The span holds no text other than the link, so .text is the link itself
		ad_link = container.text
		print(ad_link)

	# Example output
	'''
	https://www.coffeeam.com/
	https://www.sfbaycoffee.com/
	https://www.onyxcoffeelab.com/
	https://www.enjoybettercoffee.com/
	https://www.klatchroasting.com/
	https://www.pachamamacoffee.com/
	https://www.bulletproof.com/
	'''