Skip to content

Instantly share code, notes, and snippets.

@dimitryzub
Created October 26, 2021 08:26
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dimitryzub/56d2bcf4f05e84fa7bdbcafe9020ebfd to your computer and use it in GitHub Desktop.
Save dimitryzub/56d2bcf4f05e84fa7bdbcafe9020ebfd to your computer and use it in GitHub Desktop.
Scrape Brave Search Organic Results using Python
from bs4 import BeautifulSoup
import requests, lxml, json
# HTTP request headers; the User-agent value is a desktop browser string.
headers = {
    'User-agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.102 Safari/537.36 Edge/18.19582",
}

# Query-string parameters passed along with the search request.
params = {
    'q': 'dune film',
    'source': 'web',
}
def get_organic_results():
    """Scrape Brave Search organic results and print them as JSON.

    Sends a GET request to ``https://search.brave.com/search`` with the
    module-level ``headers`` and ``params``, parses the response with the
    ``lxml`` parser, and builds one dict per ``.snippet.fdb`` element with
    the keys ``title``, ``title_img``, ``link``, ``displayed_link``,
    ``snippet``, ``snippet_img``, ``rating``, ``votes`` and ``sitelinks``.
    Optional fields that are absent from a result are set to ``None``.

    The collected list is printed via ``json.dumps(..., indent=2,
    ensure_ascii=False)``.

    Returns:
        list[dict]: the same list of result dicts that was printed.
    """
    # A timeout keeps the script from hanging forever on a stalled connection.
    html = requests.get('https://search.brave.com/search',
                        headers=headers,
                        params=params,
                        timeout=30)
    soup = BeautifulSoup(html.text, 'lxml')

    data = []

    for result in soup.select('.snippet.fdb'):
        title = result.select_one('.snippet-title').text.strip()
        title_img = result.select_one('.favicon')['src']
        link = result.a['href']
        displayed_link = result.select_one('.snippet-url').text.strip().replace('\n', '')

        # Snippet text: the description is split on newlines and the second
        # line is kept (per the original author's note, the first line holds
        # the "X time ago" label). Left-hand whitespace is then removed.
        snippet = None
        description = result.select_one('.snippet-content .snippet-description')
        if description is not None:
            description_lines = description.text.strip().split('\n')
            if len(description_lines) > 1:
                snippet = description_lines[1].lstrip()

        # Thumbnail is looked up independently so that a result without an
        # image still keeps its snippet text.
        thumb = result.select_one('.snippet-content .thumb')
        snippet_img = thumb.get('src') if thumb is not None else None

        # Keys must be the literal strings 'title' / 'link'; using the bare
        # names would key every sitelink by the parent result's title and URL.
        sitelinks = [
            {
                'title': sitelink.text.strip(),
                'link': sitelink['href'],
            }
            for sitelink in result.select('.deep-results-buttons .deep-link')
        ]

        # Rating block text is "<rating> - <votes>"; both stay None unless
        # the element exists and contains the ' - ' separator.
        rating = None
        votes = None
        rating_node = result.select_one('.ml-10')
        if rating_node is not None:
            rating_parts = rating_node.text.strip().split(' - ')
            if len(rating_parts) > 1:
                rating, votes = rating_parts[0], rating_parts[1]

        data.append({
            'title': title,
            'title_img': title_img,
            'link': link,
            'displayed_link': displayed_link,
            'snippet': snippet,
            'snippet_img': snippet_img,
            'rating': rating,
            'votes': votes,
            'sitelinks': sitelinks,
        })

    print(json.dumps(data, indent=2, ensure_ascii=False))
    return data
@dimitryzub
Copy link
Author

Example JSON output:

[
  {
    "title": "Dune (2021 film) - Wikipedia",
    "title_img": "https://imgr.search.brave.com/wc-7XNJZ_tfrnnF72ZK8SIc1HV0ejHNf2xu1qguiQQw/fit/32/32/ce/1/aHR0cDovL2Zhdmlj/b25zLnNlYXJjaC5i/cmF2ZS5jb20vaWNv/bnMvNjQwNGZhZWY0/ZTQ1YWUzYzQ3MDUw/MmMzMGY3NTQ0ZjNj/NDUwMDk5ZTI3MWRk/NWYyNTM4N2UwOTE0/NTI3ZDQzNy9lbi53/aWtpcGVkaWEub3Jn/Lw",
    "link": "https://en.wikipedia.org/wiki/Dune_(2021_film)",
    "displayed_link": "en.wikipedia.org› wiki  › Dune_(2021_film)",
    "snippet": "Dune (titled onscreen as Dune: Part One) is a 2021 American epic science fiction film directed by Denis Villeneuve with a screenplay by Jon Spaihts, Villeneuve, and Eric Roth. It is the first of a planned two-part adaptation of the 1965 novel of the same name by Frank Herbert, primarily covering ...",
    "snippet_img": "https://imgr.search.brave.com/IClP0pAcslDAfO9KZ_RAbCFo1Mt16hng2ec6U8GI4c0/fit/200/200/ce/1/aHR0cHM6Ly91cGxv/YWQud2lraW1lZGlh/Lm9yZy93aWtpcGVk/aWEvZW4vOC84ZS9E/dW5lXyUyODIwMjFf/ZmlsbSUyOS5qcGc",
    "rating": null,
    "votes": null,
    "sitelinks": [
      {
        "Dune (2021 film) - Wikipedia": "Premise",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Premise"
      },
      {
        "Dune (2021 film) - Wikipedia": "Cast",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Cast"
      },
      {
        "Dune (2021 film) - Wikipedia": "Production",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Production"
      },
      {
        "Dune (2021 film) - Wikipedia": "Music",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Music"
      },
      {
        "Dune (2021 film) - Wikipedia": "Marketing",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Marketing"
      },
      {
        "Dune (2021 film) - Wikipedia": "Release",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Release"
      },
      {
        "Dune (2021 film) - Wikipedia": "Reception",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Reception"
      },
      {
        "Dune (2021 film) - Wikipedia": "Future",
        "https://en.wikipedia.org/wiki/Dune_(2021_film)": "https://en.wikipedia.org/wiki/Dune_(2021_film)#Future"
      }
    ]
  }
... other results
]

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment