Skip to content

Instantly share code, notes, and snippets.

@CraigTheKiwi
Created August 28, 2021 21:47
Show Gist options
  • Save CraigTheKiwi/6c7f47317eadc766607d991d972d7667 to your computer and use it in GitHub Desktop.
Save CraigTheKiwi/6c7f47317eadc766607d991d972d7667 to your computer and use it in GitHub Desktop.
Solution to Cassidoo Newsletter 23 Aug 2021
#!/bin/python3
import requests
from bs4 import BeautifulSoup
from urllib.request import urlopen
# Number of results to request per Google results page.
num = "500"
# Query: site:github.com and (/blob/master "navigator.sendBeacon") and (MIT|Apache|GPL|BSD)
url = "https://www.google.com/search?q=site%3Agithub.com+and+(%2Fblob%2Fmaster+%22navigator.sendBeacon%22)+and+(MIT%7CApache%7CGPL%7CBSD)&oq=site%3Agithub.com+and+(%2Fblob%2Fmaster+%22navigator.sendBeacon%22)+and+(MIT%7CApache%7CGPL%7CBSD)&aqs=chrome..69i57j69i58.464j0j9&sourceid=chrome&ie=UTF-8&num="+num
# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 10
# Prefix that marks where the GitHub address starts inside a result href.
GITHUB_PREFIX = "https://github.com/"
# Path segment that separates the repository root from the file path.
BLOB_MARKER = "/blob/"


def build_headers():
    """Return the default requests headers with a browser User-Agent set."""
    headers = requests.utils.default_headers()
    headers.update({
        'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0',
    })
    return headers


def extract_repo_url(href):
    """Return the repository root URL embedded in a search-result href.

    Handles both redirect-style hrefs ("/url?q=https://github.com/...") and
    direct "https://github.com/..." hrefs. The result keeps a trailing slash,
    e.g. "https://github.com/owner/repo/".

    Returns None when the href contains no GitHub blob link.
    """
    start = href.find(GITHUB_PREFIX)
    if start == -1:
        return None
    # Search for the full "/blob/" segment (not just "blob") so that owner or
    # repository names containing the letters "blob" are not cut short.
    end = href.find(BLOB_MARKER, start)
    if end == -1:
        return None
    return href[start:end] + "/"


def parse_star_count(text):
    """Convert a displayed counter such as "523", "1,234" or "1.2k" to an int.

    Returns 0 when the text cannot be interpreted as a number.
    """
    cleaned = text.strip().lower().replace(",", "")
    multiplier = 1
    # Handle the abbreviated thousands form, e.g. "1.2k".
    if cleaned.endswith("k"):
        multiplier = 1000
        cleaned = cleaned[:-1]
    try:
        # round() instead of int() so float error cannot truncate the value.
        return round(float(cleaned) * multiplier)
    except (ValueError, OverflowError):
        return 0


def collect_repositories(search_html):
    """Parse a results page into unique [stars, title, url] entries.

    Every <h3> whose parent carries an href pointing at a GitHub blob is
    treated as one result. Entries are returned in page order with the star
    count initialised to 0; repeated repository URLs are kept only once.
    """
    soup = BeautifulSoup(search_html, "lxml")
    repositories = []
    seen_urls = set()
    for heading in soup.find_all("h3"):
        anchor = heading.parent
        href = anchor.get("href") if anchor is not None else None
        if not href:
            # Not a linked result heading; nothing to extract.
            continue
        repo_url = extract_repo_url(href)
        if repo_url is None or repo_url in seen_urls:
            continue
        seen_urls.add(repo_url)
        # The title normally sits in a nested <div>; fall back to the
        # heading's own text when the markup has no such wrapper.
        title_node = heading.find("div")
        if title_node is None:
            title_node = heading
        repositories.append([0, title_node.get_text(), repo_url])
    return repositories


def fetch_star_count(repo_url, headers):
    """Fetch a repository page and return the count shown in its counter.

    Reads the first element with class "js-social-count", which this script
    treats as the star counter. Returns 0 when the request fails or no such
    element is present, so one bad repository does not abort the run.
    """
    try:
        page = requests.get(repo_url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException:
        return 0
    counter = BeautifulSoup(page.content, "lxml").find(class_="js-social-count")
    if counter is None:
        return 0
    return parse_star_count(counter.text)


def main():
    """Search Google, look up each repository's stars, and print the top 5."""
    headers = build_headers()
    # headers must be passed by keyword: the second positional parameter of
    # requests.get is `params`, which would put the headers in the query
    # string instead of sending them.
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()

    github = collect_repositories(response.content)

    # Query each GitHub URL and record its star count.
    for repo in github:
        repo[0] = fetch_star_count(repo[2], headers)

    # Order the results by star count, largest first.
    github.sort(key=lambda repo: repo[0], reverse=True)

    # Show only the top 5.
    for stars, title, repo_url in github[:5]:
        print("{} : {} : {}".format(stars, title, repo_url))


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment