Skip to content

Instantly share code, notes, and snippets.

@genbtc
Created January 13, 2023 03:52
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save genbtc/57fdc34d1bae507ab3815edb2dab4883 to your computer and use it in GitHub Desktop.
Save genbtc/57fdc34d1bae507ab3815edb2dab4883 to your computer and use it in GitHub Desktop.
github-beautiful.py
#!/usr/bin/env python3
#Copyright 2023 - genr8eofl @IRC/gentoo
# github-beautiful.py - fetch GitHub repo "Description" (for many links at once)
# Makes 1 HTTPS connection per line, retrieves the HTML page and parses it for <Title>
import requests
from requests import ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError
from bs4 import BeautifulSoup
import sys
urlprefix = "https://github.com/"
# Seconds to wait on each request. Without an explicit timeout requests.get()
# may block indefinitely and the timeout exceptions below are never raised.
REQUEST_TIMEOUT = 10


def main(argv):
    """Print the HTML <title> of the GitHub page for every repo in a file.

    Usage: takes 1 argument, a filename containing GitHub repos, one
    "owner/name" entry per line. Blank lines are skipped.

    Args:
        argv: Command-line argument list; argv[1] is the input filename.

    Returns:
        Process exit status: 0 on completion, 2 when the filename argument
        is missing.
    """
    if len(argv) < 2:
        print("Usage:", argv[0] if argv else "github-beautiful.py",
              "FILE_WITH_REPOS", file=sys.stderr)
        return 2

    with open(argv[1]) as file1:
        # Iterating the file object stops exactly at EOF, so no request is
        # ever made for a phantom empty line and a failed request can no
        # longer skip the end-of-file check and loop forever.
        for line in file1:
            # sanitize input strings, repo name
            repo = line.strip()
            if not repo:
                # A blank line is not EOF; ignore it and keep going.
                continue
            # internet connection to github
            try:
                page = requests.get(urlprefix + repo, timeout=REQUEST_TIMEOUT)
            except (ConnectTimeout, HTTPError, ReadTimeout, Timeout,
                    ConnectionError) as err:
                # Report on stderr so stdout stays one result per repo.
                print("REQUEST_FAILED - ", repo, "-", err, file=sys.stderr)
                continue
            # A 404 response can still carry an HTML <title>, so the status
            # code has to be checked explicitly to detect a missing repo.
            if page.status_code == 404:
                print("404_NOT_FOUND - ", repo)
                continue
            # parse HTML, find <Title> tag
            soup = BeautifulSoup(page.text, 'html.parser')
            title = soup.find('title')
            # output
            if title is not None:
                print(title.string)
            else:
                print("404_NOT_FOUND - ", repo)
    return 0


if __name__ == "__main__":
    sys.exit(main(sys.argv))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment