genbtc/github-beautiful.py

## github-beautiful.py
#!/usr/bin/env python3
#Copyright 2023 - genr8eofl @IRC/gentoo
# github-beautiful.py - fetch GitHub repo "Description" (for many links at once)
#       Makes 1 HTTPS connection per line, retrieves the HTML page and parses it for <Title>
import requests
from requests import ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError
from bs4 import BeautifulSoup
import sys

urlprefix = "https://github.com/"
# Seconds to wait for GitHub before giving up on one repo; without a timeout
# a stalled connection would block the whole run.
REQUEST_TIMEOUT = 10


def repo_names(lines):
  """Yield the stripped, non-blank entries of *lines*.

  Blank or whitespace-only lines are skipped instead of being treated as
  end-of-input, so a stray empty line in the list does not cut the run short
  and an empty name is never turned into a request for the GitHub front page.
  """
  for line in lines:
    repo = line.strip()
    if repo:
      yield repo


def main(argv):
  """Print the HTML <title> of the GitHub page of every repo listed in a file.

  Usage: takes 1 argument, a filename containing github repos (one per line).

  argv -- the command line (argv[0] is the program name, argv[1] the file).

  For each repo one line is written to stdout: the page title, or
  "404_NOT_FOUND -  <repo>" when GitHub answers 404 or the page has no
  <title>. Repos whose request fails are reported on stderr and skipped.
  """
  if len(argv) < 2:
    # sys.exit(str) prints the message to stderr and exits with status 1.
    sys.exit("Usage: github-beautiful.py FILE   (FILE lists one GitHub repo per line)")

  with open(argv[1]) as file1:
    # Iterating the file stops at the real EOF, so no sentinel check is needed.
    for repo in repo_names(file1):
      # internet connection to github
      try:
        page = requests.get(urlprefix + repo, timeout=REQUEST_TIMEOUT)
      except (ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError) as err:
        print("REQUEST_FAILED - ", repo, "-", err, file=sys.stderr)
        continue

      # parse HTML, find <Title> tag
      title = BeautifulSoup(page.text, 'html.parser').find('title')

      # output -- an error page can still carry a <title>, so the HTTP status
      # decides whether the repo exists, not the mere presence of the tag.
      if page.status_code == 404 or title is None:
        print("404_NOT_FOUND - ", repo)
      else:
        print(title.string)


if __name__ == "__main__":
  main(sys.argv)
	#!/usr/bin/env python3
	#Copyright 2023 - genr8eofl @IRC/gentoo
	# github-beautiful.py - fetch GitHub repo "Description" (for many links at once)
	# Makes 1 HTTPS connection per line, retrieves the HTML page and parses it for <Title>
	import requests
	from requests import ReadTimeout, ConnectTimeout, HTTPError, Timeout, ConnectionError
	from bs4 import BeautifulSoup
	import sys

	urlprefix = "https://github.com/"
	# Seconds to wait for GitHub before giving up on one repo; without a timeout
	# a stalled connection would block the whole run.
	REQUEST_TIMEOUT = 10


	def repo_names(lines):
		"""Yield the stripped, non-blank entries of *lines*.

		Blank or whitespace-only lines are skipped rather than taken as
		end-of-input, so an empty name never becomes a request for the
		GitHub front page.
		"""
		for line in lines:
			repo = line.strip()
			if repo:
				yield repo


	def main(argv):
		"""Print the HTML <title> of the GitHub page of every repo listed in a file.

		Usage: takes 1 argument, a filename containing github repos (one per line).

		argv -- the command line (argv[0] is the program name, argv[1] the file).

		For each repo one line is written to stdout: the page title, or
		"404_NOT_FOUND -  <repo>" when GitHub answers 404 or the page has no
		<title>. Repos whose request fails are reported on stderr and skipped.
		"""
		if len(argv) < 2:
			# sys.exit(str) prints the message to stderr and exits with status 1.
			sys.exit("Usage: github-beautiful.py FILE   (FILE lists one GitHub repo per line)")

		with open(argv[1]) as file1:
			# Iterating the file stops at the real EOF, so no sentinel check is needed.
			for repo in repo_names(file1):
				# internet connection to github
				try:
					page = requests.get(urlprefix + repo, timeout=REQUEST_TIMEOUT)
				except (ConnectTimeout, HTTPError, ReadTimeout, Timeout, ConnectionError) as err:
					print("REQUEST_FAILED - ", repo, "-", err, file=sys.stderr)
					continue

				# parse HTML, find <Title> tag
				title = BeautifulSoup(page.text, 'html.parser').find('title')

				# output -- an error page can still carry a <title>, so the HTTP
				# status decides whether the repo exists, not the tag's presence.
				if page.status_code == 404 or title is None:
					print("404_NOT_FOUND - ", repo)
				else:
					print(title.string)


	if __name__ == "__main__":
		main(sys.argv)