Skip to content

Instantly share code, notes, and snippets.

@anki-code
Created August 9, 2022 09:04
Show Gist options
  • Save anki-code/b9cf5fbe3252f4fcc626e95429ff6342 to your computer and use it in GitHub Desktop.
Save anki-code/b9cf5fbe3252f4fcc626e95429ff6342 to your computer and use it in GitHub Desktop.
Get the list of xonsh xontribs from PyPI and enrich it with package info from PyPI and GitHub
from bs4 import BeautifulSoup as bs
packages = []
# Get the list of packages from PyPI by walking the paginated search results.
# At most 9 pages are requested; the loop stops early at the first page whose
# body contains 'Error code 404' (presumably the reply past the last page).
for page_no in range(1, 10):
    page = $(curl @(f'https://pypi.org/search/?q=xontrib-&page={page_no}'))
    if 'Error code 404' in page:
        printx('{GREEN}DONE{RESET}')
        break
    printx(f"{{YELLOW}}Page {page_no}{{RESET}}")
    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser is installed (results may differ between machines) and warns.
    soup = bs(page, 'html.parser')
    for item in soup.select('ul.unstyled li'):
        try:
            # str() turns the NavigableString into a plain string so the
            # stored value does not keep the whole parse tree alive.
            name = str(item.select('.package-snippet__name')[0].contents[0])
            version = '0.0.0'
            descr = ''
            try:
                version = str(item.select('.package-snippet__version')[0].contents[0])
                descr = str(item.select('.package-snippet__description')[0].contents[0]).strip()
            except IndexError:
                # Version/description are optional: keep whatever defaults
                # (or the already-parsed version) are set at this point.
                pass
            text = name + descr
            if 'xonsh' not in text and 'xontrib' not in text:
                printx(f'{{PURPLE}}Skip {name}{{RESET}}')
                continue
            packages.append({'name': name, 'version': version, 'descr': descr})
        except Exception:
            # Unexpected markup: dump the page for inspection and give up on
            # the rest of this page. `Exception` (not a bare except) lets
            # Ctrl-C and SystemExit still stop the script.
            printx(f'{{RED}}{soup}{{RESET}}')
            break
##
# Order the collected packages alphabetically by name and remember the total,
# which the progress messages below report.
packages.sort(key=lambda pkg: pkg['name'])
packages_cnt = len(packages)
# Get info about packages from pypi
import json
# Query the PyPI JSON endpoint for every package and keep the "info" section
# of the reply under the 'pypi_info' key.
for i, p in enumerate(packages):
    # Report a 1-based position so the last package reads N/N, not N-1/N.
    printx(f"{{YELLOW}}PYPI {p['name']} {i + 1: >3}/{packages_cnt: <3}{{RESET}}")
    try:
        pj = json.loads($(curl @(f"https://pypi.org/pypi/{p['name']}/json")))
        p['pypi_info'] = pj['info']
    except (json.JSONDecodeError, KeyError, TypeError):
        # Empty/non-JSON output from curl, or a reply without "info": keep
        # going with an empty mapping so that one bad package does not abort
        # the whole run and later code can still read p['pypi_info'].
        printx(f"{{RED}}No PyPI info for {p['name']}{{RESET}}")
        p['pypi_info'] = {}
# Look through each package's PyPI project URLs for a GitHub link and store
# it (forced to https) under the 'github' key.
for pkg in packages:
    project_urls = pkg['pypi_info'].get('project_urls')
    if not project_urls:
        continue
    # 'Homepage' is examined last, so it wins when both entries are GitHub links.
    for label in ('Code', 'Homepage'):
        if label not in project_urls:
            continue
        link = str(project_urls[label])
        if 'github.com' in link:
            pkg['github'] = link.replace('http://', 'https://')
github_cnt = sum(1 for pkg in packages if 'github' in pkg)
print('Github found in', github_cnt, 'packages', packages_cnt)
# Get repository info from the GitHub REST API for every package that has a
# GitHub URL, and store the decoded reply under the 'github_info' key.
for i, p in enumerate(packages):
    if 'github' not in p:
        continue
    # Report a 1-based position so the last package reads N/N, not N-1/N.
    printx(f"{{YELLOW}}GITHUB {p['name']} {i + 1: >3}/{packages_cnt: <3}{{RESET}}")
    # Reduce the project URL to "owner/name". A plain prefix replace breaks on
    # "www.github.com", trailing slashes, ".git" suffixes, sub-paths such as
    # "/tree/master" and "#readme" fragments, all of which yield a bad API path.
    _, found, path = p['github'].partition('github.com/')
    path = path.split('#', 1)[0].split('?', 1)[0]
    parts = [seg for seg in path.split('/') if seg]
    if not found or len(parts) < 2:
        printx(f"{{RED}}Skip {p['name']}: no repository in {p['github']}{{RESET}}")
        continue
    repo_name = parts[1][:-4] if parts[1].endswith('.git') else parts[1]
    repo = f"{parts[0]}/{repo_name}"
    try:
        p['github_info'] = json.loads($(curl @(f"https://api.github.com/repos/{repo}")))
    except json.JSONDecodeError:
        # Empty or non-JSON output from curl: leave 'github_info' unset
        # rather than aborting the remaining packages.
        printx(f"{{RED}}No GitHub info for {p['name']}{{RESET}}")
    # Pause between requests (presumably to go easy on the API rate limit).
    sleep 1
# List every package name with its description.
# Names are padded to the longest name among packages that have a description;
# default=0 keeps max() from raising ValueError when no package has one.
maxlen = max((len(p['name']) for p in packages if p['descr'] != ''), default=0)
for p in packages:
    printx(f"{{YELLOW}}{p['name']: <{maxlen}}{{RESET}} {{WHITE}}{p['descr']}{{RESET}}")
@anki-code
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment