Skip to content

Instantly share code, notes, and snippets.

@jhejderup
Last active May 19, 2020 13:14
Show Gist options
  • Save jhejderup/fd0ebc01333ca98b2fd285f11295e6d2 to your computer and use it in GitHub Desktop.
Save jhejderup/fd0ebc01333ca98b2fd285f11295e6d2 to your computer and use it in GitHub Desktop.
Präzi Analytics
## ON LIMA
# All commands below are relative to the Präzi data directory; the depth-1
# path component is treated as the package (see the awk field below) and the
# depth-2 directories as its releases.
cd /data/praezi/data
## Total number of processed releases
# Count the depth-2 directories instead of just listing their names.
find . -maxdepth 2 -mindepth 2 -type d -printf '%f\n' | wc -l
## Total number of releases with a call graph
find . -name callgraph.json | wc -l
## Total number of packages with a call graph
# Field 2 of "./<package>/..." is the package directory; sort -u de-duplicates.
find . -name callgraph.json | awk -F/ '{print $2}' | sort -u | wc -l
## Total number of lockfiles
find . -name Cargo.lock | wc -l
## Total number of compile failures
find . -name compile_error.log | wc -l
## Find empty folders (unanalyzed)
find . -maxdepth 2 -mindepth 2 -type d -empty
################### Results
# Processed: 207,458
# Call graphs: 142,301
# Lockfiles: 142,050 (!)
# Compile failures: 65,101
# Packages with a call graph: 30,666
################### Reprocess
# 208,023 - 207,458 - 56 = 509
######################################
"""Extract toolchain information from docs.rs
Deps: pip3 install requests beautifulsoup4
Run: python3 docsrs.py <crate_name> <crate_version>
"""
import csv
import sys

import requests
from bs4 import BeautifulSoup
# Validate the command line explicitly: `assert` is stripped under `python -O`
# and gives the user no hint about the expected arguments when it fails.
if len(sys.argv) != 3:
    sys.exit("Usage: python3 docsrs.py <crate_name> <crate_version>")

# docs.rs builds page for the requested crate name and version.
URL = "https://docs.rs/crate/{}/{}/builds".format(sys.argv[1], sys.argv[2])

# The timeout keeps the script from hanging forever on a stalled connection.
page = requests.get(URL, timeout=30)
# Fail loudly on 4xx/5xx instead of parsing an error page and silently
# printing nothing.
page.raise_for_status()

soup = BeautifulSoup(page.content, 'html.parser')
# Every <a class="release"> element is treated as one build entry below.
toolchains = soup.find_all('a', class_='release')
def isSuccess(clazz):
    """Return whether *clazz* is the class string of the success icon.

    Args:
        clazz: Space-joined CSS classes of a build's status ``<i>`` element.

    Returns:
        True only when *clazz* is exactly ``"fa fa-check"``; False otherwise.
    """
    return clazz == "fa fa-check"
# Emit one CSV row per build: crate, version, success flag, compiler, date.
# csv.writer quotes any field containing a comma, quote or newline, so scraped
# text cannot break the column layout; "\n" keeps the line ending print() used.
writer = csv.writer(sys.stdout, lineterminator="\n")
for tc in toolchains:
    row = tc.find('div', class_='pure-g')
    if row is None:
        print("skipping build entry without a 'pure-g' row", file=sys.stderr)
        continue

    status_icon = row.find('i')
    compiler_elem = row.find('div', class_='pure-u-1 pure-u-sm-10-24')
    build_date_elem = row.find('div', class_='pure-u-1 pure-u-sm-3-24 date')
    if status_icon is None or compiler_elem is None or build_date_elem is None:
        # Report unexpected markup instead of dying with an AttributeError.
        print("skipping build entry with unexpected markup", file=sys.stderr)
        continue

    # BeautifulSoup exposes the multi-valued `class` attribute as a list;
    # join it back into the single string isSuccess() compares against.
    status = ' '.join(status_icon.get('class', []))
    writer.writerow([
        sys.argv[1],
        sys.argv[2],
        isSuccess(status),
        # Strip layout whitespace surrounding the cell text.
        compiler_elem.text.strip(),
        build_date_elem.text.strip(),
    ])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment