Last active
March 23, 2020 15:16
-
-
Save jhejderup/80bab0c079f47ac7d2778baac6db8e25 to your computer and use it in GitHub Desktop.
Praezi
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
## cg-stats.py
## dep: pip3 install kafka-python
## run: python3 cg-stats.py
#
# Consume messages from the 'praezi.callgraphs' Kafka topic and print a
# running tally of (1) all messages seen, (2) non-empty call graphs whose
# build status is OK, and (3) non-empty call graphs marked FAIL.
from kafka import KafkaConsumer, KafkaProducer, TopicPartition
from kafka.errors import KafkaError
import json

consumer = KafkaConsumer(
    'praezi.callgraphs',
    bootstrap_servers=['localhost:30001', 'localhost:30002', 'localhost:30003'],
    auto_offset_reset='earliest',   # replay the topic from the beginning
    enable_auto_commit=False,       # offsets are committed manually below
    group_id='praezi.cgsworkers',
    value_deserializer=lambda m: json.loads(m.decode('utf-8')),
    api_version=(2, 2, 0),
    max_poll_interval_ms=3600000,   # tolerate up to 1h between polls
)


def _is_non_empty(cg):
    """A call graph counts as non-empty when it has both nodes and edges."""
    return len(cg['nodes']) > 0 and len(cg['edges']) > 0


all_topic = 0          # total messages seen
num_cgs = 0            # non-empty call graphs with status OK
num_fails_marked = 0   # non-empty call graphs with status FAIL

while True:
    msg_pack = consumer.poll(max_records=1)
    # NOTE(review): committing BEFORE processing means a crash below loses
    # the polled message (at-most-once delivery) — confirm this is intended.
    consumer.commit()
    for tp, messages in msg_pack.items():
        for message in messages:
            all_topic += 1
            # The two status branches share the non-empty check, so test it once.
            if _is_non_empty(message.value['cg']):
                if message.value['status'] == 'FAIL':
                    num_fails_marked += 1
                elif message.value['status'] == 'OK':
                    num_cgs += 1
            print("Stats [ all = {} non-empty = {} non-empty-failed = {} ]".format(all_topic, num_cgs, num_fails_marked))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Extract toolchain (build) information from docs.rs.

Deps: pip3 install requests beautifulsoup4
Run:  python3 docsrs.py <crate_name> <crate_version>
"""
import sys

import requests
from bs4 import BeautifulSoup

# Validate CLI arguments explicitly: `assert` is stripped under `python -O`,
# so it must not be used for input validation.
if len(sys.argv) != 3:
    raise SystemExit("usage: python3 docsrs.py <crate_name> <crate_version>")

URL = "https://docs.rs/crate/{}/{}/builds".format(sys.argv[1], sys.argv[2])
page = requests.get(URL)
soup = BeautifulSoup(page.content, 'html.parser')
# Each <a class="release"> on the builds page is one toolchain build row.
toolchains = soup.find_all('a', class_='release')
def isSuccess(clazz):
    """Return True when the icon class string marks a successful build.

    docs.rs renders a check-mark icon ("fa fa-check") next to builds that
    succeeded; any other icon class means the build failed.
    """
    # Return the comparison directly instead of if/else returning True/False.
    return clazz == "fa fa-check"
# Emit one CSV row per toolchain build:
#   crate,version,success,compiler,build_date
for tc in toolchains:
    row = tc.find('div', class_='pure-g')
    status_class = row.find('i')['class']  # BeautifulSoup returns a list of CSS classes
    status = ' '.join(status_class)
    compiler_elem = row.find('div', class_='pure-u-1 pure-u-sm-10-24')
    build_date_elem = row.find('div', class_='pure-u-1 pure-u-sm-3-24 date')
    csv_entry = "{},{},{},{},{}".format(
        sys.argv[1], sys.argv[2], isSuccess(status),
        compiler_elem.text, build_date_elem.text)
    print(csv_entry)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Populate the praezi.packages Kafka topic with one task per crate version
# taken from the crates.io registry index.
git clone https://github.com/rust-lang/crates.io-index.git
cd crates.io-index/
rm config.json  # delete the index config file (not a package entry)
# Build one JSON task per package file in the index.
find . -type f | parallel "cat {} | jq -c '{creation_date:{secs_since_epoch:0,nanos_since_epoch:0},crates:[{Package:{name: .name, version: .vers}}]}'" > process_list.txt
kafka-console-producer --broker-list localhost:30001 --topic praezi.packages < process_list.txt
kafkacat -b localhost:30001 -t praezi.packages  # verify that everything was added
# Crate names grouped by number of buildable versions:
cat docsrs.csv | grep True | awk -F"," '{print $1}' | sort | uniq -c | sort -k1,1nr -k2 > docsrs_groupby_releases.txt
# Downloads and number of releases per missing crate:
python3 get_stats.py docsrs_groupby_releases.txt compiled-packages.txt > missing_packages.txt
# Number of dependents per package for missing crates:
python3 /home/jhejderup/praezi-stats/py-src/resolve-missing-deps.py crates.io-index/ missing_packages_list.txt
# Filter by number of releases (field 2 must be numeric and below 2):
cat missing_packages.txt | awk -F"," '{if($2==$2+0 && $2 < 2) print $1,$2,$3}' > gt10_releases.txt
# Filter by downloads (field 3 must be numeric and over 1,000,000).
# NOTE(review): the original redirected this to gt10_releases.txt as well,
# clobbering the previous result — written to a distinct file here.
cat missing_packages.txt | awk -F"," '{if($3==$3+0 && $3 > 1000000) print $1,$2,$3}' > gt1m_downloads.txt
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
# Analysis one-liners over a crates.io-index checkout (run from inside it).

# Get all unique package names:
find . -type f -not -path './.git*' | parallel "cat {} | jq -c '.name'" > ../packages_list.txt
# Frequency of yanked vs non-yanked package versions (`wf` in the original notes):
find . -type f -not -path './.git*' | parallel "cat {} | jq -c '.yanked'" | sort | uniq -c
# Dump all yanked versions:
find . -type f -not -path './.git*' | parallel "cat {} | jq -c 'select(.yanked == true) | {name: .name, version: .vers}'" > ../yanked_versions.txt
# Count unique packages with at least one yanked version:
cat ../yanked_versions.txt | jq -c '.name' | sort | uniq | wc -l
# Breakdown by dependency kind (normal/dev/build):
find . -type f -not -path './.git*' | parallel "cat {} | jq -c '.deps[].kind'" | sort | uniq
# Exact version requirements ("= x.y.z"):
cat dependency_req.txt | grep -v ">" | grep -v "<" | grep -F '= ' | wc -l
# Tilde notation ("~x.y"):
cat dependency_req.txt | grep -F '~' | wc -l
# Star (wildcard) notation ("*"):
cat dependency_req.txt | grep -F '*' | wc -l
# Caret notation ("^x.y"):
cat dependency_req.txt | grep -F '^' | wc -l
# Dependency names that match no known package (typosquat candidates):
cat ../dependency_name.txt | sort | uniq | sed -e 's/^"//' -e 's/"$//' | parallel 'grep -q "{}" /data/praezi/packages_list.txt; [ $? -eq 0 ] || echo {}' | wc -l