Last active
April 13, 2018 03:33
-
-
Save awesomebytes/18c6a3f0c20ec6d62ec872db694ead68 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
""" | |
Count lines of code of ROS packages. | |
Based on https://gist.github.com/mintar/269c62f1f2b4f00b057696ad8c324d03 | |
by Martin Guenther (https://github.com/mintar) | |
Part of this discussion: https://discourse.ros.org/t/are-serious-things-done-with-ros-in-python/4359/7 | |
Author: Sammy Pfeiffer <Sammy.Pfeiffer at student.uts.edu.au> | |
""" | |
from __future__ import division

import json
import os
import subprocess
import sys
from collections import defaultdict
from glob import glob
from operator import itemgetter
from urllib2 import HTTPError, Request, urlopen

import yaml
# Get all repo urls
repourls = []
dist_url = 'https://raw.githubusercontent.com/ros/rosdistro/master/kinetic/distribution.yaml'
# The distribution file comes from the network, so it is parsed with
# safe_load: plain yaml.load may construct arbitrary Python objects from
# tagged YAML.
dist_response = urlopen(dist_url)
try:
    distribution = yaml.safe_load(dist_response)
finally:
    # Release the HTTP connection as soon as the document has been parsed.
    dist_response.close()
for info in distribution['repositories'].itervalues():
    try:
        repourls.append(info['source']['url'])
    except KeyError:
        # Entries without a source URL cannot be cloned; skip them.
        pass
print('... done (%d repos loaded).' % len(repourls))
# Clone all repos
# Each repository is shallow-cloned (--depth 1) into the current directory.
# NOTE: the original author reported that Bitbucket repositories failed to
# clone on their machine; such failures are reported below and skipped.
for repourl in repourls:
    print("Cloning: " + repourl)
    # The URL comes from a downloaded file, so it is passed as a separate
    # argument (no shell involved) and placed after "--" so that it can
    # never be interpreted as shell syntax or as a git option.
    clone_status = subprocess.call(
        ["git", "clone", "--depth", "1", "--", repourl])
    if clone_status != 0:
        print("WARNING: could not clone " + repourl)
# Run cloc on every repo
# glob yields "./folder_name/"; keep only the bare "folder_name".
# basename/normpath strips exactly the leading "./" and the trailing "/",
# whereas a blanket replace('./', '') would also eat characters of a folder
# whose name ends with a dot.
repofoldernames = [os.path.basename(os.path.normpath(path))
                   for path in glob("./*/")]
for foldername in repofoldernames:
    # One report per repository, written next to the clone as "<folder>.yaml".
    reportfilename = foldername + ".yaml"
    # Requires cloc: sudo apt-get install cloc
    # Arguments are passed as a list so that folder names containing spaces
    # or shell metacharacters are handled correctly.
    subprocess.call(
        ["cloc", "--yaml", "--report-file=" + reportfilename, foldername])
# Collect stats
# Every *.yaml file in the working directory is read as a cloc report and
# its C++ (sources + headers) and Python 'code' line counts are accumulated.
yaml_files = glob('./*.yaml')
num_pkgs_cpp = 0
num_pkgs_python = 0
num_pkgs_both = 0
pkgs_both_list = []
total_cpp_loc = 0
total_python_loc = 0
# Maps "./<repo>" to that repo's C++/Python line counts and percentages.
summary_dict = {}
for yaml_f in yaml_files:
    # 'with' closes each report right away instead of leaking one file
    # handle per repository.
    with open(yaml_f, 'r') as report:
        stats = yaml.safe_load(report)
    if not isinstance(stats, dict):
        # Empty or non-mapping file: nothing to count for this entry.
        stats = {}

    cpp_loc = stats.get("C++", {}).get('code', 0)
    cpp_h_loc = stats.get("C/C++ Header", {}).get('code', 0)
    cpp_locs = cpp_loc + cpp_h_loc
    if cpp_locs > 0:
        num_pkgs_cpp += 1
        total_cpp_loc += cpp_locs

    pyt_loc = stats.get("Python", {}).get('code', 0)
    if pyt_loc > 0:
        num_pkgs_python += 1
        total_python_loc += pyt_loc

    # Strip only the extension; replace('.yaml', '') would also remove the
    # text from the middle of a name.
    repo_name = os.path.splitext(yaml_f)[0]
    if pyt_loc > 0 and cpp_locs > 0:
        num_pkgs_both += 1
        pkgs_both_list.append(repo_name)

    total_loc = float(cpp_locs + pyt_loc)
    if total_loc > 0.0:
        # Repos with neither C++ nor Python get no summary entry.
        print("For repo_name:" + str(repo_name))
        print("pct_python: " + str(pyt_loc) + " / " +
              str(total_loc) + " = " + str(pyt_loc / total_loc))
        print("pct_python %: " + str(pyt_loc / total_loc * 100.0))
        summary_dict[repo_name] = {'cpp_loc': cpp_locs,
                                   'python_loc': pyt_loc,
                                   'pct_cpp': cpp_locs / total_loc * 100.0,
                                   'pct_python': pyt_loc / total_loc * 100.0}

print("From " + str(len(yaml_files)) + " packages analysed")
print("There are " + str(num_pkgs_cpp) + " packages using C++")
print("With " + str(total_cpp_loc) + " LOC")
print("There are " + str(num_pkgs_python) + " packages using Python")
print("With " + str(total_python_loc) + " LOC")
print("And, actually, using both languages: " + str(num_pkgs_both))
total_loc = float(total_cpp_loc + total_python_loc)
if total_loc > 0.0:
    pct_cpp = total_cpp_loc / total_loc * 100.0
    pct_python = total_python_loc / total_loc * 100.0
else:
    # No reports, or no C++/Python code at all: avoid dividing by zero.
    pct_cpp = 0.0
    pct_python = 0.0
print(str(pct_cpp) + " % is CPP code")
print(str(pct_python) + " % is Python code")
# Recompute with the % of the repo we got before
# This section asks for a GitHub token, checks the remaining API rate limit,
# and downloads the GitHub metadata of every rosdistro repository hosted on
# github.com into 'repos'.
print("Please generate a personal access token here: https://github.com/settings/tokens/new .")
print("You don't need to give it any permissions, it is only required to increase the rate limit ")
print("when accessing the GitHub API.")
token = unicode(raw_input('Enter GitHub personal access token: '))

request = Request(u'https://api.github.com/rate_limit')
request.add_header('Authorization', 'token %s' % token)
# urlopen raises HTTPError for non-2xx answers (e.g. a rejected token), so
# the status has to be taken from the exception as well as from the response.
try:
    response = urlopen(request)
    status = response.code
except HTTPError as err:
    response = None
    status = err.code
if status != 200:
    print('ERROR: wrong access token')
    sys.exit(-1)
try:
    remaining_rate = json.load(response)['rate']['remaining']
finally:
    response.close()

print('\nWorking, please be patient. This will take 5-10 minutes.')
print('Loading rosdistro repos...')
reponames = []
# Downloaded data: parse with safe_load rather than yaml.load, which may
# construct arbitrary Python objects from tagged YAML.
dist_response = urlopen('https://raw.githubusercontent.com/ros/rosdistro/master/kinetic/distribution.yaml')
try:
    distribution = yaml.safe_load(dist_response)
finally:
    dist_response.close()
for info in distribution['repositories'].itervalues():
    try:
        reponames.append(info['source']['url'])
    except KeyError:
        # Entries without a source URL are skipped.
        pass
print('... done (%d repos loaded).' % len(reponames))

# reponames = ['https://github.com/ros/kdl_parser.git', ...]
# filter out non-github repos, remove github prefix + '.git'
# The suffix is removed only when present; slicing a fixed four characters
# would truncate URLs that do not end in '.git'.
github_prefix = 'https://github.com/'
git_suffix = '.git'
github_names = []
for url in reponames:
    if not url.startswith(github_prefix):
        continue
    name = url[len(github_prefix):]
    if name.endswith(git_suffix):
        name = name[:-len(git_suffix)]
    github_names.append(name.rstrip('/'))
reponames = github_names
# reponames = ['ros/kdl_parser', ...]

# One API request is made per repository.
if len(reponames) > remaining_rate:
    print('ERROR: more repos to process (%d) than remaining rate (%d)' % (len(reponames), remaining_rate))
    sys.exit(-1)

repos = []
for i, reponame in enumerate(reponames, 1):
    print('Reading repo %d/%d...' % (i, len(reponames)))
    request = Request(u'https://api.github.com/repos/%s' % reponame)
    request.add_header('Authorization', 'token %s' % token)
    try:
        response = urlopen(request)
        status = response.code
    except HTTPError as err:
        response = None
        status = err.code
    if status != 200:
        print('ERROR: rate limited?')
        # Show what GitHub answered so a missing repository can be told
        # apart from an exhausted rate limit.
        print('GitHub answered HTTP %d for %s' % (status, reponame))
        sys.exit(-1)
    try:
        repos.append(json.load(response))
    finally:
        response.close()
# Print a Markdown table of all fetched repos ordered by star count. For
# repos whose GitHub language is C++ or Python, the language column is
# extended with the C++/Python percentages computed from the cloc reports
# (when a report for that repo exists).
print('\n\n### ROS repos by popularity with percentage\n')
stargazers = []
for repo in repos:
    lang = repo['language']
    if lang == "C++" or lang == "Python":
        reponame = repo['full_name'].split('/')[1]
        # summary_dict keys carry the "./" prefix of the glob that found the
        # report files, so the same prefix is needed for the lookup.
        d = summary_dict.get("./" + reponame)
        print("reponame: " + reponame)
        print("dict: " + str(d))
        if d:
            cpp_label = "C++ (" + str(round(d['pct_cpp'], 1))
            python_label = "Python (" + str(round(d['pct_python'], 1))
            # The language reported by GitHub is listed first.
            if lang == "C++":
                ordered_labels = [cpp_label, python_label]
            else:
                ordered_labels = [python_label, cpp_label]
            lang = "), ".join(ordered_labels) + ")"
    # Every repo gets a row; only the language text differs.
    stargazers.append((repo['full_name'], repo['stargazers_count'], lang))

print('| rank | repo name | stars | language |')
print('|------|--------------------------------------------------------------|------:|-----------------|')
ranking = sorted(stargazers, key=itemgetter(1), reverse=True)
for rank, row in enumerate(ranking, 1):
    print('| {:3d}. | {:60} | {:5d} | {:15} |'.format(rank, *row))
# Count packages with actual *.py files
# Maps each cloned folder that contains at least one "*.py" entry to the
# number of such entries and their paths.
packages_with_python_files = {}
for foldername in repofoldernames:
    # A single 'find' per repository, run without a shell so that folder
    # names with spaces or metacharacters are passed through untouched.
    try:
        found = subprocess.check_output(
            ["find", foldername, "-name", "*.py"])
    except subprocess.CalledProcessError as err:
        # A non-zero exit status must not abort the whole run; keep
        # whatever output was captured before the failure.
        found = err.output
    # One path per line; splitting on lines (not on whitespace) keeps paths
    # that contain spaces in one piece.
    filelist = found.splitlines()
    if filelist:
        packages_with_python_files[foldername] = {'num_files': len(filelist),
                                                  'file_list': filelist}

num_pkgs_with_python_files = len(packages_with_python_files)
print("There are " + str(num_pkgs_with_python_files) +
      " packages with Python files")

print("Packages with less than 5 Python files:")
num_less_5 = 0
for k, v in packages_with_python_files.iteritems():
    if v['num_files'] < 5:
        num_less_5 += 1
        print(k + " python files:")
        print(v['file_list'])
print("There are " + str(num_less_5) +
      " packages with less than 5 Python files")
# check just the packages with C++ & Python
# print their package name, number of files, and the file list
# useful for doing a grep later on
# My output: https://pastebin.com/aiY5c1t2
for pkg in pkgs_both_list:
    # pkgs_both_list entries look like "./<folder>", while
    # packages_with_python_files is keyed by the bare folder name.
    pkg_name = pkg.replace('./', '')
    d = packages_with_python_files.get(pkg_name)
    if d is None:
        # The cloc report counted Python lines but no "*.py" file was found
        # (presumably Python scripts without the .py extension -- TODO
        # confirm). Report zero files instead of failing with a KeyError.
        print(pkg_name + " (0) python files:")
        continue
    print(pkg_name + " (" + str(d['num_files']) + ") python files:")
    for f in d['file_list']:
        print(" " + f)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment