Skip to content

Instantly share code, notes, and snippets.

@awesomebytes
Last active April 13, 2018 03:33
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save awesomebytes/18c6a3f0c20ec6d62ec872db694ead68 to your computer and use it in GitHub Desktop.
Save awesomebytes/18c6a3f0c20ec6d62ec872db694ead68 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""
Count lines of code of ROS packages.
Based on https://gist.github.com/mintar/269c62f1f2b4f00b057696ad8c324d03
by Martin Guenther (https://github.com/mintar)
Part of this discussion: https://discourse.ros.org/t/are-serious-things-done-with-ros-in-python/4359/7
Author: Sammy Pfeiffer <Sammy.Pfeiffer at student.uts.edu.au>
"""
from __future__ import division

import json
import os
import subprocess
import sys
from collections import defaultdict
from glob import glob
from operator import itemgetter
from urllib2 import urlopen, Request, HTTPError

import yaml
# Get all repo urls
# Download the kinetic distribution file and collect the source URL of every
# repository entry that declares one.
dist_url = 'https://raw.githubusercontent.com/ros/rosdistro/master/kinetic/distribution.yaml'
dist_response = urlopen(dist_url)
try:
    # The document comes from the network: safe_load only builds plain
    # Python containers, whereas yaml.load may construct arbitrary objects.
    distribution = yaml.safe_load(dist_response)
finally:
    dist_response.close()
repourls = []
for info in distribution['repositories'].values():
    # Entries without a source URL are skipped.
    try:
        repourls.append(info['source']['url'])
    except KeyError:
        pass
print('... done (%d repos loaded).' % len(repourls))
# Clone all repos
# Shallow-clone every source URL into the current working directory.
for repourl in repourls:
    print("Cloning: " + repourl)
    # Tried to workaround bitbucket cloning not working on my machine...
    # if 'bitbucket' in repourl:
    # repourl = repourl.replace('https://', 'ssh://hg@')
    # Pass an argv list instead of a shell string: the URL comes from a
    # downloaded file and must not be interpreted by the shell.
    returncode = subprocess.call(["git", "clone", "--depth", "1", repourl])
    if returncode != 0:
        # Keep going: one repo that cannot be cloned should not stop the run.
        print("WARNING: git clone failed for " + repourl +
              " (exit code " + str(returncode) + ")")
# Run cloc on every repo
# glob yields "./folder_name/"; keep only the bare "folder_name".
repofoldernames = [fname.replace('./', '').replace('/', '')
                   for fname in glob("./*/")]
for foldername in repofoldernames:
    # One YAML report per folder, written next to the folders themselves.
    reportfilename = foldername + ".yaml"
    # sudo apt-get install cloc
    # argv list rather than a shell string, so folder names containing
    # spaces or shell metacharacters are passed through untouched.
    subprocess.call(["cloc", "--yaml",
                     "--report-file=" + reportfilename, foldername])
# Collect stats
# Read every cloc YAML report in the current directory and accumulate, per
# repo and overall, the C++ (sources + headers) and Python lines of code.
yaml_files = glob('./*.yaml')
num_pkgs_cpp = 0
num_pkgs_python = 0
num_pkgs_both = 0
pkgs_both_list = []
total_cpp_loc = 0
total_python_loc = 0
summary_dict = {}
for yaml_f in yaml_files:
    # Close the report file deterministically instead of leaking the handle.
    with open(yaml_f, 'r') as report:
        stats = yaml.safe_load(report)
    if not isinstance(stats, dict):
        # Empty or non-mapping YAML file: there is nothing to count.
        continue
    cpp_locs = (stats.get("C++", {}).get('code', 0) +
                stats.get("C/C++ Header", {}).get('code', 0))
    pyt_loc = stats.get("Python", {}).get('code', 0)
    if cpp_locs > 0:
        num_pkgs_cpp += 1
        total_cpp_loc += cpp_locs
    if pyt_loc > 0:
        num_pkgs_python += 1
        total_python_loc += pyt_loc
    # Keeps the leading "./" from the glob pattern; the later lookups into
    # summary_dict and pkgs_both_list account for that prefix.
    repo_name = yaml_f.replace('.yaml', '')
    if pyt_loc > 0 and cpp_locs > 0:
        num_pkgs_both += 1
        pkgs_both_list.append(repo_name)
    total_loc = float(cpp_locs + pyt_loc)
    # Repos with neither language are left out of summary_dict, which also
    # avoids dividing by zero.
    if total_loc > 0.0:
        print("For repo_name:" + str(repo_name))
        print("pct_python: " + str(pyt_loc) + " / " +
              str(total_loc) + " = " + str(pyt_loc / total_loc))
        print("pct_python %: " + str(pyt_loc / total_loc * 100.0))
        summary_dict[repo_name] = {'cpp_loc': cpp_locs,
                                   'python_loc': pyt_loc,
                                   'pct_cpp': cpp_locs / total_loc * 100.0,
                                   'pct_python': pyt_loc / total_loc * 100.0}
print("From " + str(len(yaml_files)) + " packages analysed")
print("There are " + str(num_pkgs_cpp) + " packages using C++")
print("With " + str(total_cpp_loc) + " LOC")
print("There are " + str(num_pkgs_python) + " packages using Python")
print("With " + str(total_python_loc) + " LOC")
print("And, actually, using both languages: " + str(num_pkgs_both))
total_loc = float(total_cpp_loc + total_python_loc)
if total_loc > 0.0:
    pct_cpp = total_cpp_loc / total_loc * 100.0
    pct_python = total_python_loc / total_loc * 100.0
else:
    # No C++ or Python code was found at all (e.g. no reports were
    # generated): report 0 % instead of raising ZeroDivisionError.
    pct_cpp = 0.0
    pct_python = 0.0
print(str(pct_cpp) + " % is CPP code")
print(str(pct_python) + " % is Python code")
# Recompute with the % of the repo we got before
# Ask for a GitHub token, check it against the rate-limit endpoint, then
# fetch the GitHub metadata of every GitHub-hosted repo in the distribution.
print("Please generate a personal access token here: https://github.com/settings/tokens/new .")
print("You don't need to give it any permissions, it is only required to increase the rate limit ")
print("when accessing the GitHub API.")
print("")
token = unicode(raw_input('Enter GitHub personal access token: '))
request = Request(u'https://api.github.com/rate_limit')
request.add_header('Authorization', 'token %s' % token)
# urlopen raises HTTPError for non-2xx answers (e.g. 401 for a rejected
# token), so the status must be read from the exception as well.
try:
    response = urlopen(request)
    status = response.code
except HTTPError as err:
    status = err.code
if status != 200:
    print('ERROR: wrong access token')
    sys.exit(-1)
rate_info = json.load(response)
response.close()
print('\nWorking, please be patient. This will take 5-10 minutes.')
print('Loading rosdistro repos...')
dist_response = urlopen('https://raw.githubusercontent.com/ros/rosdistro/master/kinetic/distribution.yaml')
try:
    # Network-provided document: parse with safe_load rather than yaml.load.
    distribution = yaml.safe_load(dist_response)
finally:
    dist_response.close()
reponames = []
for info in distribution['repositories'].values():
    try:
        reponames.append(info['source']['url'])
    except KeyError:
        pass
print('... done (%d repos loaded).' % len(reponames))
# reponames = ['https://github.com/ros/kdl_parser.git', ...]
# filter out non-github repos, remove github prefix + '.git'
github_prefix = 'https://github.com/'
full_names = []
for url in reponames:
    if not url.startswith(github_prefix):
        continue
    full_name = url[len(github_prefix):]
    # Only strip the suffix when it is really there; a fixed [19:-4] slice
    # would cut four characters off URLs that do not end in '.git'.
    if full_name.endswith('.git'):
        full_name = full_name[:-len('.git')]
    full_names.append(full_name)
reponames = full_names
# reponames = ['ros/kdl_parser', ...]
remaining_rate = rate_info['rate']['remaining']
if len(reponames) > remaining_rate:
    print('ERROR: more repos to process (%d) than remaining rate (%d)' % (len(reponames), remaining_rate))
    sys.exit(-1)
repos = []
for i, reponame in enumerate(reponames, 1):
    print('Reading repo %d/%d...' % (i, len(reponames)))
    request = Request(u'https://api.github.com/repos/%s' % reponame)
    request.add_header('Authorization', 'token %s' % token)
    try:
        response = urlopen(request)
    except HTTPError as err:
        if err.code == 404:
            # A repo that no longer exists should not abort the whole run.
            print('WARNING: repo %s not found on GitHub, skipping' % reponame)
            continue
        print('ERROR: rate limited?')
        sys.exit(-1)
    if response.code != 200:
        print('ERROR: rate limited?')
        sys.exit(-1)
    repos.append(json.load(response))
    response.close()
# Print a Markdown table of the repos, most-starred first. For repos whose
# GitHub language is C++ or Python and for which LOC stats exist, the language
# column is expanded with the percentage of each language.
print('\n\n### ROS repos by popularity with percentage\n')
stargazers = []
for repo in repos:
    language = repo['language']
    if language == "C++" or language == "Python":
        short_name = repo['full_name'].split('/')[1]
        # summary_dict keys carry a leading "./" (messed up with the ./, sorry)
        loc_stats = summary_dict.get("./" + short_name)
        print("reponame: " + short_name)
        print("dict: " + str(loc_stats))
        if loc_stats:
            cpp_part = "C++ (" + str(round(loc_stats['pct_cpp'], 1)) + ")"
            python_part = "Python (" + str(round(loc_stats['pct_python'], 1)) + ")"
            # The language GitHub reports goes first.
            if language == "C++":
                language = cpp_part + ", " + python_part
            else:
                language = python_part + ", " + cpp_part
    # Every repo is listed; only C++/Python ones get the annotation above.
    stargazers.append((repo['full_name'], repo['stargazers_count'], language))
print('| rank | repo name | stars | language |')
print('|------|--------------------------------------------------------------|------:|-----------------|')
ranked = sorted(stargazers, key=itemgetter(1), reverse=True)
for rank, (full_name, stars, row_language) in enumerate(ranked, 1):
    print('| {:3d}. | {:60} | {:5d} | {:15} |'.format(
        rank, full_name, stars, row_language))
# Count packages with actual *.py files
# For every cloned folder, list the paths matching "*.py" and remember the
# folders that have at least one.
packages_with_python_files = {}
for foldername in repofoldernames:
    # A single `find` call with an argv list: no shell quoting issues and no
    # second traversal just to count the matches.
    try:
        found = subprocess.check_output(
            ["find", foldername, "-name", "*.py"])
    except subprocess.CalledProcessError as err:
        # find exited non-zero; keep whatever it printed before failing.
        found = err.output
    # One path per line, so paths containing spaces stay in one piece.
    filelist = found.splitlines()
    if filelist:
        packages_with_python_files[foldername] = {'num_files': len(filelist),
                                                  'file_list': filelist}
num_pkgs_with_python_files = len(packages_with_python_files)
print("There are " + str(num_pkgs_with_python_files) +
      " packages with Python files")
print("Packages with less than 5 Python files:")
num_less_5 = 0
for k, v in packages_with_python_files.items():
    if v['num_files'] < 5:
        num_less_5 += 1
        print(k + " python files:")
        print(v['file_list'])
print("There are " + str(num_less_5) +
      " packages with less than 5 Python files")
# check just the packages with C++ & Python
# print their package name, number of files, and the file list
# useful for doing a grep later on
# My output: https://pastebin.com/aiY5c1t2
for pkg in pkgs_both_list:
    # pkgs_both_list entries carry a leading "./"; the dict keys do not.
    pkg_name = pkg.replace('./', '')
    d = packages_with_python_files.get(pkg_name)
    if d is None:
        # No *.py files were recorded for this name; report zero instead of
        # raising KeyError.
        d = {'num_files': 0, 'file_list': []}
    print(pkg_name + " (" + str(d['num_files']) + ") python files:")
    for f in d['file_list']:
        print(" " + f)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment