Last active
October 20, 2018 09:25
-
-
Save TheDash/a3d394cccbe921ce9a06977ef3acc28d to your computer and use it in GitHub Desktop.
ROS contributions by business
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python | |
from __future__ import division | |
import re | |
import urllib2, cookielib | |
import matplotlib.pyplot as plt | |
# Status page that is scraped for maintainer e-mail addresses.
SITE = "http://repositories.ros.org/status_page/ros_kinetic_default.html"

# Browser-like request headers sent with the page fetch.
REQUEST_HEADERS = {
    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.11 (KHTML, like Gecko) Chrome/23.0.1271.64 Safari/537.11',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Charset': 'ISO-8859-1,utf-8;q=0.7,*;q=0.3',
    'Accept-Encoding': 'none',
    'Accept-Language': 'en-US,en;q=0.8',
    'Connection': 'keep-alive',
}

# Captures the address of every ``mailto:`` link.  The local part accepts
# dots and upper-case letters and the first domain label accepts hyphens, so
# addresses such as ``first.last@my-company.com`` are not silently dropped.
MAILTO_RE = re.compile(
    r"mailto:([A-Za-z0-9!#$%&'*+/=?^_`{|}~.-]+@[A-Za-z0-9-]+\.[A-Za-z0-9.-]+)"
)

# Mail domains that are pooled under the "Community Contribution" label.
OPEN_SOURCE_CONTRIBUTORS = ["gmail.com", "googlegroups.com"]

# Businesses whose share is at or below this fraction are merged into "Other".
INSIGNIFICANT_PERCENTAGE = 0.01


def extract_maintainer_emails(html):
    """Return every e-mail address found in a ``mailto:`` link of *html*.

    Addresses are returned in document order; duplicates are kept so that
    each occurrence counts once in the statistics.
    """
    return MAILTO_RE.findall(html)


def group_by_business(emails, community_domains=OPEN_SOURCE_CONTRIBUTORS):
    """Group the local parts of *emails* by their mail domain.

    Domains are compared case-insensitively.  Any domain listed in
    *community_domains* is filed under ``"Community Contribution"``.

    Returns a dict mapping business label -> list of local parts.
    """
    groups = {}
    for email in emails:
        name, _, domain = email.rpartition("@")
        business = domain.lower()
        if business in community_domains:
            business = "Community Contribution"
        groups.setdefault(business, []).append(name)
    return groups


def consolidate_insignificant(groups, threshold=INSIGNIFICANT_PERCENTAGE):
    """Merge every business whose share is <= *threshold* into ``"Other"``.

    A new dict is returned and *groups* is left untouched, so no mapping is
    mutated while it is being iterated.  The names of the merged businesses
    are flattened into the ``"Other"`` list (one entry per address), which
    keeps the size of that slice proportional to the addresses it holds.
    """
    total = sum(len(names) for names in groups.values())
    if total == 0:
        # Nothing to weigh against; avoid dividing by zero.
        return dict(groups)

    consolidated = {}
    other = []
    for business, names in groups.items():
        # float() keeps this a true division even without the file-level
        # ``from __future__ import division``.
        pct = len(names) / float(total)
        if pct <= threshold:
            # Single parenthesised argument: prints identically on Python 2
            # (print statement) and Python 3 (print function).
            print(business + " maintains an insignificant amount. (" + str(pct) + ") Consolidating to other for viz purposes")
            other.extend(names)
        else:
            consolidated[business] = names

    if other:
        consolidated.setdefault("Other", []).extend(other)
    return consolidated


def main():
    """Fetch the status page and show a pie chart of addresses per business."""
    request = urllib2.Request(SITE, headers=REQUEST_HEADERS)
    page = urllib2.urlopen(request)
    try:
        content = page.read()
    finally:
        # Release the HTTP connection even if reading fails.
        page.close()

    groups = consolidate_insignificant(
        group_by_business(extract_maintainer_emails(content))
    )
    if not groups:
        print("No maintainer e-mail addresses found at " + SITE)
        return

    business_name = list(groups)
    package_count = [len(groups[business]) for business in business_name]

    plt.pie(package_count, labels=business_name, startangle=90, autopct='%.1f%%')
    plt.show()


if __name__ == "__main__":
    main()
Could you help me understand what orphaned means in this context?
At the moment I have a few guesses:
- No longer maintained
- Deprecated but required to be released
- Released and then deprecated
It was about a 50/50 split between googlegroups.com and gmail.com
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
May I suggest to classify
ros-orphaned-packages@googlegroups.com
separately, since it isn't a "Community Contribution" but basically orphaned packages. It would be interesting to see how that change would split the big chunk of "Community Contribution" in the current graph.