#!/usr/bin/env python3
# Source: GitHub Gist pietroalbini/95d294b96a0d0885d43711eb2f0a579b
# by @pietroalbini, created January 9, 2021 19:04.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
# THE SOFTWARE.
import csv
import sys
# Substrings that, when found (case-insensitively) in an author entry, make
# is_general() treat that entry as a general/collective name.
GENERAL_TERMS = ["team", "developer", "author", "project"]


def is_general(authors):
    """Return True if any author entry contains one of GENERAL_TERMS.

    Matching is a case-insensitive substring test, so an entry such as
    "The Foo Developers" matches the term "developer".

    Args:
        authors: Iterable of author strings.

    Returns:
        True if at least one entry contains at least one term; False
        otherwise, including when ``authors`` is empty.
    """
    # Lowercase each entry once, then test every term against it.
    lowered = (author.lower() for author in authors)
    return any(term in name for name in lowered for term in GENERAL_TERMS)
# Fix loading versions.csv: raise the csv module's per-field size limit as
# high as the platform allows. csv.field_size_limit() converts its argument
# to a C long, so sys.maxsize raises OverflowError where long is 32 bits
# (e.g. Windows); halve the value until it is accepted.
_field_size_limit = sys.maxsize
while True:
    try:
        csv.field_size_limit(_field_size_limit)
    except OverflowError:
        _field_size_limit //= 2
    else:
        break
print("loading version authors")

# version id -> list of author names, in the order the rows appear in the
# file. Kept as lists only while loading.
version_authors_mut = {}
# newline="" is what the csv module documents for file objects it reads, so
# that newlines embedded in quoted fields are interpreted by the parser.
with open("data/version_authors.csv", "r", newline="") as f:
    for row in csv.DictReader(f):
        version_authors_mut.setdefault(int(row["version_id"]), []).append(row["name"])

# version id -> tuple of author names. Tuples are hashable, so whole author
# lists can be compared and deduplicated as set members.
version_authors = {
    version_id: tuple(authors)
    for version_id, authors in version_authors_mut.items()
}
print("loading crate versions")

# crate id -> list of that crate's version ids, in the order the rows appear
# in the file.
crate_versions = {}
# newline="" is what the csv module documents for file objects it reads, so
# that newlines embedded in quoted fields are interpreted by the parser.
with open("data/versions.csv", "r", newline="") as f:
    for row in csv.DictReader(f):
        crate_versions.setdefault(int(row["crate_id"]), []).append(int(row["id"]))
print("counting number of changes")

# number of distinct author lists seen across a crate's versions -> number
# of crates with that many distinct lists.
number_of_changes = {}
# Crates with exactly two distinct author lists, of which exactly one is
# general according to is_general().
did_generalize = 0
# Crates with exactly two distinct author lists, both of them general.
already_general = 0

for versions in crate_versions.values():
    # Versions with no recorded authors contribute the empty tuple, which
    # counts as its own distinct author list.
    distinct_author_lists = {
        version_authors.get(version_id, ()) for version_id in versions
    }
    distinct_count = len(distinct_author_lists)
    number_of_changes[distinct_count] = number_of_changes.get(distinct_count, 0) + 1

    if distinct_count != 2:
        continue

    # NOTE(review): a set carries no version order, so this cannot tell
    # "specific -> general" apart from "general -> specific"; both are
    # counted as did_generalize.
    first, second = distinct_author_lists
    first_general = is_general(first)
    second_general = is_general(second)
    if first_general != second_general:
        did_generalize += 1
    elif first_general:
        already_general += 1
# Re-key the histogram in ascending order of distinct-author-list count so
# the table rows print sorted.
number_of_changes = dict(sorted(number_of_changes.items()))

print()
print("| # of field changes | # of crates | % of crates |")
print("| --- | --- | --- |")
total_crates = len(crate_versions)
for distinct_lists, crates in number_of_changes.items():
    # N distinct author lists are reported as N - 1 changes of the field.
    print("| {} | {} | {:.1f}% |".format(distinct_lists - 1, crates, crates * 100 / total_crates))
print()

# Both percentages are relative to the crates that have exactly two distinct
# author lists. That bucket may be absent from the histogram; report 0.0%
# in that case instead of failing with KeyError.
two_list_crates = number_of_changes.get(2, 0)
did_generalize_pct = did_generalize * 100 / two_list_crates if two_list_crates else 0.0
already_general_pct = already_general * 100 / two_list_crates if two_list_crates else 0.0
print("did generalize: {} ({:.1f}%)".format(did_generalize, did_generalize_pct))
print("already general: {} ({:.1f}%)".format(already_general, already_general_pct))
# [GitHub page footer] Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment