Skip to content

Instantly share code, notes, and snippets.

@rochacbruno
Last active September 13, 2023 16:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save rochacbruno/528074ebe6dddde2f848200717eece10 to your computer and use it in GitHub Desktop.
Save rochacbruno/528074ebe6dddde2f848200717eece10 to your computer and use it in GitHub Desktop.
galaxy_scores
"""
Dumps each collection and repository scores to a local scores/ folder
"""
import json
import sys
from pathlib import Path
from statistics import mean
from typing import Dict, Optional, Union
import django
from django.conf import settings
from django.db.models import QuerySet
django.setup() # noqa
from galaxy.main.models.collection import Collection
from galaxy.main.models.repository import Repository, RepositorySurvey
# Root directory (a relative path) under which every JSON dump is written.
OUTPUT_FOLDER = "scores"
# Survey attribute names whose per-item averages are included in each Score.
# NOTE: the mixed-case spelling of this constant is referenced elsewhere in
# this file; any rename has to be applied to those references as well.
SURVEY_FIElDS = (
"docs",
"ease_of_use",
"does_what_it_says",
"works_as_is",
"used_in_production",
)
class Score:
    """Score summary for a single collection or role.

    The constructor stores every keyword argument as an instance attribute,
    and ``as_dict`` / ``as_json`` serialise whatever attributes were set.
    Use ``from_collection`` or ``from_repository`` to build an instance from
    a database object, then ``save`` to write it to disk.
    """

    namespace: str
    name: str
    kind: str
    score: Optional[float]
    count: int
    quality_score: Optional[float]
    docs: Optional[float]
    ease_of_use: Optional[float]
    does_what_it_says: Optional[float]
    works_as_is: Optional[float]
    used_in_production: Optional[float]

    def __init__(self, **kwargs):
        """Store each keyword argument as an attribute of the same name."""
        for key, value in kwargs.items():
            setattr(self, key, value)

    def as_dict(self, exclude=("kind", "namespace", "name")) -> Dict:
        """Return the instance attributes as a dict, minus ``exclude``.

        The identifying attributes are excluded by default because ``save``
        already encodes them in the output path / JSON keys. Passing a falsy
        ``exclude`` (e.g. ``None``) returns every attribute.
        """
        exclude = exclude or tuple()
        return {k: v for k, v in self.__dict__.items() if k not in exclude}

    def as_json(self) -> str:
        """Return ``as_dict()`` (default exclusions) encoded as a JSON string."""
        return json.dumps(self.as_dict())

    @classmethod
    def _get_mean(cls, qs: "QuerySet", key: str) -> Optional[float]:
        """Return the mean of attribute ``key`` over ``qs``, rounded to 1 decimal.

        Values equal to 0 or ``None`` are treated as "no vote" and skipped.
        Returns ``None`` when no item carries a usable value.
        """
        # consider only the categories votes to calculate the average
        # see https://galaxy.ansible.com/docs/contributing/content_scoring.html
        values = [
            value
            for value in (getattr(item, key) for item in qs)
            if value not in (0, None)
        ]
        if not values:
            return None
        return round(mean(values), 1)

    @classmethod
    def _calculate_score(
        cls, obj: "Union[Repository, Collection]"
    ) -> Optional[float]:
        """Combine quality and community scores into one value.

        When both scores are present, the community score is weighted by
        ``survey_count / 6`` (survey count capped at 3) and the quality score
        by the remainder. Otherwise whichever score is available is used.
        Returns ``None`` when no score is available; the result is rounded to
        one decimal place.
        """
        # see https://github.com/ansible/galaxy/blob/6a374cacdf0f04de94486913bba5285e24e178d3/galaxyui/src/app/utilities/score/score.component.ts#L33
        quality_score = getattr(obj, "quality_score", None)
        community_score = obj.community_score
        # At most three surveys contribute to the community weight.
        survey_count = min(obj.community_survey_count, 3)
        if quality_score is not None and community_score is not None:
            score = quality_score * ((6 - survey_count) / 6) + community_score * (
                survey_count / 6
            )
        else:
            # A falsy community score (None or 0) falls back to the quality score.
            score = community_score or quality_score
        if score is None:
            return None
        return round(score * 10) / 10

    @classmethod
    def new(
        cls,
        obj: "Union[Repository, Collection]",
        kind: str,
        namespace: str,
        surveys: "QuerySet",
        quality_score: Optional[float] = None,
    ) -> "Score":
        """Build a Score for ``obj``.

        ``kind`` and ``namespace`` identify where the score is saved,
        ``surveys`` is the iterable of survey rows averaged per field in
        ``SURVEY_FIElDS``, and ``quality_score`` (if given) is stored rounded
        to one decimal place.
        """
        if quality_score is not None:
            quality_score = round(quality_score, 1)
        survey_means = {
            field: cls._get_mean(surveys, field) for field in SURVEY_FIElDS
        }
        return cls(
            kind=kind,
            namespace=namespace,
            quality_score=quality_score,
            name=obj.name,
            score=cls._calculate_score(obj),
            count=obj.community_survey_count,
            **survey_means,
        )

    @classmethod
    def from_collection(cls, collection: "Collection") -> "Score":
        """Build a ``collection`` Score; no explicit quality score is passed."""
        return cls.new(
            collection,
            kind="collection",
            namespace=collection.namespace.name,
            surveys=collection.collectionsurvey_set.all(),
        )

    @classmethod
    def from_repository(cls, repository: "Repository") -> "Score":
        """Build a ``role`` Score, using the GitHub user as namespace."""
        return cls.new(
            repository,
            kind="role",
            namespace=repository.github_user,
            quality_score=repository.quality_score,
            surveys=RepositorySurvey.objects.filter(repository=repository),
        )

    @staticmethod
    def _load_json(path: Path) -> Dict:
        """Return the JSON content of ``path``, or an empty dict if it is absent."""
        if path.exists():
            return json.loads(path.read_text(encoding="utf-8"))
        return {}

    def save(self):
        """Write this score to three JSON files under ``OUTPUT_FOLDER``.

        * ``<kind>/<namespace>/<name>.json`` holds this score alone.
        * ``<kind>/<namespace>.json`` maps names to scores for the namespace.
        * ``<kind>.json`` maps namespaces to names to scores for the kind.

        The two aggregate files are read, updated and rewritten on each call.
        """
        payload = self.as_dict()
        kind_folder = Path(OUTPUT_FOLDER) / self.kind

        # dump individual /kind/namespace/name.json
        namespace_folder = kind_folder / self.namespace
        namespace_folder.mkdir(parents=True, exist_ok=True)
        item_path = namespace_folder / (self.name + ".json")
        item_path.write_text(self.as_json(), encoding="utf-8")

        # dump to /kind/namespace.json
        namespace_file_path = kind_folder / (self.namespace + ".json")
        namespace_data = self._load_json(namespace_file_path)
        namespace_data[self.name] = payload
        namespace_file_path.write_text(json.dumps(namespace_data), encoding="utf-8")

        # dump to kind.json
        kind_file_path = Path(OUTPUT_FOLDER) / (self.kind + ".json")
        kind_data = self._load_json(kind_file_path)
        kind_data.setdefault(self.namespace, {})[self.name] = payload
        kind_file_path.write_text(json.dumps(kind_data), encoding="utf-8")
def progressbar(it, prefix="", size=60, out=sys.stdout):  # Python3.6+
    """Yield the items of ``it`` while drawing a text progress bar on ``out``.

    ``it`` must support ``len()``. The bar is ``size`` characters wide, is
    preceded by ``prefix``, and is redrawn in place (carriage return) before
    the first item and after each item is consumed. A blank line is printed
    once iteration finishes. An empty ``it`` draws an empty ``0/0`` bar
    instead of failing.
    """
    count = len(it)

    def show(done):
        # Guard against division by zero when there is nothing to iterate.
        filled = int(size * done / count) if count else 0
        bar = "█" * filled + "." * (size - filled)
        print(
            f"{prefix}[{bar}] {done}/{count}",
            end="\r",
            file=out,
            flush=True,
        )

    show(0)
    for done, item in enumerate(it, start=1):
        yield item
        show(done)
    print("\n", flush=True, file=out)
def main():
    """Dump a Score for every collection and repository that has surveys.

    Both querysets keep only rows with at least one community survey and are
    ordered by descending survey count. Collections are processed first, then
    repositories, and a summary line is printed at the end.
    """
    surveyed = {"community_survey_count__gt": 0}
    most_surveyed_first = "-community_survey_count"
    collections = Collection.objects.filter(**surveyed).order_by(most_surveyed_first)
    repositories = Repository.objects.filter(**surveyed).order_by(most_surveyed_first)

    for collection in progressbar(collections, "Collections: "):
        Score.from_collection(collection).save()

    for repository in progressbar(repositories, "Repositories: "):
        Score.from_repository(repository).save()

    print(
        f"Dumped {len(collections)} collections and {len(repositories)} roles to {OUTPUT_FOLDER}/"
    )
# Entry point: the dump runs unconditionally as soon as this module is executed.
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment