Skip to content

Instantly share code, notes, and snippets.

@flodolo
Created December 20, 2022 19:59
Show Gist options
  • Save flodolo/ee76904421ad7a820a65b690e27e2542 to your computer and use it in GitHub Desktop.
Save flodolo/ee76904421ad7a820a65b690e27e2542 to your computer and use it in GitHub Desktop.
Analyze Fenix data with CSV
#!/usr/bin/env python3
"""
This script checks the top locales for Fenix in specific countries and
tries to find the best match for each among Pontoon locales.
It expects a CSV file saved as "android_data.csv", exported from this query:
https://sql.telemetry.mozilla.org/queries/89302
"""
import csv
import json
import os
from collections import defaultdict
from urllib.parse import quote as urlquote
from urllib.request import urlopen
def getPontoonBestMatch(locale, pontoon_locales):
    """Return the Pontoon locale code that best matches ``locale``.

    Candidates are tried in this order:

    1. ``locale`` itself, when it is in ``pontoon_locales``.
    2. ``locale`` truncated at its first "-" (e.g. "fr-CA" -> "fr"), when
       that shorter code is in ``pontoon_locales``.
    3. A hard-coded fallback keyed on the truncated code
       ("en" -> "en-US", "es" -> "es-ES"). The fallback value is returned
       without checking that it is in ``pontoon_locales``.
    4. The string "N/A".

    Args:
        locale: Locale code to look up, e.g. "pt-BR".
        pontoon_locales: Any container supporting ``in`` that holds the
            locale codes known to Pontoon.

    Returns:
        The matching locale code, or "N/A" when nothing matches.
    """
    # Perfect match
    if locale in pontoon_locales:
        return locale

    # Match without the region code (e.g. "fr-CA" -> "fr")
    locale_no_region = locale.split("-")[0]
    if locale_no_region in pontoon_locales:
        return locale_no_region

    # Return mapped locales in some cases, otherwise "N/A"
    map_locales = {
        "en": "en-US",
        "es": "es-ES",
    }
    return map_locales.get(locale_no_region, "N/A")
def main():
    """Combine Pontoon completion stats with per-country locale usage.

    Steps:

    1. Query Pontoon's GraphQL endpoint for the "firefox-for-android"
       project and compute, per locale, the ratio of non-missing strings
       to total strings (rounded to 4 decimals).
    2. Read "android_data.csv" (columns used: "name", "locale",
       "client_count") and keep the first 5 non-empty locales seen for
       each country.
    3. Write "output.csv" with one row per (country, locale), including
       the best-matching Pontoon locale and its completion ratio.

    Raises:
        OSError: If "android_data.csv" cannot be read or "output.csv"
            cannot be written.
        KeyError: If an expected column is missing from the input CSV.
    """
    # Get completion stats for locales from Pontoon
    query = """
    {
      firefox: project(slug: "firefox-for-android") {
        localizations {
          locale {
            code
          },
          missingStrings,
          totalStrings
        }
      }
    }
    """
    pontoon_stats = {}
    try:
        print("Reading Pontoon stats...")
        url = "https://pontoon.mozilla.org/graphql?query={}".format(urlquote(query))
        # Close the HTTP response deterministically once it is parsed.
        with urlopen(url) as response:
            json_data = json.load(response)
        for project_data in json_data["data"].values():
            for element in project_data["localizations"]:
                locale = element["locale"]["code"]
                total_strings = element["totalStrings"]
                if not total_strings:
                    # A locale with no strings would divide by zero and
                    # abort the whole loop; record it as 0% instead.
                    pontoon_stats[locale] = 0
                    continue
                translated = total_strings - element["missingStrings"]
                pontoon_stats[locale] = round(translated / total_strings, 4)
    except Exception as e:
        # Deliberate best-effort: if Pontoon is unreachable or the payload
        # is unexpected, report it and continue with whatever was
        # collected, so the CSV is still produced.
        print(e)

    # Add en-US as 100%
    pontoon_stats["en-US"] = 1

    # Get data from CSV file
    csv_stats = defaultdict(dict)
    with open("android_data.csv", newline="", encoding="utf-8") as csv_file:
        reader = csv.DictReader(csv_file)
        for row in reader:
            country_name = row["name"]
            # Only store 5 top locales. This keeps the first 5 rows seen per
            # country, so it presumably relies on the query output being
            # sorted by descending client count -- TODO confirm.
            if len(csv_stats[country_name]) >= 5:
                continue
            # Ignore empty locale
            if row["locale"] == "":
                continue
            csv_stats[country_name][row["locale"]] = row["client_count"]

    # Put stats together
    output_rows = [
        ("Country", "Locale", "Users", "Pontoon Locale", "Fenix Completion"),
    ]
    for country, locales in csv_stats.items():
        for locale, locale_users in locales.items():
            pontoon_locale = getPontoonBestMatch(locale, pontoon_stats)
            pontoon_completion = pontoon_stats.get(pontoon_locale, 0)
            output_rows.append(
                (country, locale, locale_users, pontoon_locale, pontoon_completion)
            )

    print("Writing output.csv file")
    # csv.writer quotes fields containing commas or quotes (e.g. a country
    # named "Korea, Republic of"), which plain string joining would corrupt.
    with open("output.csv", "w", newline="", encoding="utf-8") as f:
        csv.writer(f, lineterminator="\n").writerows(output_rows)
# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment