Skip to content

Instantly share code, notes, and snippets.

@zbraniecki
Last active September 6, 2019 17:56
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save zbraniecki/422e08e23318ddf558de478c7cec8bc5 to your computer and use it in GitHub Desktop.
AWFY scripts
from arewefluentyet import data
import os
import subprocess
import json
from datetime import date, datetime, timedelta
# adjust this path to your local check-out of the gh-pages branch
REPO = "/Users/zbraniecki/projects/fluent/arewefluentyet.com/gh-pages"
# local mozilla-unified mercurial clone that the hg commands below operate on
MC = "/Users/zbraniecki/projects/mozilla-unified"
# directory inside the gh-pages checkout where progress.json/snapshot.json live
DATA_PATH = os.path.join(REPO, "./data")
# intended sampling interval between data points
# NOTE(review): appears unused in this script — confirm before removing
FREQUENCY = timedelta(days=7)
def read_progress_data():
    """Load the accumulated progress entries from progress.json.

    Returns the parsed JSON list, or an empty list (after printing a
    warning) when the file does not exist yet.
    """
    path = os.path.join(DATA_PATH, "progress.json")
    if os.path.exists(path):
        # `with` closes the handle promptly; the original leaked it via
        # json.load(open(path)).
        with open(path) as f:
            return json.load(f)
    print("Warning: \"{}\" doesn't exist. Creating a new one.".format(path))
    return []
def read_last_date(progress_data):
    """Return the date of the most recent progress entry, or None if empty."""
    if not progress_data:
        return None
    # Entries store dates as "YYYY-MM-DD"; the newest entry is last.
    year, month, day = (int(part) for part in progress_data[-1]["date"].split("-"))
    return date(year, month, day)
def pick_next_revision(last_date):
    """Return the next mozilla-central revision to sample.

    Hardcoded until a way to derive it from *last_date* is found.
    """
    return "f594a688b3c42ce85e3382ef6de943fb8494ab16"
def get_current_revision():
    """Return the revision id currently checked out in the MC clone."""
    output = subprocess.check_output(["hg", "id", MC, "-T{id}"])
    return output.decode("utf-8")
def get_revision_date(rev):
    """Return the short date (YYYY-MM-DD) of *rev* in the MC clone."""
    # Use a distinct local instead of rebinding the parameter.
    output = subprocess.check_output(
        ["hg", "id", MC, "-r", rev, "-T", "{date|shortdate}"]
    )
    return output.decode("utf-8")
def switch_to_revision(rev):
    """Clean-update the MC working copy to *rev*, printing the exit status."""
    os.chdir(MC)
    status = subprocess.check_call(["hg", "update", "-c", "-r", rev])
    print(status)
def extract_progress(dataset):
    """Flatten per-file string counts into entries and per-extension totals.

    *dataset* is an iterable of dicts mapping file paths to counts.
    Returns ``(entries, progress)`` where *entries* is a list of
    ``{"file": path, "count": count}`` records (one per path, in order)
    and *progress* maps each file extension (without the leading dot)
    to the summed count across all paths.
    """
    entries = []
    progress = {}
    for subset in dataset:
        for path, count in subset.items():
            entries.append({
                "file": path,
                "count": count
            })
            ext = os.path.splitext(path)[1].lstrip(".")
            # Skip paths without a usable extension ("README", "foo.", ...).
            # Bug fix: the original crashed with KeyError when splitext
            # returned "." (e.g. "foo.") — stripping the dot left "" which
            # fell into the `+=` branch with no prior entry.
            if not ext:
                continue
            progress[ext] = progress.get(ext, 0) + count
    return (entries, progress)
def update_data(progress_data, next_revision):
    """Gather string counts at *next_revision* and record them.

    Appends a per-extension progress record to *progress_data* (mutated in
    place) and returns a snapshot dict with the per-file entries.  If the
    working copy is not already at *next_revision*, it is switched there
    for the measurement and restored afterwards.
    """
    current_revision = get_current_revision()
    print("Your current revision is: {}.".format(current_revision))
    needs_switch = current_revision != next_revision
    if needs_switch:
        print("Updating data for revision: {}.".format(next_revision))
        switch_to_revision(next_revision)
    else:
        print("Collecting data for this revision.")
    aggregator = data.Aggregator(
        [os.path.join(MC, "browser/locales/l10n.toml")]
    )
    aggregator.load()
    result = aggregator.gather()
    rev_date = get_revision_date(next_revision)
    if needs_switch:
        # Restore the working copy the user started from.
        switch_to_revision(current_revision)
    entries, progress = extract_progress(result)
    progress_data.append({
        "data": progress,
        "date": rev_date,
        "revision": next_revision,
    })
    return {
        "date": rev_date,
        "revision": next_revision,
        "data": entries,
    }
# --- entry point: collect one data point and rewrite both JSON files ---
progress_data = read_progress_data()
last_date = read_last_date(progress_data)
next_revision = pick_next_revision(last_date)
snapshot_data = update_data(progress_data, next_revision)
# `with` guarantees the files are flushed and closed even if json.dump
# raises; the original passed bare open(...) handles that were never
# explicitly closed.
with open(os.path.join(DATA_PATH, "progress.json"), "w") as f:
    json.dump(
        progress_data, f,
        indent=0,
        separators=(",", ": "),
        sort_keys=True,
    )
with open(os.path.join(DATA_PATH, "snapshot.json"), "w") as f:
    json.dump(
        snapshot_data, f,
        indent=0,
    )
import json
import os
# local check-out of the gh-pages branch of arewefluentyet.com
REPO = "/Users/zbraniecki/projects/fluent/arewefluentyet.com/gh-pages"
# paths (relative to REPO) of the data files this migration rewrites
PROGRESS_JSON = "/data/progress.json"
SNAPSHOT_JSON = "/data/snapshot.json"
def normalize_path(path):
    """Strip everything up to and including "mozilla-unified/" from *path*.

    Returns *path* unchanged when the marker is absent — the original
    sliced with ``find(...) + 16`` and so returned ``path[15:]`` garbage
    when ``find`` yielded -1.  The magic 16 is replaced by len(marker).
    """
    marker = "mozilla-unified/"
    start = path.find(marker)
    if start == -1:
        return path
    return path[start + len(marker):]
# --- one-off migration of progress.json to the per-extension format ---
# Each old entry stores its data as a one-element list of {path: count}
# dicts; every entry is rewritten in place to per-extension totals, and a
# snapshot.json with per-file counts is emitted from the newest entry.
# NOTE(review): indentation was lost in this paste; the per-entry
# conversion below is assumed to run for EVERY entry, while the
# last_entry capture fires only for the final one — confirm against the
# gist history before relying on this reading.
global_json = json.load(open(REPO + PROGRESS_JSON))
last_entry = None
rev = None
date = None
for i, entry in enumerate(global_json):
    if i == len(global_json) - 1:
        # Remember the raw per-file data of the newest entry for snapshot.json.
        last_entry = entry['data'][0]
        rev = entry['revision']
        date = entry['date']
    # Per-extension accumulator; only these five formats are expected.
    snap = {
        "dtd": 0,
        "inc": 0,
        "ini": 0,
        "ftl": 0,
        "properties": 0,
    }
    for path in entry['data'][0]:
        value = entry['data'][0][path]
        p, ext = os.path.splitext(path)
        if not ext:
            # Extension-less path: fall back to the whole path.
            # NOTE(review): after the [1:] strip below, anything that is
            # not one of the five keys above raises KeyError — presumably
            # the historical data never hits that case; verify.
            ext = path
        # Drop the leading "." from splitext's extension.
        ext = ext[1:]
        snap[ext] += value
    # Replace the per-file list with the per-extension totals.
    entry['data'] = snap
entries = []
for path in last_entry:
    value = last_entry[path]
    entries.append({
        "file": normalize_path(path),
        "count": value,
    })
snapshot = {
    "date": date,
    "revision": rev,
    "data": entries
}
json.dump(
    global_json, open(REPO + PROGRESS_JSON, "w"),
    indent=0,
    separators=(",", ": "),
    sort_keys=True,
)
json.dump(
    snapshot, open(REPO + SNAPSHOT_JSON, "w"),
    indent=0,
)
import re
import json
def collect_dtd(path):
    """Return the entity names declared in the DTD file at *path*."""
    # `with` closes the handle (the original leaked it), and findall
    # already returns the list the original built by appending in a loop.
    with open(path) as f:
        source = f.read()
    re_entity = re.compile(r"<!ENTITY ([^ ]+) ")
    return re_entity.findall(source)
def collect_used_dtds(path):
    """Return the ids of entries with type "dtd" in the snapshot JSON at *path*."""
    # `with` closes the handle the original leaked; the filter-and-collect
    # loop is a straight list comprehension.
    with open(path) as f:
        data = json.load(f)
    return [entry["id"] for entry in data["data"] if entry["type"] == "dtd"]
# --- entry point: print DTD entities declared but absent from the snapshot ---
dtds = collect_dtd("/Users/zbraniecki/projects/mozilla-unified/browser/locales/en-US/chrome/browser/browser.dtd")
used_dtds = collect_used_dtds("/Users/zbraniecki/projects/fluent/arewefluentyet.com/snapshot.json")
# Membership tests against a set are O(1); the original scanned the list
# for every key.  Printed output is identical.
used_lookup = set(used_dtds)
for key in dtds:
    if key not in used_lookup:
        print(key)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment