-
Compose a list of build ids which exist in the database:
$ psql piwheels -c "select build_id from builds" > build_ids.txt
-
Cut the header and footer lines from the output
-
Copy the file over to the piwheels master
-
On the piwheels master, iterate over the
logs
# On the piwheels master: walk the two-level www/logs directory tree and
# delete any log file whose build id is not present in the database dump.
from pathlib import Path
from datetime import datetime

logs_dir = Path('www/logs')
db_file = Path('build_ids_db.txt')
# One build id per line in the dump; collect them into a set for O(1) lookup.
db_build_ids = {int(line) for line in db_file.read_text().split()}

start = datetime.now()
for lvl_1 in logs_dir.iterdir():
    for lvl_2 in lvl_1.iterdir():
        for log_file in lvl_2.iterdir():
            # The build id is split across the two directory names plus the
            # file stem (extension(s) stripped).
            build_id = int(f"{lvl_1.name}{lvl_2.name}{log_file.stem.split('.')[0]}")
            if build_id not in db_build_ids:
                log_file.unlink()
end = datetime.now()
print(end - start)  # see how long it took
Last active
October 30, 2022 13:43
-
-
Save bennuttall/e5b15e4d0cffcacdd0a2b710e5ada572 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from pathlib import Path
from collections import defaultdict

# Wheel filenames reported missing/extra on disk, grouped by package name.
missing_wheels = defaultdict(set)
extra_wheels = defaultdict(set)
rewrites = set()  # packages whose simple index needs regenerating
total_missing = 0
total_extra = 0

# missing.txt: wheel paths the database expects; only count the ones that
# do actually exist on disk.
with open('missing.txt') as f:
    for line in f:
        whl = line.strip()
        parts = whl.split('/')
        pkg = parts[-2]
        filename = parts[-1]
        rewrites.add(pkg)
        if Path(whl).is_file():
            missing_wheels[pkg].add(filename)
            total_missing += 1

# extra.txt: wheel paths present on disk but unknown to the database.
with open('extra.txt') as f:
    for line in f:
        whl = line.strip()
        parts = whl.split('/')
        pkg = parts[-2]
        filename = parts[-1]
        rewrites.add(pkg)
        extra_wheels[pkg].add(filename)
        total_extra += 1

# Emit one `piw-rebuild index` command per affected package.
with open('rewrites.sh', 'w') as f:
    f.write('\n'.join(f'piw-rebuild index {pkg}' for pkg in rewrites) + '\n')

print(total_missing, "missing wheels")
print(total_extra, "extra wheels")
print(len(rewrites), "package indexes need rewriting")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmlrpc.client

client = xmlrpc.client.ServerProxy('https://pypi.org/pypi')

# Replay the PyPI changelog from serial 0 up to this cut-off timestamp,
# keeping the most recent timestamp seen for each kind of event.
now = 1597017600  # cut-off (unix timestamp)
timestamp = 0
serial = 0
packages_created = {}    # package -> last 'create' timestamp
packages_removed = {}    # package -> last whole-package 'remove' timestamp
versions_created = {}    # (package, version) -> last 'new release' timestamp
versions_removed = {}    # (package, version) -> last 'remove' timestamp
versions_yanked = {}     # (package, version) -> last 'yank release' timestamp
versions_unyanked = {}   # (package, version) -> last 'unyank release' timestamp
while timestamp < now:
    for package, version, timestamp, action, serial in client.changelog_since_serial(serial):
        if action == 'create':
            last_timestamp = packages_created.get(package, 0)
            if timestamp > last_timestamp:
                packages_created[package] = timestamp
        elif action == 'new release':
            # BUG FIX: key by (package, version) -- this dict was previously
            # keyed by package only, so the versions_created.get((p, v), 0)
            # lookup further down always returned 0 and every removed version
            # was treated as never re-created.
            last_timestamp = versions_created.get((package, version), 0)
            if timestamp > last_timestamp:
                versions_created[(package, version)] = timestamp
        elif action == 'remove':
            # a 'remove' with no version means the whole package was deleted
            if version is None:
                last_timestamp = packages_removed.get(package, 0)
                if timestamp > last_timestamp:
                    packages_removed[package] = timestamp
            else:
                last_timestamp = versions_removed.get((package, version), 0)
                if timestamp > last_timestamp:
                    versions_removed[(package, version)] = timestamp
        elif action == 'yank release':
            last_timestamp = versions_yanked.get((package, version), 0)
            if timestamp > last_timestamp:
                versions_yanked[(package, version)] = timestamp
        elif action == 'unyank release':
            last_timestamp = versions_unyanked.get((package, version), 0)
            if timestamp > last_timestamp:
                versions_unyanked[(package, version)] = timestamp
    # progress report after each changelog page
    print(
        serial,
        len(packages_created),
        len(packages_removed),
        len(versions_created),
        len(versions_removed),
        len(versions_yanked),
        len(versions_unyanked),
    )

# make a set of package versions which were yanked after being unyanked (or
# were never unyanked)
yanked = set()
for (p, v), yanked_ts in versions_yanked.items():
    unyanked_ts = versions_unyanked.get((p, v), 0)
    if yanked_ts > unyanked_ts:
        yanked.add((p, v))
print(f'{len(yanked):,} versions to yank')

# write out an sql file to update the versions table setting those versions as
# yanked (PyPI package names/versions cannot contain single quotes, so the
# f-string interpolation into SQL is safe here)
with open('yanked.sql', 'w') as f:
    for p, v in yanked:
        f.write(f"UPDATE versions SET yanked = true WHERE package = '{p}' AND version = '{v}';\n")

# create a set of packages which have had versions yanked
yanked_packages = {p for p, v in yanked}

# write out a bash script to rewrite the indexes and project pages for all
# packages which have had versions yanked
# BUG FIX: iterate the de-duplicated yanked_packages set; previously this
# iterated `yanked`, emitting one rebuild line per yanked *version*.
with open('yanked.sh', 'w') as f:
    for p in yanked_packages:
        f.write(f"piw-rebuild index {p}\n")

# create a set of all packages which have been deleted since their last
# creation date
p_removed = set()
for p, removed_ts in packages_removed.items():
    created_ts = packages_created.get(p, 0)
    if removed_ts > created_ts:
        p_removed.add(p)
print(f'{len(p_removed):,} packages to delete')

# write out a plain text file of all packages marked for deletion
with open('deleted_packages.txt', 'w') as f:
    for p in p_removed:
        f.write(f"{p}\n")

# write out a bash script with rm's for deleted packages (simple and project)
# this may be a bad idea
with open('deleted_packages.sh', 'w') as f:
    for p in p_removed:
        f.write(f"rm -r /home/piwheels/www/simple/{p}/ /home/piwheels/www/project/{p}/\n")

# write out an sql script to delete versions which have been removed
with open('deleted_packages.sql', 'w') as f:
    for p in p_removed:
        f.write(f"DELETE FROM versions WHERE package = '{p}';\n")

# create a set of all versions which have been deleted since their last
# creation (skipping packages already deleted wholesale above)
v_removed = set()
for (p, v), removed_ts in versions_removed.items():
    created_ts = versions_created.get((p, v), 0)
    if p not in p_removed and removed_ts > created_ts:
        v_removed.add((p, v))
print(f'{len(v_removed):,} versions to delete')

# write out a text file of all deleted versions
with open('deleted_versions.txt', 'w') as f:
    for p, v in v_removed:
        f.write(f"{p} {v}\n")

# write out a bash script of piw-remove commands for all deleted versions
with open('deleted_versions.sh', 'w') as f:
    for p, v in v_removed:
        f.write(f"echo 'removing {p} {v}'\n")
        f.write(f"piw-remove -v -y '{p}' '{v}'\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmlrpc.client
import requests
from piwheels.format import canonicalize_name

client = xmlrpc.client.ServerProxy('https://pypi.org/pypi')
package_data = {}  # cache of PyPI JSON metadata, keyed by package name


def get_pypi_packages():
    "Return the set of canonicalized package names known to PyPI"
    return {canonicalize_name(p) for p in client.list_packages()}


def get_piwheels_packages():
    "Return the set of canonicalized package names known to piwheels"
    url = "https://www.piwheels.org/packages.json"
    packages = requests.get(url).json()
    return {canonicalize_name(p[0]) for p in packages}


def get_package_data(pkg):
    "Fetch the PyPI JSON metadata for pkg and cache it in package_data"
    url = f"https://pypi.org/pypi/{pkg}/json"
    package_data[pkg] = requests.get(url).json()


def get_package_versions(pkg):
    "Return a set of (version, upload_time) pairs for releases with files"
    versions = package_data[pkg]['releases']
    return {(v, d[0]['upload_time']) for v, d in versions.items() if d}


pypi_packages = get_pypi_packages()
piwheels_packages = get_piwheels_packages()
missing_packages = pypi_packages - piwheels_packages
extra_packages = piwheels_packages - pypi_packages

# create a dict of missing packages with their list of versions from pypi
missing_package_versions = {}
for pkg in missing_packages:
    try:
        get_package_data(pkg)
        versions = get_package_versions(pkg)
        if versions:
            missing_package_versions[pkg] = versions
    except Exception as e:
        # best-effort: log and carry on with the remaining packages
        print(repr(e))

# write out an sql file to add the missing packages and versions
# write out a bash file to rebuild the project pages
# BUG FIX: both files were opened without 'w' (i.e. read-only), so every
# shf.write / sqlf.write call raised io.UnsupportedOperation.
with open('missing_packages.sh', 'w') as shf, open('missing_packages.sql', 'w') as sqlf:
    for pkg, vers in missing_package_versions.items():
        # summary can be null in the PyPI metadata; also escape single
        # quotes so the value is safe to embed in an SQL string literal
        description = (package_data[pkg]['info']['summary'] or '').replace("'", "''")
        shf.write(f"piw-rebuild project {pkg}\n")
        sqlf.write(f"select add_package_name('{pkg}', '{pkg}', '1970-01-01 00:00:00');\n")
        sqlf.write(f"select add_new_package('{pkg}', '', '{description}');\n")
        for ver, rel in vers:
            rel = rel.replace('T', ' ')
            sqlf.write(f"select add_new_package_version('{pkg}', '{ver}', '{rel}', '');\n")

# write out a bash file to remove the extra packages
with open('extra_packages.sh', 'w') as f:
    for pkg in extra_packages:
        f.write(f"piw-remove {pkg} -y\n")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv
from time import sleep
from piwheels.master.pypi import PyPIEvents

# Drain the piwheels PyPIEvents reader until it has caught up with the
# newest changelog serial on PyPI, appending every event batch to a CSV.
pypi = PyPIEvents()
# NOTE(review): reaches into private attributes of PyPIEvents to obtain the
# latest changelog serial -- confirm there is no public accessor for this.
latest_serial = pypi._buffer._client.changelog_last_serial()
with open('pypi.csv', 'w') as f:
    csv_out = csv.writer(f)
    while pypi.serial < latest_serial:
        sleep(1)  # pace successive XML-RPC requests
        csv_out.writerows(list(pypi))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import xmlrpc.client
from piwheels.format import canonicalize_name
from time import sleep
import csv

# Dump the full PyPI changelog to CSV, one page of events per XML-RPC call,
# with package names canonicalized.
client = xmlrpc.client.ServerProxy('https://pypi.org/pypi')
serial = 0
latest_serial = client.changelog_last_serial()
sleep(1)  # pause between successive XML-RPC requests
with open('pypi_log_full.csv', 'w') as f:
    writer = csv.writer(f)
    while serial < latest_serial:
        sleep(1)
        # `serial` is rebound by the unpacking, advancing the cursor
        for package_alias, version, timestamp, action, serial in client.changelog_since_serial(serial):
            writer.writerow((canonicalize_name(package_alias), version, timestamp, action, serial))
        print(100 * serial / latest_serial)  # percentage progress
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
def get_piwheels_packages():
    "Return a set of all packages in piwheels"
    packages_json = requests.get("https://www.piwheels.org/packages.json").json()
    # each entry is a list whose first element is the package name
    return {entry[0] for entry in packages_json}
def get_piwheels_versions(pkg):
    """
    Return a dict of versions of pkg in piwheels with a bool representing
    whether a version is currently skipped (True) or unskipped (False) in
    piwheels
    """
    response = requests.get(f"https://www.piwheels.org/project/{pkg}/json")
    response.raise_for_status()
    releases = response.json()['releases']
    # a version counts as skipped when its skip_reason is 'binary only'
    return {
        version: info['skip_reason'] == 'binary only'
        for version, info in releases.items()
    }
def get_pypi_versions(pkg):
    """
    Return a dict of versions of pkg on pypi with a bool representing whether
    a version should be skipped (True) or unskipped (False) in piwheels
    according to whether or each version includes an sdist
    """
    response = requests.get(f"https://pypi.org/pypi/{pkg}/json")
    response.raise_for_status()
    result = {}
    for version, files in response.json()['releases'].items():
        # releases with no files at all are ignored entirely
        if not files:
            continue
        result[version] = {
            'released': get_release_date(files),
            'skipped': not version_has_sdist(files),
        }
    return result
def version_has_sdist(files):
    "Return True if files contains an sdist"
    return any(entry['packagetype'] == 'sdist' for entry in files)
def get_release_date(files):
    """
    Look up the release date for a file and return a datetime string
    (upload time of the first file, 'T' separator replaced by a space);
    returns None when files is empty
    """
    if not files:
        return None
    return files[0]['upload_time'].replace('T', ' ')
# Accumulators for the differences found between piwheels and PyPI,
# all keyed by package name.
versions_to_add = {}
versions_to_add_skipped = {}
versions_to_remove = {}
versions_to_skip = {}
versions_to_unskip = {}
packages_to_remove = set()
piwheels_errors = {}
pypi_errors = {}

packages = get_piwheels_packages()
for i, pkg in enumerate(packages):
    print(f"Checking {pkg}... {100*(i+1) / len(packages):.2f}% [{len(versions_to_add)} {len(versions_to_add_skipped)} {len(versions_to_remove)} {len(versions_to_skip)} {len(versions_to_unskip)}]")
    # get dicts of versions in piwheels and pypi
    try:
        pypi_versions_dict = get_pypi_versions(pkg)
    except requests.exceptions.HTTPError as exc:
        if exc.response.status_code == 404:
            # gone from PyPI entirely: drop the whole package
            packages_to_remove.add(pkg)
        else:
            pypi_errors[pkg] = exc.response.status_code
        continue
    except Exception as exc:
        pypi_errors[pkg] = str(exc)
        continue
    try:
        piwheels_versions_dict = get_piwheels_versions(pkg)
    except requests.exceptions.HTTPError as exc:
        piwheels_errors[pkg] = exc.response.status_code
        continue
    except Exception as exc:
        piwheels_errors[pkg] = repr(exc)
        continue

    # version-name sets, used to determine which are missing/extra
    piwheels_versions = set(piwheels_versions_dict)
    pypi_versions = set(pypi_versions_dict)

    # versions missing from piwheels: partition into additions that should
    # start unskipped vs additions that should start skipped
    missing_versions = pypi_versions - piwheels_versions
    add_unskipped = set()
    add_skipped = set()
    for v in missing_versions:
        entry = (v, pypi_versions_dict[v]['released'])
        if pypi_versions_dict[v]['skipped']:
            add_skipped.add(entry)
        else:
            add_unskipped.add(entry)
    if add_unskipped:
        versions_to_add[pkg] = add_unskipped
    if add_skipped:
        versions_to_add_skipped[pkg] = add_skipped

    # versions no longer on PyPI should be removed from piwheels
    extra_versions = piwheels_versions - pypi_versions
    if extra_versions:
        versions_to_remove[pkg] = extra_versions

    # versions present in both but with mismatched skip state
    versions_in_both = piwheels_versions & pypi_versions
    to_skip = set()
    to_unskip = set()
    for v in versions_in_both:
        pypi_skipped = pypi_versions_dict[v]['skipped']
        piwheels_skipped = piwheels_versions_dict[v]
        if pypi_skipped and not piwheels_skipped:
            to_skip.add(v)
        elif piwheels_skipped and not pypi_skipped:
            to_unskip.add(v)
    if to_skip:
        versions_to_skip[pkg] = to_skip
    if to_unskip:
        versions_to_unskip[pkg] = to_unskip
# write out bash scripts and sql scripts to make the necessary changes


def _write_lines(path, lines):
    "Write an iterable of newline-terminated lines to path"
    with open(path, 'w') as f:
        f.writelines(lines)


_write_lines('versions_to_add.sql', (
    f"select add_new_package_version('{pkg}', '{v}', '{released}', '');\n"
    for pkg, versions in versions_to_add.items()
    for v, released in versions
))
_write_lines('versions_to_add_skipped.sql', (
    f"select add_new_package_version('{pkg}', '{v}', '{released}', 'binary only');\n"
    for pkg, versions in versions_to_add_skipped.items()
    for v, released in versions
))
_write_lines('versions_to_skip.sh', (
    f"piw-remove {pkg} '{v}' --skip 'binary only' -y\n"
    for pkg, versions in versions_to_skip.items()
    for v in versions
))
_write_lines('versions_to_unskip.sql', (
    f"update versions set skip = '' where package = '{pkg}' and version = '{v}';\n"
    for pkg, versions in versions_to_unskip.items()
    for v in versions
))
_write_lines('versions_to_remove.sh', (
    f"piw-remove {pkg} '{v}' -y\n"
    for pkg, versions in versions_to_remove.items()
    for v in versions
))
_write_lines('packages_to_remove.sh', (
    f"piw-remove {pkg} -y\n"
    for pkg in packages_to_remove
))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment