Created
June 7, 2019 13:54
-
-
Save barthalion/644b1314c15362809411d39c11098101 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/python3 -u | |
# -*- mode: Python; coding: utf-8 -*- | |
# Fix issues with missing OSTree objects | |
# | |
# Copyright (C) 2017 Endless Mobile, Inc. | |
# | |
# This program is free software; you can redistribute it and/or modify | |
# it under the terms of the GNU General Public License as published by | |
# the Free Software Foundation; either version 2 of the License, or | |
# (at your option) any later version. | |
# | |
# This program is distributed in the hope that it will be useful, | |
# but WITHOUT ANY WARRANTY; without even the implied warranty of | |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | |
# GNU General Public License for more details. | |
# | |
# You should have received a copy of the GNU General Public License along | |
# with this program; if not, write to the Free Software Foundation, Inc., | |
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. | |
"""Fix issues with OSTree missing objects | |
When OSTree repository objects have been inadvertently deleted, it can | |
cause two types of problems (among others): | |
1. If the deleted object is part of a commit, then the commit is now | |
partial, but OSTree doesn't know that unless a commitpartial file | |
exists. Without that, it will assume the commit is fully intact and use | |
it as the source for a static delta. | |
2. If the deleted object is a commit, then any references to it will be | |
dangling. This will cause errors since OSTree assumes that a referenced | |
commit will exist and will raise errors as soon as it tries to be used. | |
This script attempts to address these 2 issues by repulling the commits | |
for any dangling references and marking any commits with missing objects | |
as partial. | |
To guard against another program operating on the repository, all | |
processes that have the repository open are killed. | |
Some commands for testing this script when hacking on it (from a throwaway | |
working system): | |
``` | |
for objtype in commit dirtree dirmeta file; do | |
sudo find /ostree/repo/objects -type f -name "*.${objtype}" -print -delete -quit | |
done | |
sudo eos-fix-ostree-repo | |
sudo ostree fsck | |
``` | |
Make sure to test on normal and split-disk systems. | |
""" | |
from argparse import ArgumentParser | |
from fnmatch import fnmatch | |
import gi | |
gi.require_version('OSTree', '1.0') | |
from gi.repository import GLib, Gio, OSTree | |
import os | |
import pwd | |
import signal | |
import stat | |
import sys | |
import time | |
# Compatibility shim for the name of the "partial" commit state.
#
# Older OSTree releases carried a wrong GI annotation, so the value was
# exposed as OSTree.RepoCommitState.REPO_COMMIT_STATE_PARTIAL, whereas
# recent releases expose it as OSTree.RepoCommitState.PARTIAL [1].
#
# Probe for the modern name and, when it is absent, alias it to the
# legacy value so the rest of the script can always say PARTIAL.
#
# [1] See https://github.com/ostreedev/ostree/pull/1335
if not hasattr(OSTree.RepoCommitState, 'PARTIAL'):
    OSTree.RepoCommitState.PARTIAL = (
        OSTree.RepoCommitState.REPO_COMMIT_STATE_PARTIAL
    )
# Prior to ostree-2017.1, the GI annotation for ostree_repo_list_objects
# was wrong, making it unusable from bindings[1]. This is tricky to
# detect since the version checking was not added until ostree-2017.3.
# However, in the same commit,
# ostree_repo_list_commit_objects_starting_with was fixed so that the
# out_commits parameter was marked properly.
#
# Use the direction of this argument as a proxy to know when
# ostree_repo_list_objects will work. Otherwise, make our own
# list_objects implementation and monkey-patch it into the Repo class.
#
# 1. https://github.com/ostreedev/ostree/commit/300752e5
_func = OSTree.Repo.list_commit_objects_starting_with
if _func.get_arguments()[1].get_direction() == gi._gi.Direction.IN:
    import glob

    def _list_objects(self, flags, cancellable=None):
        """Pure-Python replacement for ostree_repo_list_objects.

        Scans the loose object directories of the repository on disk.
        See
        https://github.com/ostreedev/ostree/blob/master/src/libostree/ostree-repo.c
        for the real implementation.

        The flags and cancellable arguments are accepted only for
        signature compatibility; this implementation ignores them.

        Returns a (True, objects) tuple where objects maps each
        serialized object name to a (bas) variant.
        """
        repo_path = self.get_path().get_path()
        is_archive = self.get_mode() == OSTree.RepoMode.ARCHIVE_Z2

        # File extension -> object type. Content objects are named
        # .filez in archive-z2 repos and .file in every other mode; the
        # spelling that does not match the repo mode is skipped, as is
        # any other unknown (or empty) extension.
        objtypes = {
            '.filez' if is_archive else '.file': OSTree.ObjectType.FILE,
            '.dirtree': OSTree.ObjectType.DIR_TREE,
            '.dirmeta': OSTree.ObjectType.DIR_META,
            '.commit': OSTree.ObjectType.COMMIT,
        }

        # The value is always the same (bas) variant (I think packed
        # objects were supposed to put something here, but only loose
        # objects ever exist), so build it once.
        loose_value = GLib.Variant.new_tuple(
            GLib.Variant('b', True),
            GLib.Variant('as', [])
        )

        # Objects live in the objects directory split after the 2nd
        # character of their sha256sum. E.g.,
        # 8d/2925839245dd91ac9fbfcc0e7a383cddf5145bd7c5bc5de0d46929a3fa5963.file.
        #
        # The repo path is escaped so that glob metacharacters in it
        # ([, *, ?) are matched literally instead of being interpreted
        # as part of the pattern.
        objdir_pattern = (glob.escape(repo_path) +
                          '/objects/[a-f0-9][a-f0-9]')

        objects = {}
        for objdir in glob.iglob(objdir_pattern):
            prefix = os.path.basename(objdir)
            for entry in os.listdir(objdir):
                name, ext = os.path.splitext(entry)
                if len(name) != 62:
                    # Not a partial sha256
                    continue
                objtype = objtypes.get(ext)
                if objtype is None:
                    continue

                # Insert the object, keyed by its serialized name.
                key = OSTree.object_name_serialize(prefix + name, objtype)
                objects[key] = loose_value

        return True, objects

    # Override the standard list_objects
    OSTree.Repo.list_objects = _list_objects
def kill_repo_procs(repo_path, sig):
    """Kill all processes with repo open

    Walk /proc to find any process, other than this one, that has the
    repo directory or a path inside it open, and send it signal sig.

    repo_path is compared textually against the targets of the
    /proc/<pid>/fd links, so it should already be a resolved path.
    Each matching process is signalled at most once per call.
    """
    print('Killing processes with', repo_path, 'open with signal', sig)
    self_pid = os.getpid()
    repo_prefix = repo_path + '/'

    # Errors that mean the process (or fd) went away while we were
    # looking at it. Scanning /proc is inherently racy.
    vanished = (FileNotFoundError, ProcessLookupError)

    for pid in os.listdir('/proc'):
        if not pid.isdecimal() or int(pid) == self_pid:
            continue

        proc_dir = os.path.join('/proc', pid)
        fd_dir = os.path.join(proc_dir, 'fd')

        # The process may have exited
        try:
            proc_fds = os.listdir(fd_dir)
        except vanished:
            continue

        for fd in proc_fds:
            # The process may have exited or the file may have been closed
            try:
                fd_path = os.readlink(os.path.join(fd_dir, fd))
            except vanished:
                continue

            # Only interested in the repo itself or a path within it
            if fd_path != repo_path and not fd_path.startswith(repo_prefix):
                continue

            # Try to read the exe file for information, but in some
            # cases (kernel thread), it may not exist. This is purely
            # informational, so any OS-level failure is tolerated, but
            # KeyboardInterrupt and friends are no longer swallowed.
            try:
                pid_exe = os.readlink(os.path.join(proc_dir, 'exe'))
            except OSError:
                pid_exe = ''

            # Kill it and go to the next process. The process may have
            # exited since its fds were listed; that is the outcome we
            # wanted anyway, so don't abort the whole run over it.
            print('Killing pid', pid, pid_exe, 'with signal', sig)
            try:
                os.kill(int(pid), sig)
            except ProcessLookupError:
                print('pid', pid, 'exited before it could be signalled')
            break
def pull_commit(repo, remote, checksum, full=False):
    """Pull the commit identified by checksum from remote into repo.

    With full=False (the default) the pull is restricted to the commit
    metadata via the COMMIT_ONLY flag; with full=True no pull flags are
    set. The pull depth is always 0.
    """
    pull_flags = (OSTree.RepoPullFlags.NONE if full
                  else OSTree.RepoPullFlags.COMMIT_ONLY)
    pull_settings = {
        'flags': GLib.Variant('i', pull_flags),
        'refs': GLib.Variant('as', (checksum,)),
        'depth': GLib.Variant('i', 0),
    }
    pull_options = GLib.Variant('a{sv}', pull_settings)

    # Report progress on the console using OSTree's stock handler.
    reporter = OSTree.AsyncProgress.new()
    reporter.connect(
        'changed',
        OSTree.Repo.pull_default_console_progress_changed,
        None,
    )

    # FIXME: For some reason, pull_with_options cannot be stopped with
    # ^C from the keyboard (SIGINT). This could be a problem in ostree
    # or pygobject, but I suspect it has something to do with what pull
    # does with the main context.
    repo.pull_with_options(remote, pull_options, reporter)
    reporter.finish()
def fix_dangling_refs(repo):
    """Repair refs in repo whose commit object is missing.

    Every ref is checked by loading its commit. When the load fails
    with G_IO_ERROR_NOT_FOUND, a metadata-only pull of that commit is
    done so the ref is valid again. Any other load error is re-raised.
    """
    resolved_repo = os.path.realpath(repo.get_path().get_path())
    print('Fixing refs pointing to missing commits in', resolved_repo)

    _, refs = repo.list_refs()
    for refspec, checksum in refs.items():
        try:
            repo.load_commit(checksum)
        except GLib.Error as load_err:
            commit_missing = load_err.matches(Gio.io_error_quark(),
                                              Gio.IOErrorEnum.NOT_FOUND)
            if not commit_missing:
                raise

            # The commit is gone; work out where to fetch it from.
            _, remote, ref = OSTree.parse_refspec(refspec)
            if remote is None:
                # A ref without a remote is assumed to be an ostree
                # ref, which comes from the "eos" remote.
                print('No remote for ref', ref, 'assuming "eos"')
                remote = 'eos'

            # Try to pull the commit metadata again.
            print('Pulling', checksum, 'commit metadata from', remote,
                  'for', ref)
            pull_commit(repo, remote, checksum)
def mark_commits_partial(repo):
    """Mark commits with missing objects as partial

    Every commit object in repo that is not already in the partial
    state is traversed. If the traversal fails with
    G_IO_ERROR_NOT_FOUND, or any object it reports as reachable is not
    present in the repo's object listing, an empty
    state/<checksum>.commitpartial file is created for the commit.
    Any other traversal error is re-raised.
    """
    repo_path = os.path.realpath(repo.get_path().get_path())
    print('Marking commits with missing objects as partial in', repo_path)
    state_dir = os.path.join(repo_path, 'state')

    _, all_objects = repo.list_objects(OSTree.RepoListObjectsFlags.ALL, None)
    for objname in all_objects:
        checksum, objtype = OSTree.object_name_deserialize(objname)
        if objtype != OSTree.ObjectType.COMMIT:
            continue

        _, _commit, state = repo.load_commit(checksum)
        if state == OSTree.RepoCommitState.PARTIAL:
            print('Commit', checksum, 'already marked as partial')
            continue

        try:
            # If a dirtree is missing, traverse_commit will fail with
            # G_IO_ERROR_NOT_FOUND.
            _, reachable_objects = repo.traverse_commit(checksum, 0)
        except GLib.Error as err:
            if not err.matches(Gio.io_error_quark(),
                               Gio.IOErrorEnum.NOT_FOUND):
                raise
            mark_partial = True
        else:
            # Unfortunately, it doesn't check that the leaves (dirmeta
            # and files) exist, so we need to do that manually. In case
            # that behavior ever changes, just check that all the
            # reachable objects exist.
            #
            # https://github.com/ostreedev/ostree/issues/1222
            mark_partial = any(commit_obj not in all_objects
                               for commit_obj in reachable_objects)

        if not mark_partial:
            continue

        print('Marking commit', checksum, 'as partial')
        # Make sure the state directory is there before creating the
        # marker; without it the open() below fails with
        # FileNotFoundError and the whole repair run aborts.
        os.makedirs(state_dir, exist_ok=True)
        commit_partial_path = os.path.join(state_dir,
                                           checksum + '.commitpartial')
        # The marker is an empty file; only its existence matters here.
        with open(commit_partial_path, 'w'):
            pass
def pull_partial_refs(repo):
    """Re-pull, in full, every remote ref whose commit is partial.

    Local refs (no remote) and Locale refs are skipped; everything else
    whose commit state is PARTIAL gets a full pull from its remote.
    """
    _, refs = repo.list_refs()
    for refspec, checksum in refs.items():
        _, remote, ref = OSTree.parse_refspec(refspec)

        # Don't bother pulling local refs. Only the ostree deploys are
        # local, and as long as they're marked partial, they can be
        # updated later.
        if remote is None:
            continue

        # App and runtime locales are partial on purpose since only the
        # relevant subpaths are pulled. Leave them alone to not use up
        # extra bandwidth and disk space.
        is_locale = fnmatch(ref, '*/*.Locale/*/*')
        if is_locale:
            print('Skipping intentionally partial Locale commit',
                  refspec, checksum)
            continue

        # NOTE(review): this is an equality test; if the commit state
        # can carry other bits alongside PARTIAL, such commits would be
        # skipped here - confirm against the OSTree version in use.
        _, _commit, state = repo.load_commit(checksum)
        if state == OSTree.RepoCommitState.PARTIAL:
            # Try to pull the full commit again.
            print('Pulling', checksum, 'commit from', remote, 'for', ref)
            pull_commit(repo, remote, checksum, full=True)
def main():
    """Command line entry point.

    Opens the repo (directly via --repo, or through the sysroot given
    by --sysroot / the default sysroot), kills every other process that
    has it open, locks the sysroot when one is in use, and then runs
    the three repair passes in order: fix dangling refs, mark
    incomplete commits partial, and re-pull partial referenced commits.

    Exits with status 1 if the sysroot lock cannot be acquired or the
    repo's tmp/cache directory does not exist.
    """
    aparser = ArgumentParser(
        description='Fix broken OSTree repo'
    )
    path_group = aparser.add_mutually_exclusive_group()
    path_group.add_argument('--sysroot', help='path to OSTree sysroot')
    path_group.add_argument('--repo', help='path to OSTree repo')
    args = aparser.parse_args()

    print('WARNING: Do not start App Center while this is running')

    if args.repo is not None:
        # Use a repo directly instead of getting it from the sysroot
        sysroot = None
        repo_file = Gio.File.new_for_path(args.repo)
        repo = OSTree.Repo.new(repo_file)
        repo.open()
    else:
        # Get the repo from the sysroot
        if args.sysroot is None:
            sysroot_file = None
        else:
            sysroot_file = Gio.File.new_for_path(args.sysroot)
        sysroot = OSTree.Sysroot.new(sysroot_file)
        sysroot.load()
        _, repo = sysroot.get_repo()

    # Resolve the full repo path
    repo_path = os.path.realpath(repo.get_path().get_path())

    # Kill once with SIGTERM, then with SIGKILL
    kill_repo_procs(repo_path, signal.SIGTERM)
    time.sleep(1)
    kill_repo_procs(repo_path, signal.SIGKILL)

    # Now lock the sysroot if one is in use
    if sysroot is not None:
        # try_lock reports whether the lock was taken through an out
        # parameter, so the binding is expected to return a
        # (success, acquired) tuple just like get_repo() above. A
        # non-empty tuple is always truthy, so testing the raw return
        # value would never notice a lock that was not acquired. A
        # plain boolean return is still accepted in case a binding
        # hands that back instead.
        lock_result = sysroot.try_lock()
        if isinstance(lock_result, tuple):
            lock_acquired = lock_result[-1]
        else:
            lock_acquired = lock_result
        if not lock_acquired:
            print('Could not lock sysroot', sysroot.get_path().get_path(),
                  file=sys.stderr)
            sys.exit(1)

    # In older OSTree, cleaning up after a transaction (e.g., a pull)
    # would delete the tmp/cache directory if it was older than 1 day.
    # That's a problem because it has an open fd for that directory.
    # Update the directory's mtime to current. This is racy because
    # other repo users may have deleted the directory after we opened
    # the repo and before they were killed, so just fail if the
    # directory doesn't exist.
    cache_dir = os.path.join(repo_path, 'tmp', 'cache')
    try:
        os.utime(cache_dir)
    except FileNotFoundError:
        print(cache_dir, 'does not exist - run', sys.argv[0], 'again!',
              file=sys.stderr)
        sys.exit(1)

    # First, fix dangling refs so that refs can be reliably listed again
    fix_dangling_refs(repo)

    # Next, traverse all commits to mark any as partial
    mark_commits_partial(repo)

    # Finally, try to completely pull in any partial referenced commits
    # so there are no longer any missing objects
    pull_partial_refs(repo)

    print('\nSuccess! Try to update the OS and Apps now.')


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment