Created
April 16, 2015 10:33
-
-
Save dakkar/cad4c546c519c6317526 to your computer and use it in GitHub Desktop.
Gentoo: find all files Portage can't re-create for you
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env perl | |
use strict; | |
use warnings; | |
use 5.020; | |
use autodie; | |
use Try::Tiny; | |
use File::Find::Rule; | |
use File::stat; | |
use Digest::MD5; | |
=head1 NAME | |
portage-scan | |
=head1 DESCRIPTION | |
You have an old Gentoo machine. You bought a new one. You want to copy | |
your system from the old one to the new one, but you have to recompile | |
all packages. You copy over F</var/lib/portage/world> and use it to | |
re-build all packages. What I<else> do you need to copy over? This | |
script will tell you. | |
It loads all the information about Portage-owned files, then scans the | |
whole filesystem, printing: | |
=over 4 | |
=item * | |
items that Portage does not know about (C<O>rphans) | |
=item * | |
items that are a different C<K>ind (directory / file / symlink) than | |
what Portage expects | |
=item * | |
items that have a different modification C<T>ime than what Portage | |
recorded | |
=item * | |
simC<L>ynks to a different target than what Portage recorded | |
=item * | |
files that have a different MD C<5> digest than the one Portage recorded | |
=back | |
Essentially, everything printed by this script should be copied over, | |
or at least checked. | |
=head1 IGNORED DIRECTORIES | |
You know you have to copy over F</home>, and that you should not touch | |
F</sys>, F</dev>, F</tmp> &c. So those are ignored. See the | |
C<$prune_rx> variable to configure this. | |
=head1 KNOWN ISSUES | |
Symlinks generated by C<gcc-config>, C<eselect> &c show up as orphaned. | |
C</lib/> and C</lib64/> appear confused in Portage's database: this | |
script guesses that if a file whose name starts with C</lib64/> looks | |
orphaned, maybe it could be known to Portage as residing under | |
C</lib/>. | |
=cut | |
sub load_portage_data { | |
my %data; | |
my $r = File::Find::Rule->file->name('CONTENTS')->start('/var/db/pkg'); | |
while (defined(my $fn = $r->match)) { | |
open my $fh,'<',$fn; | |
while (defined(my $line=<$fh>)) { | |
chomp $line; | |
# this mess with different regexps, instead of just | |
# splitting on spaces, is to make sure we can deal with | |
# files containing spaces in their names | |
if ($line =~ /^dir /) { | |
my ($path) = $line =~ m{\A dir [ ] (.+)\z}x; | |
$data{$path}={kind=>'dir'}; | |
} | |
elsif ($line =~ /^obj /) { | |
my ($path,$md5,$mtime) = $line =~ | |
m{\A obj [ ] (.+) [ ] ([0-9a-f]+) [ ] ([0-9]+)\z}x; | |
$data{$path} = { | |
kind => 'obj', | |
md5 => $md5, | |
mtime => $mtime, | |
}; | |
} | |
elsif ($line =~ /^sym /) { | |
my ($path,$dest,$mtime) = $line =~ | |
m{\A sym [ ] (.+) [ ]->[ ] (.+) [ ] ([0-9]+)\z}x; | |
$data{$path} = { | |
kind => 'sym', | |
dest => $dest, | |
mtime => $mtime, | |
}; | |
} | |
else { | |
warn "WTF? <$line> in $fn\n"; | |
} | |
} | |
} | |
return \%data; | |
} | |
sub run_rule_and_compare { | |
my ($rule,$data) = @_; | |
while (defined(my $fn = $rule->match)) { | |
my $file_info = $data->{$fn} || $data->{ $fn =~ s{^/lib64/}{/lib/}r }; | |
if (!$file_info) { | |
say "O $fn"; | |
next; | |
} | |
my $fs = lstat($fn); | |
my $kind = -l $fs ? 'sym' : -d $fs ? 'dir' : 'obj'; | |
if ($kind ne $file_info->{kind}) { | |
say "K $fn"; | |
next; | |
} | |
next if $kind eq 'dir'; # nothing else to check for directories | |
my $mtime = $fs->mtime; | |
my $errs=''; | |
if ($mtime != $file_info->{mtime}) { | |
$errs .= 'T'; | |
} | |
if ($kind eq 'sym') { | |
my $dest = readlink($fn); | |
if ($dest ne $file_info->{dest}) { | |
$errs .= 'L'; | |
} | |
} | |
else { | |
my $d = Digest::MD5->new; | |
try { | |
open my $fh,'<:raw',$fn; | |
$d->addfile($fh); | |
my $md5 = $d->hexdigest; | |
if ($md5 ne $file_info->{md5}) { | |
$errs .= '5'; | |
} | |
} | |
catch { $errs .= '*' }; | |
} | |
say "$errs $fn" if $errs; | |
} | |
} | |
# directories matching this regex will be ignored | |
my $prune_rx = qr{\A / (?:home/|net/|dev/|sys/|proc/|run/|tmp/|var/(?:db/pkg/|cache/|spool/|run/|log/)|usr/(?:src/linux/|portage/)) }x; | |
my $data = load_portage_data; | |
warn "loaded\n"; | |
my $rule = File::Find::Rule->new; | |
$rule->or( | |
$rule->new->exec(sub{$_[2] =~ /$prune_rx/})->prune->discard, | |
$rule->new, | |
); | |
run_rule_and_compare($rule->start('/'),$data); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment