Created
December 2, 2013 22:12
-
-
Save stefanor/7760022 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/perl -W
use strict;
use FileHandle;
use Fcntl qw(:DEFAULT O_DIRECT);
use List::Util qw(first);
use POSIX;
#use Sys::Mmap;
use integer;
# The array consists of several member drives, each possibly in a different
# state.  They are kept in @drv, in no particular order.  Every entry is a
# hash with the following members:
#   slot      - number of the slot this drive occupies, counting from 0
#   fh        - filehandle of the opened device
#   name      - device name below /dev (sdX etc.), also used in diagnostics
#   good, bad - not implemented yet: where the good/bad parts of the drive are
my @drv;
my $chunksz;    # chunk size in bytes
my $drives;     # number of drives (including parity)
my $raidsz;     # size of the whole raid device, in bytes
my $dataoff;    # data offset within each member device, in bytes
# Data layout.  The numbering is local to this script and follows what the
# extraction code actually implements:
#   1 = left-asymmetric (data chunks stay in slot order, parity is skipped)
#   2 = left-symmetric  (data chunks start right after the parity slot and wrap)
my $layout;
my @need;       # areas to recover _within_ the array: [ start, len ], in bytes
# Description of the array to recover from.  The first branch is a small
# test array and is disabled; the second branch is the array actually being
# recovered.  Drives that are commented out are treated as missing and their
# chunks are rebuilt from parity where possible.
if (0) {    # a test array on my system
    @drv = (
        { slot => 0, name => "sdb2", },
        # { slot => 1, name => "sdc2", },
        { slot => 2, name => "sdd2", },
        { slot => 3, name => "sde2", },
        { slot => 4, name => "sdf2", },
    );
    $chunksz = 128*1024;
    $drives  = 5;
    $raidsz  = 4176384*1024;
    $dataoff = 0*2048*512;
    $layout  = 2;
    @need    = ( [ 0, 2*1024*1024 ], );
}
else {
    @drv = (
        { slot => 0, name => "sda5", },    # this one is old
        # { slot => 0, name => "sdf5", },  # this one is an attempt to rebuild sda
        { slot => 1, name => "sdb5", },
        { slot => 2, name => "sdc5", },    # failed somewhere at the end
        { slot => 3, name => "sdd5", },
        { slot => 4, name => "sde5", },
    );
    $chunksz = 512*1024;
    $drives  = 5;
    $raidsz  = 553957376*1024;
    $dataoff = 2048*512;
    $layout  = 2;

    # Earlier list of areas, given as [ start, len ] in MiB.  It used to be
    # assigned to @need and converted to bytes, but was immediately replaced
    # by the stripe-based list below, so it is kept here for reference only:
    #   [ 0, 2 ],            # lvm header
    #   [ 2, 4096 ],
    #   [ 444418, 21504 ],
    #   [ 506882, 5120 ],

    # Areas currently being recovered.  The start is given as a stripe number
    # and converted to a byte offset within the array; the length stays at
    # 1 byte.
    my $stripe_bytes = $chunksz * ($drives - 1);
    @need = map { [ $_ * $stripe_bytes, 1 ] } (
        226369,    # in stripes
        226371,    # in stripes
    );
}
# Geometry derived from the configuration above.
my $ddrives = $drives - 1;            # number of data drives (all but parity)
my $stripsz = $chunksz * $ddrives;    # bytes of data in one stripe
my $chunks  = $raidsz / $chunksz;     # data chunks in the whole array
my $strips  = $chunks / $ddrives;     # stripes in the whole array
warn "chunks=$chunks strips=$strips data_drives=$ddrives\n";

# The divisions above are integer divisions; multiplying back detects an
# array size that is not a whole number of stripes.
die "raid size ($raidsz) is not a multiple of stripe size ($chunksz*$ddrives)!\n"
    if $strips * $stripsz != $raidsz;
# Return the index of the first entry of @drv whose slot number equals its
# own position in @drv, looking at no more than the first $drives entries.
# Returns nothing (undef in scalar context) when there is no such entry.
#
# NOTE(review): despite its name this helper does not use the offset it is
# given; the original computed a chunk number from it and then ignored the
# result.  Nothing in this script calls it.  The lookup is kept as it was;
# confirm the intended behaviour before relying on it.
sub find_drv_for_offset($) {
    my ($off) = @_;    # currently unused, see the note above

    # Never index past the end of @drv: $drv[$i]->{slot} on a missing element
    # would autovivify an empty entry and silently grow the drive list.
    my $last = $#drv < $drives - 1 ? $#drv : $drives - 1;
    foreach my $i (0 .. $last) {
        return $i if $drv[$i]{slot} == $i;
    }
    return;
}
# Open every configured member device read-only and remember its handle in
# the drive's "fh" member.  Any device that cannot be opened is fatal.
foreach my $d (@drv) {
    my $fh = FileHandle->new;
    # O_DIRECT is currently disabled (commented out of the flags).
    sysopen $fh, "/dev/$d->{name}", O_RDONLY    # | O_DIRECT
        or die "unable to open $d->{name}: $!\n";
    $d->{fh} = $fh;
}
# Scratch buffer that receives one chunk at a time when reading the drives.
my $buf;
if (0) {
    # Disabled: map an anonymous region to get an aligned buffer.  mmap and
    # its constants come from Sys::Mmap, whose "use" line is commented out,
    # so this block must stay disabled unless that module is loaded again.
    open F, "+<", "/dev/null" or die "/dev/null: $!\n";
    mmap($buf, $chunksz, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON, F)
        or die "mmap: $!\n";
    close F;
}
# ---------------------------------------------------------------------------
# Extraction: for every requested area, read the affected stripes from the
# member drives, rebuild the chunk of a single unreadable/missing drive from
# parity, and write the data chunks to the output file at the same byte
# offset they have within the array.
# ---------------------------------------------------------------------------

# Write one data chunk at the current position of the output handle, or step
# over its place when there is no data for it.
#   $out  - output filehandle
#   $data - chunk contents, or undef if the chunk could not be obtained
# Returns true if data was written, false if the chunk was skipped.
# Dies if the output file cannot be written or repositioned.
sub emit_chunk {
    my ($out, $data) = @_;

    if (defined $data) {
        my $written = syswrite $out, $data;
        die "error writing output file: $!\n" unless defined $written;
        die "short write to output file ($written of " . length($data) . " bytes)\n"
            if $written != length $data;
        return 1;
    }

    # Leave a hole so that the following chunks still land at their offsets.
    defined sysseek($out, $chunksz, SEEK_CUR)
        or die "error seeking in output file: $!\n";
    return 0;
}

# Report a data chunk that could neither be read nor rebuilt.
#   $strip - stripe number
#   $idx   - position of the chunk among the data chunks of the stripe
#   $slot  - slot of the drive that holds the chunk
sub report_bad_chunk {
    my ($strip, $idx, $slot) = @_;

    # Look the drive up without touching @drv.  (Assigning to a foreach loop
    # variable would overwrite the @drv element it aliases.)
    my $owner = first { $_->{slot} == $slot } @drv;
    my $name  = defined $owner ? $owner->{name} : "absent";
    warn "BAD: " . ($strip * $stripsz + $idx * $chunksz) . " $chunksz (slot=$slot $name)\n";
    return;
}

die "unsupported layout $layout (this script knows layouts 1 and 2)\n"
    unless $layout == 1 || $layout == 2;

my $outname = $ARGV[0];
die "usage: $0 OUTPUT-FILE\n" unless defined $outname;

# "+<" neither creates nor truncates: the output file has to exist already
# and is updated in place.
open my $out, "+<", $outname
    or die "unable to open output file $outname: $!\n";

foreach my $area (@need) {
    my ($start, $len) = @$area;

    # Stripes touched by the area: first one, and one past the last one.
    my $first_strip = $start / $stripsz;
    my $end_strip   = ($start + $len + $stripsz - 1) / $stripsz;
    warn "extracting $start:$len\n";

    STRIP:
    foreach my $strip ($first_strip .. $end_strip - 1) {
        # Every drive holds one chunk per stripe, after its data offset.
        my $off = $strip * $chunksz + $dataoff;
        warn " strip=$strip off=" . ($strip * $stripsz) . " doff=$off\n";

        my @chunk;      # chunks of this stripe that were read, indexed by slot
        my $got = 0;    # how many chunks were read completely
        foreach my $d (@drv) {
            my $nread;
            $nread = sysread($d->{fh}, $buf, $chunksz)
                if defined sysseek($d->{fh}, $off, SEEK_SET);

            # Only a complete chunk is usable: a short read or EOF would
            # otherwise corrupt both the parity check and the output.
            if (defined $nread && $nread == $chunksz) {
                $chunk[ $d->{slot} ] = $buf;
                ++$got;
            }
            else {
                my $why = defined $nread
                    ? "short read ($nread of $chunksz bytes)"
                    : "$!";
                warn "error reading from $d->{name}:$off: $why\n";
            }
        }

        if ($got < $ddrives) {
            warn " unable to read strip# $strip (bytes " . ($strip * $stripsz) . ".." . (($strip + 1) * $stripsz) . "), may recover some chunks from this strip in the future\n";
            warn "BAD: " . ($strip * $stripsz) . " " . ($stripsz) . "\n";
        }

        my $parity;    # stays undef when too few chunks were read
        if ($got >= $ddrives) {
            # xor all chunks that were read
            $parity = "\0" x $chunksz;
            foreach my $slot (0 .. $drives - 1) {
                $parity ^= $chunk[$slot] if defined $chunk[$slot];
            }
            # Here we either have a full stripe, and the xor must be all
            # zeros, or exactly one chunk is missing and the xor is that
            # chunk.
            if ($got == $drives && $parity ne ("\0" x $chunksz)) {
                warn " parity for strip=$strip (" . ($strip * $chunksz) . ".." . (($strip + 1) * $chunksz - 1) . ") is wrong\n";
                #XXX add it to bad list?
                next STRIP;
            }
        }

        defined sysseek($out, $strip * $stripsz, SEEK_SET)
            or die "error seeking in output file: $!\n";

        # Where is the parity chunk within the stripe?  In both layouts it
        # starts on the last drive and moves one drive to the left with
        # every stripe.
        my $pslot = $drives - $strip % $drives - 1;

        # Slots of the data chunks of this stripe, in logical order.
        my @order;
        if ($layout == 1) {
            # left-asymmetric layout with 5 drives: data chunks stay in slot
            # order, the parity slot is skipped
            #   D0    D1    D2    D3    D4
            #   d00 | d01 | d02 | d03 |(p00)  s00
            #   d04 | d05 | d06 |(p01)| d07   s01
            #   d08 | d09 |(p02)| d10 | d11   s02
            #   d12 |(p03)| d13 | d14 | d15   s03
            #  (p04)| d16 | d17 | d18 | d19   s04
            #   d20 | d21 | d22 | d23 |(p05)  s05
            #   ...
            @order = grep { $_ != $pslot } 0 .. $drives - 1;
        }
        else {
            # left-symmetric layout with 5 drives: data chunks start right
            # after the parity slot and wrap around
            #   D0    D1    D2    D3    D4
            #   d00 | d01 | d02 | d03 |(p00)  s00
            #   d05 | d06 | d07 |(p01)| d04   s01
            #   d10 | d11 |(p02)| d08 | d09   s02
            #   d15 |(p03)| d12 | d13 | d14   s03
            #  (p04)| d16 | d17 | d18 | d19   s04
            #   d20 | d21 | d22 | d23 |(p05)  s05
            #   ...
            @order = map { ($pslot + $_ + 1) % $drives } 0 .. $ddrives - 1;
        }

        foreach my $idx (0 .. $#order) {
            my $slot = $order[$idx];
            # Use the chunk as read; failing that, the chunk rebuilt from
            # parity; failing that too, leave a hole and report it.
            my $data = defined $chunk[$slot] ? $chunk[$slot] : $parity;
            emit_chunk($out, $data)
                or report_bad_chunk($strip, $idx, $slot);
        }
    }
}

close $out
    or die "error closing output file $outname: $!\n";
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment