Skip to content

Instantly share code, notes, and snippets.

@jimfulton
Last active September 25, 2016 14:05
Show Gist options
  • Save jimfulton/0e61d1b5da0bccf8282a58a9a63aedfd to your computer and use it in GitHub Desktop.
Save jimfulton/0e61d1b5da0bccf8282a58a9a63aedfd to your computer and use it in GitHub Desktop.
Rust script for iterating over a ZODB file storage file using nested iterators.
/// Summarize a file-storage 1 formatted file, mainly as a way of
/// gauging Rust performance. This is more or less equivalent to the
/// following Python/ZODB script:
///
/// ```python
/// import ZODB.FileStorage
/// import sys
///
/// def main():
///     it = ZODB.FileStorage.FileIterator(sys.argv[1])
///     transactions = records = 0
///     for transaction in it:
///         transactions += 1
///         for record in transaction:
///             records += 1
///
///     print(transactions, records)
///
/// if __name__ == '__main__':
///     main()
/// ```
extern crate byteorder;
use byteorder::{BigEndian, ReadBytesExt};
use std::cell::RefCell;
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::io::prelude::*;
use std::rc::Rc;
/// Return early from the enclosing function with an `io::Error` when a
/// condition does not hold.
///
/// * `$cond` - boolean expression that is expected to be true.
/// * `$msg`  - payload handed to `io::Error::new` when `$cond` is false.
///
/// On failure the macro executes
/// `return Err(io::Error::new(io::ErrorKind::Other, $msg))`, so it is only
/// usable inside functions whose return type accepts that value (for
/// example `io::Result<T>`), and `io` must resolve to `std::io` where the
/// macro is invoked.
macro_rules! io_assert {
    ($cond:expr, $msg:expr) => {
        if $cond {
            // Condition holds: fall through to the code after the macro.
        } else {
            return Err(io::Error::new(io::ErrorKind::Other, $msg));
        }
    };
}
/// File magic `FS21`; one of the two 4-byte signatures accepted when a file
/// is opened.
static MAGIC2: [u8; 4] = *b"FS21";
/// File magic `FS30`; the other accepted 4-byte signature.
static MAGIC3: [u8; 4] = *b"FS30";
/// Transaction status byte (`b'c'`) at which transaction reading stops.
const CHECKPOINT_STATUS: u8 = b'c';
/// Raw 8-byte transaction identifier.
type Tid = [u8; 8];
/// Raw 8-byte object identifier.
type Oid = [u8; 8];
/// Reference-counted, interior-mutable handle to the buffered storage file,
/// so the file iterator and the transactions it yields can share one reader.
type Reader = Rc<RefCell<BufReader<File>>>;
/// One transaction header read from the storage file.
///
/// The value doubles as an iterator over the data records that follow the
/// header, which is why it keeps a handle to the shared reader.
struct Transaction {
    /// Shared handle to the storage file; data records are read through it.
    reader: Reader,
    /// Position supplied by the caller of `Transaction::read`; record
    /// positions are computed as `pos + offset`.
    pos: u64,
    /// Number of bytes of this transaction accounted for so far. Starts just
    /// past the fixed header and the user/desc/ext fields and grows by the
    /// length of every record yielded.
    offset: u64,
    /// The 8 identifier bytes at the start of the header.
    id: Tid,
    /// Length field from the header. Record iteration stops when `offset`
    /// reaches it, and it must equal the redundant length stored after the
    /// last record.
    length: u64,
    /// Status byte from the header.
    status: u8,
    /// Raw bytes of the first variable-length header field.
    user: Vec<u8>,
    /// Raw bytes of the second variable-length header field.
    desc: Vec<u8>,
    /// Raw bytes of the third variable-length header field.
    ext: Vec<u8>,
}
const TRANSACTIONHEADERSIZE: u64 = 23;
impl Transaction {
fn read(reader_: Reader, pos: u64) -> io::Result<Option<Transaction>> {
let mut reader = reader_.borrow_mut();
let mut id = [0; 8];
let read = try!(reader.read(&mut id));
if read == 0 {
Ok(None)
}
else {
if read != 8 {
try!(reader.read_exact(&mut id[read..]));
}
let length = try!(reader.read_u64::<BigEndian>());
let mut status = [0];
try!(reader.read_exact(&mut status));
if status[0] == CHECKPOINT_STATUS {
return Ok(None);
}
let luser = try!(reader.read_u16::<BigEndian>());
let ldesc = try!(reader.read_u16::<BigEndian>());
let lext = try!(reader.read_u16::<BigEndian>());
let mut t = Transaction {
reader: reader_.clone(), pos: pos,
offset: TRANSACTIONHEADERSIZE +
luser as u64 + ldesc as u64 + lext as u64,
id: id, length: length, status: status[0],
user: vec![0u8; luser as usize],
desc: vec![0u8; ldesc as usize],
ext: vec![0u8; lext as usize],
};
try!(reader.read_exact(&mut t.user));
try!(reader.read_exact(&mut t.desc));
try!(reader.read_exact(&mut t.ext));
Ok(Some(t))
}
}
fn check_length(&mut self) -> io::Result<()> {
let mut reader = self.reader.borrow_mut();
io_assert!(try!(reader.read_u64::<BigEndian>()) == self.length,
"Redundant length mismatch");
Ok(())
}
}
/// Iterates over the data records of a transaction.
///
/// Each call reads the next record from the shared reader. Once the bytes
/// consumed equal the transaction length, the redundant trailing length is
/// read and checked, and iteration ends.
///
/// The iterator is not fused: callers should stop at the first `Err` or
/// `None`, because further calls keep reading from the shared reader.
impl std::iter::Iterator for Transaction {
    type Item = io::Result<DataRecord>;

    fn next(&mut self) -> Option<io::Result<DataRecord>> {
        if self.offset == self.length {
            // All records consumed: validate the trailing length copy.
            return match self.check_length() {
                Ok(()) => None,
                Err(e) => Some(Err(e)),
            };
        }
        if self.offset > self.length {
            // A record (or the header itself) claims more bytes than the
            // transaction holds. Without this check the equality test above
            // would never fire and reading would run on into whatever
            // follows this transaction in the file.
            return Some(Err(io::Error::new(
                io::ErrorKind::Other,
                "Data record extends past end of transaction",
            )));
        }

        let mut reader = self.reader.borrow_mut();
        let record = DataRecord::read(&mut *reader, self.pos + self.offset);
        if let Ok(ref d) = record {
            self.offset += d.len();
        }
        Some(record)
    }
}
/// One data record read from inside a transaction.
pub struct DataRecord {
    /// Position passed to `DataRecord::read` for this record.
    pos: u64,
    /// The first 8 bytes of the record header.
    oid: Oid,
    /// The next 8 bytes of the record header.
    tid: Tid,
    /// Big-endian u64 header field following `tid`.
    /// NOTE(review): presumably the position of the previous record for
    /// this object - confirm against the format specification.
    prev: u64,
    /// Big-endian u64 header field following `prev`.
    /// NOTE(review): presumably the position of the owning transaction -
    /// confirm against the format specification.
    tloc: u64,
    /// Always 0: reading fails with "lver non-zero" for any other value.
    lver: u16,
    /// Number of bytes that follow the fixed header in the file: the data
    /// length from the header, or 8 (the back pointer) when that is zero.
    dlen: u64,
    /// Record payload. Deleted data uses empty, not None.
    data: Vec<u8>,
}
const DATAHEADERSIZE: u64 = 42;
impl DataRecord {
fn read(mut reader: &mut BufReader<File>, pos: u64)
-> io::Result<DataRecord> {
let mut oid = [0; 8];
let mut tid = [0; 8];
try!(reader.read_exact(&mut oid));
try!(reader.read_exact(&mut tid));
let prev = try!(reader.read_u64::<BigEndian>());
let tloc = try!(reader.read_u64::<BigEndian>());
io_assert!(try!(reader.read_u16::<BigEndian>()) == 0, "lver non-zero");
let mut dlen = try!(reader.read_u64::<BigEndian>());
let data = if dlen == 0 {
dlen = 8;
let back_pointer = try!(reader.read_u64::<BigEndian>());
if back_pointer > 0 {
// Get data from earlier record because Undo
try!(reader.seek(std::io::SeekFrom::Start(back_pointer)));
let data = try!(
DataRecord::read(&mut reader, back_pointer)).data;
try!(reader.seek(std::io::SeekFrom::Start(
pos + DATAHEADERSIZE + 8)));
data
}
else {
vec![0u8; 0] // Deleted (or undone created)
}
}
else { // Normal case
let mut data = vec![0u8; dlen as usize];
try!(reader.read_exact(&mut data));
data
};
Ok(DataRecord {
pos: pos, oid: oid, tid: tid, prev: prev, tloc: tloc, lver: 0,
dlen: dlen, data: data,
})
}
fn len(&self) -> u64 { DATAHEADERSIZE + self.dlen}
}
/// Iterator over the transactions stored in a file-storage file.
struct FileIterator {
    /// Shared handle to the buffered file; each yielded transaction receives
    /// a clone of it.
    reader: Reader,
    /// Position of the next transaction. Starts at 4, just past the magic
    /// bytes, and advances by each transaction's length plus 8.
    pos: u64,
}
impl FileIterator {
fn open(name: &str) -> io::Result<FileIterator> {
let mut reader = BufReader::new(try!(File::open(name)));
try!(FileIterator::check_magic(&mut reader));
Ok(FileIterator { reader: Rc::new(RefCell::new(reader)), pos: 4 })
}
fn check_magic(reader: &mut io::Read) -> io::Result<()> {
let mut magic = [0u8; 4];
try!(reader.read_exact(&mut magic));
io_assert!(magic == MAGIC3 || magic == MAGIC2, "Bad file magic");
Ok(())
}
}
/// Yields the transactions of the file in order.
///
/// The reader is never repositioned here, so each transaction must be fully
/// iterated before the next one is requested. Iteration ends when
/// `Transaction::read` reports no further transaction.
impl std::iter::Iterator for FileIterator {
    type Item = io::Result<Transaction>;

    fn next(&mut self) -> Option<io::Result<Transaction>> {
        let shared = self.reader.clone();
        match Transaction::read(shared, self.pos) {
            Err(e) => Some(Err(e)),
            Ok(None) => None,
            Ok(Some(t)) => {
                // Skip the transaction body plus its 8-byte trailing length.
                self.pos += t.length + 8;
                Some(Ok(t))
            }
        }
    }
}
/// Running totals gathered while walking a storage file.
///
/// The counters are 64-bit so that very large storages cannot overflow them
/// (a 32-bit signed counter tops out at 2^31 - 1 records).
#[derive(Debug)]
struct DBStats {
    /// Number of transactions seen.
    transactions: u64,
    /// Number of data records seen across all transactions.
    records: u64,
}
/// Count the transactions and data records in the file named by the first
/// command-line argument and print the totals.
///
/// Exits with status 2 and a usage message when no file name is given, and
/// with status 1 when the file cannot be opened or has a bad signature.
/// An error met while iterating is printed and stops the walk; the totals
/// gathered up to that point are still printed.
fn main() {
    let args: Vec<String> = std::env::args().collect();
    let path = match args.get(1) {
        Some(path) => path,
        None => {
            // Indexing `args[1]` directly would panic with an opaque
            // out-of-bounds message when the argument is missing.
            let program = args.first().map(String::as_str).unwrap_or("program");
            eprintln!("usage: {} <file-storage-file>", program);
            std::process::exit(2);
        }
    };
    let fi = match FileIterator::open(path) {
        Ok(fi) => fi,
        Err(error) => {
            eprintln!("cannot open {}: {}", path, error);
            std::process::exit(1);
        }
    };

    let mut stats = DBStats { transactions: 0, records: 0 };
    'outer: for tr in fi {
        match tr {
            Ok(transaction) => {
                stats.transactions += 1;
                for rr in transaction {
                    match rr {
                        Ok(_) => stats.records += 1,
                        Err(error) => {
                            print!("WTF? {}\n", error);
                            // The reader position is unreliable after a
                            // record error, so abandon the whole walk.
                            break 'outer;
                        }
                    }
                }
            }
            Err(error) => {
                print!("WTF? {}\n", error);
                break;
            }
        }
    }
    print!("{:?}\n", stats);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment