Skip to content

Instantly share code, notes, and snippets.

@jimfulton
Created September 25, 2016 14:06
Show Gist options
  • Save jimfulton/1ed5e0a655647514465af2f9517333ac to your computer and use it in GitHub Desktop.
Save jimfulton/1ed5e0a655647514465af2f9517333ac to your computer and use it in GitHub Desktop.
Rust script for iterating over a ZODB file storage file using a single iterator yielding an enum type
/// Summarize a file-storage 1 formatted file, mainly as a way of
/// gauging Rust performance. This is more or less equivalent to the
/// following Python/ZODB script:
///
/// ```python
/// import ZODB.FileStorage
/// import sys
///
/// def main():
///     it = ZODB.FileStorage.FileIterator(sys.argv[1])
///     transactions = records = 0
///     for transaction in it:
///         transactions += 1
///         for record in transaction:
///             records += 1
///
///     print(transactions, records)
///
/// if __name__ == '__main__':
///     main()
/// ```
///
/// Except that rather than providing nested iterators, a single
/// iterator of both transactions and data records is provided.
extern crate byteorder;
use byteorder::{BigEndian, ReadBytesExt};
use std::fs::File;
use std::io;
use std::io::BufReader;
use std::io::prelude::*;
/// Guard macro for functions that return `io::Result<_>`.
///
/// Evaluates the first argument; when it is false, the enclosing function
/// returns early with an `io::Error` of kind `Other` whose payload is the
/// second argument. When it is true, nothing happens.
macro_rules! io_assert {
    ($check:expr, $text:expr) => {
        if !($check) {
            return Err(io::Error::new(io::ErrorKind::Other, $text));
        }
    };
}
/// File signatures accepted by `Iterator::check_magic` (first four bytes
/// of the file).
static MAGIC2: [u8; 4] = *b"FS21";
static MAGIC3: [u8; 4] = *b"FS30";

/// Status byte value `c`.
const CHECKPOINT_STATUS: u8 = b'c';

/// Eight-byte transaction identifier, kept as raw bytes.
type Tid = [u8; 8];

/// Eight-byte object identifier, kept as raw bytes.
type Oid = [u8; 8];
/// Decoded transaction header, as produced by `TransactionRecord::read`.
struct TransactionRecord {
    /// Position value handed to `read` by the caller; stored unchanged.
    pos: u64,
    /// Transaction id: the first eight bytes of the header.
    id: Tid,
    /// Big-endian 64-bit length field that follows the id.
    length: u64,
    /// The single status byte.
    status: u8,
    /// User bytes; their count comes from the first 16-bit length field.
    user: Vec<u8>,
    /// Description bytes; their count comes from the second 16-bit length field.
    desc: Vec<u8>,
    /// Extension bytes; their count comes from the third 16-bit length field.
    ext: Vec<u8>,
}
/// Size in bytes of the fixed part of a transaction header:
/// id (8) + length (8) + status (1) + three 16-bit string lengths (2 each).
const TRANSACTIONHEADERSIZE: u64 = 8 + 8 + 1 + 2 + 2 + 2;
impl TransactionRecord {
fn read(reader: &mut io::Read, pos: u64)
-> io::Result<Option<TransactionRecord>> {
let mut id = [0; 8];
let read = try!(reader.read(&mut id));
if read == 0 {
Ok(None) // EOF
}
else {
if read != 8 { // because BufReader
try!(reader.read_exact(&mut id[read..]));
}
let length = try!(reader.read_u64::<BigEndian>());
let mut status = [0];
try!(reader.read_exact(&mut status));
let luser = try!(reader.read_u16::<BigEndian>());
let ldesc = try!(reader.read_u16::<BigEndian>());
let lext = try!(reader.read_u16::<BigEndian>());
let mut r = TransactionRecord {
pos: pos, id: id, length: length, status: status[0],
user: vec![0u8; luser as usize],
desc: vec![0u8; ldesc as usize],
ext: vec![0u8; lext as usize],
};
try!(reader.read_exact(&mut r.user));
try!(reader.read_exact(&mut r.desc));
try!(reader.read_exact(&mut r.ext));
Ok(Some(r))
}
}
}
/// Decoded data record, as produced by `DataRecord::read`.
pub struct DataRecord {
    /// Position value handed to `read` by the caller; stored unchanged.
    pos: u64,
    /// Object id: the first eight bytes of the record.
    oid: Oid,
    /// Transaction id: the eight bytes after the object id.
    tid: Tid,
    /// First big-endian 64-bit field after the ids.
    prev: u64,
    /// Second big-endian 64-bit field after the ids.
    tloc: u64,
    /// Always 0: `read` rejects records whose 16-bit `lver` field is non-zero.
    lver: u16,
    /// Payload bytes. When the stored length is zero, `read` takes the next
    /// eight bytes instead (a backpointer).
    data: Vec<u8>,
}
/// Size in bytes of the fixed part of a data record:
/// oid (8) + tid (8) + prev (8) + tloc (8) + lver (2) + data length (8).
const DATAHEADERSIZE: u64 = 8 + 8 + 8 + 8 + 2 + 8;
impl DataRecord {
fn read(reader: &mut io::Read, pos: u64) -> io::Result<DataRecord> {
let mut oid = [0; 8];
let mut tid = [0; 8];
try!(reader.read_exact(&mut oid));
try!(reader.read_exact(&mut tid));
let prev = try!(reader.read_u64::<BigEndian>());
let tloc = try!(reader.read_u64::<BigEndian>());
io_assert!(try!(reader.read_u16::<BigEndian>()) == 0, "lver non-zero");
let mut dlen = try!(reader.read_u64::<BigEndian>());
if dlen == 0 {
dlen = 8; // backpointer
}
let mut r = DataRecord {
pos: pos, oid: oid, tid: tid, prev: prev, tloc: tloc, lver: 0,
data: vec![0u8; dlen as usize],
};
try!(reader.read_exact(&mut r.data));
Ok(r)
}
}
/// One item yielded by the file iterator: either the header that starts a
/// transaction or one of the data records that follow it.
enum Record {
    /// A transaction header.
    Transaction(TransactionRecord),
    /// A data record.
    Data(DataRecord),
}
/// Sequential reader over a storage file, yielding transaction headers and
/// data records through its `std::iter::Iterator` implementation.
///
/// Note that this type shares its name with the standard `Iterator` trait.
struct Iterator {
    /// Buffered handle on the file being scanned.
    reader: BufReader<File>,
    /// Running byte offset; `open` starts it at 4, just past the magic bytes.
    pos: u64,
    /// Length field of the current transaction; 0 until one has been read.
    tlen: u64,
    /// Offset at which the current transaction ends; when `pos` reaches it,
    /// the next item read is a transaction header.
    tend: u64,
}
impl Iterator {
fn open(name: &str) -> io::Result<Iterator> {
let mut reader = BufReader::new(try!(File::open(name)));
try!(Iterator::check_magic(&mut reader));
Ok(Iterator { reader: reader, pos: 4, tlen: 0, tend: 4 })
}
fn check_magic(reader: &mut io::Read) -> io::Result<()> {
let mut magic = [0u8; 4];
try!(reader.read_exact(&mut magic));
io_assert!(magic == MAGIC3 || magic == MAGIC2, "Bad file magic");
Ok(())
}
fn check_tlen(&mut self) -> io::Result<()> {
io_assert!(try!(self.reader.read_u64::<BigEndian>()) == self.tlen,
"Redundant length mismatch");
Ok(())
}
}
impl std::iter::Iterator for Iterator {
    type Item = io::Result<Record>;

    /// Yield the next record in file order.
    ///
    /// When the current offset sits on a transaction boundary
    /// (`pos == tend`), the trailing redundant length of the previous
    /// transaction (if any) is verified and the next transaction header is
    /// read; `None` is returned when the input ends cleanly there.
    /// Otherwise the next data record of the current transaction is read.
    ///
    /// Besides I/O errors, an `InvalidData` error is produced when a
    /// transaction's length field is smaller than its own header (or would
    /// overflow the offset), and when a data record extends past the end of
    /// its transaction. Without these checks `pos` would step over `tend`
    /// and the rest of the file would be misread as data records.
    ///
    /// The iterator is not fused: after an `Err` its state is unspecified
    /// and callers should stop iterating.
    fn next(&mut self) -> Option<io::Result<Record>> {
        if self.pos == self.tend {
            // `tlen` is still 0 only before the first transaction, which has
            // no preceding trailer to verify.
            if self.tlen > 0 {
                if let Err(e) = self.check_tlen() {
                    return Some(Err(e));
                }
                self.pos += 8;
            }
            match TransactionRecord::read(&mut self.reader, self.pos) {
                Ok(Some(t)) => {
                    let header_len = TRANSACTIONHEADERSIZE
                        + t.user.len() as u64
                        + t.desc.len() as u64
                        + t.ext.len() as u64;
                    // The length must at least cover the header just read,
                    // and the end offset must be representable.
                    let tend = match self.pos.checked_add(t.length) {
                        Some(end) if t.length >= header_len => end,
                        _ => {
                            return Some(Err(io::Error::new(
                                io::ErrorKind::InvalidData,
                                "Invalid transaction length",
                            )))
                        }
                    };
                    self.tend = tend;
                    self.tlen = t.length;
                    self.pos += header_len;
                    Some(Ok(Record::Transaction(t)))
                }
                Ok(None) => None,
                Err(e) => Some(Err(e)),
            }
        } else {
            match DataRecord::read(&mut self.reader, self.pos) {
                Ok(d) => {
                    let next_pos = self.pos + DATAHEADERSIZE + d.data.len() as u64;
                    if next_pos > self.tend {
                        return Some(Err(io::Error::new(
                            io::ErrorKind::InvalidData,
                            "Data record overruns transaction",
                        )));
                    }
                    self.pos = next_pos;
                    Some(Ok(Record::Data(d)))
                }
                Err(e) => Some(Err(e)),
            }
        }
    }
}
/// Running totals gathered while scanning a storage file.
///
/// The counters are 64-bit so that storages holding more than `i32::MAX`
/// records cannot overflow them.
#[derive(Debug)]
struct DBStats {
    /// Number of transaction headers seen.
    transactions: u64,
    /// Number of data records seen.
    records: u64,
}
/// Count the transactions and data records in the file named by the first
/// command-line argument and print the totals.
///
/// The file is opened once. A missing argument or a file that cannot be
/// opened is reported on stderr with a non-zero exit status instead of a
/// panic. A read error part-way through is printed and ends the scan; the
/// totals gathered so far are still printed.
fn main() {
    let path = match std::env::args().nth(1) {
        Some(p) => p,
        None => {
            eprintln!("usage: pass the path of a file-storage file as the first argument");
            std::process::exit(2);
        }
    };
    let records = match Iterator::open(&path) {
        Ok(it) => it,
        Err(error) => {
            eprintln!("cannot open {}: {}", path, error);
            std::process::exit(1);
        }
    };

    let mut stats = DBStats { transactions: 0, records: 0 };
    for item in records {
        match item {
            Ok(Record::Transaction(_)) => stats.transactions += 1,
            Ok(Record::Data(_)) => stats.records += 1,
            Err(error) => {
                println!("WTF? {}", error);
                break;
            }
        }
    }
    println!("{:?}", stats);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment