Skip to content

Instantly share code, notes, and snippets.

@josephg
Created September 10, 2021 14:32
Show Gist options
  • Save josephg/8c149484a1cac3473bc3d4d2fa3867a2 to your computer and use it in GitHub Desktop.
Save josephg/8c149484a1cac3473bc3d4d2fa3867a2 to your computer and use it in GitHub Desktop.
positional update file format experiments
#[cfg(test)]
mod tests{
// *** Mad science ***
#[test]
fn positional_updates_1() {
// This variant leans on ins_del_runs to just use a single stream of insert and delete positions.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
struct Run3 {
diff: i32, // From previous item
len: u32,
start: u32,
end: u32,
}
impl SplitableSpan for Run3 {
fn len(&self) -> usize { self.len as usize }
fn truncate(&mut self, at: usize) -> Self { unimplemented!() }
fn can_append(&self, other: &Self) -> bool {
// dbg!(self, other);
assert_eq!(other.start == self.end, other.diff == 0);
other.start == self.end
}
fn append(&mut self, other: Self) {
self.len += other.len;
self.end = other.end;
}
fn prepend(&mut self, other: Self) {
self.diff = other.diff;
self.len += other.len;
self.start = other.start;
}
}
let data = load_testing_data("benchmark_data/automerge-paper.json.gz");
// let data = load_testing_data("benchmark_data/blogpost.json.gz");
// let data = load_testing_data("benchmark_data/rustcode.json.gz");
let mut i = 0;
let mut w = SpanWriter::new(|into, val: Run3| {
// if i < 100 { dbg!((val.diff, val.len)); }
// if i < 100 { dbg!(val); }
// i += 1;
if val.diff == -1 && val.len == 1 { return; }
let mut dest = [0u8; 20];
let mut pos = 0;
pos += encode_i64_with_extra_bit(val.diff as i64, val.len != 1, &mut dest[..]);
if val.len != 1 {
pos += encode_u32(val.len, &mut dest[pos..]);
}
into.extend_from_slice(&dest[..pos]);
});
let mut ins_del_runs = SpanWriter::new_with_val(0, |vec: &mut Vec<u8>, len: i32| {
// The lengths alternate back and forth each operation.
push_u32(vec, len.abs() as u32);
});
let mut last: u32 = 0;
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) {
// dbg!(patch);
if *del > 0 {
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: *del as _,
start: *pos as u32,
end: *pos as u32
});
ins_del_runs.push(-(*del as i32));
last = *pos as u32;
}
if !ins.is_empty() {
let count = ins.chars().count() as u32;
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: count,
start: *pos as u32,
end: *pos as u32 + count
});
ins_del_runs.push(count as i32);
last = *pos as u32 + count;
}
}
dbg!(w.count);
let data = w.flush_into_inner();
dbg!(data.len());
let ins_del_data = ins_del_runs.flush_into_inner();
dbg!(ins_del_data.len());
dbg!(data.len() + ins_del_data.len());
}
#[test]
fn positional_updates_2() {
// This time not merging deletes and inserts. Each entry is either an insert or a delete.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
struct Run3 {
diff: i32, // From previous item
len: u32,
is_delete: bool,
}
impl SplitableSpan for Run3 {
fn len(&self) -> usize { self.len as usize }
fn truncate(&mut self, at: usize) -> Self { unimplemented!() }
fn can_append(&self, other: &Self) -> bool {
self.is_delete == other.is_delete && other.diff == 0
}
fn append(&mut self, other: Self) {
self.len += other.len;
}
fn prepend(&mut self, other: Self) { unimplemented!(); }
}
let data = load_testing_data("benchmark_data/automerge-paper.json.gz");
// let data = load_testing_data("benchmark_data/blogpost.json.gz");
// let data = load_testing_data("benchmark_data/rustcode.json.gz");
let mut i = 0;
let mut w = SpanWriter::new(|into, val: Run3| {
// if i < 100 { dbg!((val.diff, val.len)); }
// if i < 100 { dbg!(val); }
i += 1;
if val.diff == -1 && val.len == 1 { return; }
let mut dest = [0u8; 20];
let mut pos = 0;
let n = num_encode_i64_with_extra_bit(val.diff as i64, val.len != 1);
let n = mix_bit_u64(n, val.is_delete);
pos += encode_u64(n, &mut dest[..]);
if val.len != 1 {
pos += encode_u32(val.len, &mut dest[pos..]);
}
into.extend_from_slice(&dest[..pos]);
});
let mut last: u32 = 0;
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) {
// dbg!(patch);
if *del > 0 {
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: *del as _,
is_delete: true,
});
last = *pos as u32;
}
if !ins.is_empty() {
let count = ins.chars().count() as u32;
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: count,
is_delete: false,
});
last = *pos as u32 + count;
}
}
dbg!(w.count);
let data = w.flush_into_inner();
dbg!(data.len());
}
#[test]
fn positional_updates_3() {
// This variant is the same as above, but with backspace_mode added so backspaces are small.
#[derive(Debug, Eq, PartialEq, Clone, Copy)]
struct Run3 {
diff: i32, // From previous item
len: u32,
is_delete: bool,
backspace_mode: bool,
}
fn merge_bksp(r1: &Run3, r2: &Run3) -> Option<Run3> {
if !r1.is_delete || !r2.is_delete { return None; }
if !(r2.len == 1 && r2.diff == -1) { return None; }
if r1.backspace_mode {
return Some(Run3 {
diff: r1.diff,
len: r1.len + 1,
is_delete: true,
backspace_mode: true
});
} else if r1.len == 1 {
return Some(Run3 {
diff: r1.diff + 1, // Weird but we'll go after the deleted character.
len: r1.len + 1,
is_delete: true,
backspace_mode: true
});
} else { return None; }
}
impl SplitableSpan for Run3 {
fn len(&self) -> usize { self.len as usize }
fn truncate(&mut self, at: usize) -> Self { unimplemented!() }
fn can_append(&self, other: &Self) -> bool {
self.is_delete == other.is_delete && (other.diff == 0 || merge_bksp(self, other).is_some())
}
fn append(&mut self, other: Self) {
if let Some(r) = merge_bksp(self, &other) {
*self = r;
} else { self.len += other.len; }
}
fn prepend(&mut self, other: Self) { unimplemented!(); }
}
let data = load_testing_data("benchmark_data/automerge-paper.json.gz");
// let data = load_testing_data("benchmark_data/blogpost.json.gz");
// let data = load_testing_data("benchmark_data/rustcode.json.gz");
let mut i = 0;
let mut w = SpanWriter::new(|into, val: Run3| {
// if i < 100 { dbg!((val.diff, val.len)); }
// if i < 100 { dbg!(val); }
i += 1;
// if val.diff == -1 && val.len == 1 { return; }
let mut dest = [0u8; 20];
let mut pos = 0;
let mut n = num_encode_i64_with_extra_bit(val.diff as i64, val.len != 1);
if val.is_delete {
n = mix_bit_u64(n, val.backspace_mode);
}
n = mix_bit_u64(n, val.is_delete);
pos += encode_u64(n, &mut dest[..]);
if val.len != 1 {
pos += encode_u32(val.len, &mut dest[pos..]);
}
into.extend_from_slice(&dest[..pos]);
});
let mut last: u32 = 0;
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) {
// dbg!(patch);
if *del > 0 {
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: *del as _,
is_delete: true,
backspace_mode: false,
});
last = *pos as u32;
}
if !ins.is_empty() {
let count = ins.chars().count() as u32;
w.push(Run3 {
diff: i32::wrapping_sub(*pos as i32,last as i32),
len: count,
is_delete: false,
backspace_mode: false,
});
last = *pos as u32 + count;
}
}
dbg!(w.count);
let data = w.flush_into_inner();
dbg!(data.len());
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment