Created
September 10, 2021 14:32
-
-
Save josephg/8c149484a1cac3473bc3d4d2fa3867a2 to your computer and use it in GitHub Desktop.
positional update file format experiments
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[cfg(test)] | |
mod tests{ | |
// *** Mad science *** | |
#[test] | |
fn positional_updates_1() { | |
// This variant leans on ins_del_runs to just use a single stream of insert and delete positions. | |
#[derive(Debug, Eq, PartialEq, Clone, Copy)] | |
struct Run3 { | |
diff: i32, // From previous item | |
len: u32, | |
start: u32, | |
end: u32, | |
} | |
impl SplitableSpan for Run3 { | |
fn len(&self) -> usize { self.len as usize } | |
fn truncate(&mut self, at: usize) -> Self { unimplemented!() } | |
fn can_append(&self, other: &Self) -> bool { | |
// dbg!(self, other); | |
assert_eq!(other.start == self.end, other.diff == 0); | |
other.start == self.end | |
} | |
fn append(&mut self, other: Self) { | |
self.len += other.len; | |
self.end = other.end; | |
} | |
fn prepend(&mut self, other: Self) { | |
self.diff = other.diff; | |
self.len += other.len; | |
self.start = other.start; | |
} | |
} | |
let data = load_testing_data("benchmark_data/automerge-paper.json.gz"); | |
// let data = load_testing_data("benchmark_data/blogpost.json.gz"); | |
// let data = load_testing_data("benchmark_data/rustcode.json.gz"); | |
let mut i = 0; | |
let mut w = SpanWriter::new(|into, val: Run3| { | |
// if i < 100 { dbg!((val.diff, val.len)); } | |
// if i < 100 { dbg!(val); } | |
// i += 1; | |
if val.diff == -1 && val.len == 1 { return; } | |
let mut dest = [0u8; 20]; | |
let mut pos = 0; | |
pos += encode_i64_with_extra_bit(val.diff as i64, val.len != 1, &mut dest[..]); | |
if val.len != 1 { | |
pos += encode_u32(val.len, &mut dest[pos..]); | |
} | |
into.extend_from_slice(&dest[..pos]); | |
}); | |
let mut ins_del_runs = SpanWriter::new_with_val(0, |vec: &mut Vec<u8>, len: i32| { | |
// The lengths alternate back and forth each operation. | |
push_u32(vec, len.abs() as u32); | |
}); | |
let mut last: u32 = 0; | |
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) { | |
// dbg!(patch); | |
if *del > 0 { | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: *del as _, | |
start: *pos as u32, | |
end: *pos as u32 | |
}); | |
ins_del_runs.push(-(*del as i32)); | |
last = *pos as u32; | |
} | |
if !ins.is_empty() { | |
let count = ins.chars().count() as u32; | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: count, | |
start: *pos as u32, | |
end: *pos as u32 + count | |
}); | |
ins_del_runs.push(count as i32); | |
last = *pos as u32 + count; | |
} | |
} | |
dbg!(w.count); | |
let data = w.flush_into_inner(); | |
dbg!(data.len()); | |
let ins_del_data = ins_del_runs.flush_into_inner(); | |
dbg!(ins_del_data.len()); | |
dbg!(data.len() + ins_del_data.len()); | |
} | |
#[test] | |
fn positional_updates_2() { | |
// This time not merging deletes and inserts. Each entry is either an insert or a delete. | |
#[derive(Debug, Eq, PartialEq, Clone, Copy)] | |
struct Run3 { | |
diff: i32, // From previous item | |
len: u32, | |
is_delete: bool, | |
} | |
impl SplitableSpan for Run3 { | |
fn len(&self) -> usize { self.len as usize } | |
fn truncate(&mut self, at: usize) -> Self { unimplemented!() } | |
fn can_append(&self, other: &Self) -> bool { | |
self.is_delete == other.is_delete && other.diff == 0 | |
} | |
fn append(&mut self, other: Self) { | |
self.len += other.len; | |
} | |
fn prepend(&mut self, other: Self) { unimplemented!(); } | |
} | |
let data = load_testing_data("benchmark_data/automerge-paper.json.gz"); | |
// let data = load_testing_data("benchmark_data/blogpost.json.gz"); | |
// let data = load_testing_data("benchmark_data/rustcode.json.gz"); | |
let mut i = 0; | |
let mut w = SpanWriter::new(|into, val: Run3| { | |
// if i < 100 { dbg!((val.diff, val.len)); } | |
// if i < 100 { dbg!(val); } | |
i += 1; | |
if val.diff == -1 && val.len == 1 { return; } | |
let mut dest = [0u8; 20]; | |
let mut pos = 0; | |
let n = num_encode_i64_with_extra_bit(val.diff as i64, val.len != 1); | |
let n = mix_bit_u64(n, val.is_delete); | |
pos += encode_u64(n, &mut dest[..]); | |
if val.len != 1 { | |
pos += encode_u32(val.len, &mut dest[pos..]); | |
} | |
into.extend_from_slice(&dest[..pos]); | |
}); | |
let mut last: u32 = 0; | |
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) { | |
// dbg!(patch); | |
if *del > 0 { | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: *del as _, | |
is_delete: true, | |
}); | |
last = *pos as u32; | |
} | |
if !ins.is_empty() { | |
let count = ins.chars().count() as u32; | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: count, | |
is_delete: false, | |
}); | |
last = *pos as u32 + count; | |
} | |
} | |
dbg!(w.count); | |
let data = w.flush_into_inner(); | |
dbg!(data.len()); | |
} | |
#[test] | |
fn positional_updates_3() { | |
// This variant is the same as above, but with backspace_mode added so backspaces are small. | |
#[derive(Debug, Eq, PartialEq, Clone, Copy)] | |
struct Run3 { | |
diff: i32, // From previous item | |
len: u32, | |
is_delete: bool, | |
backspace_mode: bool, | |
} | |
fn merge_bksp(r1: &Run3, r2: &Run3) -> Option<Run3> { | |
if !r1.is_delete || !r2.is_delete { return None; } | |
if !(r2.len == 1 && r2.diff == -1) { return None; } | |
if r1.backspace_mode { | |
return Some(Run3 { | |
diff: r1.diff, | |
len: r1.len + 1, | |
is_delete: true, | |
backspace_mode: true | |
}); | |
} else if r1.len == 1 { | |
return Some(Run3 { | |
diff: r1.diff + 1, // Weird but we'll go after the deleted character. | |
len: r1.len + 1, | |
is_delete: true, | |
backspace_mode: true | |
}); | |
} else { return None; } | |
} | |
impl SplitableSpan for Run3 { | |
fn len(&self) -> usize { self.len as usize } | |
fn truncate(&mut self, at: usize) -> Self { unimplemented!() } | |
fn can_append(&self, other: &Self) -> bool { | |
self.is_delete == other.is_delete && (other.diff == 0 || merge_bksp(self, other).is_some()) | |
} | |
fn append(&mut self, other: Self) { | |
if let Some(r) = merge_bksp(self, &other) { | |
*self = r; | |
} else { self.len += other.len; } | |
} | |
fn prepend(&mut self, other: Self) { unimplemented!(); } | |
} | |
let data = load_testing_data("benchmark_data/automerge-paper.json.gz"); | |
// let data = load_testing_data("benchmark_data/blogpost.json.gz"); | |
// let data = load_testing_data("benchmark_data/rustcode.json.gz"); | |
let mut i = 0; | |
let mut w = SpanWriter::new(|into, val: Run3| { | |
// if i < 100 { dbg!((val.diff, val.len)); } | |
// if i < 100 { dbg!(val); } | |
i += 1; | |
// if val.diff == -1 && val.len == 1 { return; } | |
let mut dest = [0u8; 20]; | |
let mut pos = 0; | |
let mut n = num_encode_i64_with_extra_bit(val.diff as i64, val.len != 1); | |
if val.is_delete { | |
n = mix_bit_u64(n, val.backspace_mode); | |
} | |
n = mix_bit_u64(n, val.is_delete); | |
pos += encode_u64(n, &mut dest[..]); | |
if val.len != 1 { | |
pos += encode_u32(val.len, &mut dest[pos..]); | |
} | |
into.extend_from_slice(&dest[..pos]); | |
}); | |
let mut last: u32 = 0; | |
for TestPatch(pos, del, ins) in data.txns.iter().flat_map(|txn| txn.patches.iter()) { | |
// dbg!(patch); | |
if *del > 0 { | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: *del as _, | |
is_delete: true, | |
backspace_mode: false, | |
}); | |
last = *pos as u32; | |
} | |
if !ins.is_empty() { | |
let count = ins.chars().count() as u32; | |
w.push(Run3 { | |
diff: i32::wrapping_sub(*pos as i32,last as i32), | |
len: count, | |
is_delete: false, | |
backspace_mode: false, | |
}); | |
last = *pos as u32 + count; | |
} | |
} | |
dbg!(w.count); | |
let data = w.flush_into_inner(); | |
dbg!(data.len()); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment