Skip to content

Instantly share code, notes, and snippets.

@jlongster jlongster/hashlist.js
Last active Dec 2, 2018

Embed
What would you like to do?
// UPDATE: don't use this. when it re-partitions the list when time moves forward, it does not correctly keep hashes
// Use a real merkle tree instead: https://gist.github.com/jlongster/f431b6d75ef29c1a2ed000715aef9c8c
import Timestamp from './timestamp';
// This is a compact data structure that keeps track of a list of
// hashes (representing messages) over a range of time in order to
// figure out what has changed between clients, kinda like a Merkle
// tree. It creates "buckets" that represent different time ranges,
// and divides time into smaller buckets the more recent they are. The
// total data structure is mostly a 17-length array.
//
// It requires knowledge of the latest "clock", which is determinstic
// given the current HLC state. Given this, it continuously
// re-partitions the buckets so they represent the time ranges
// relative to the clock.
//
// For example, a default hashlist would look like this:
//
// initHashlist('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF');
//
// { at: 25700485,
// list:
// [ { hash: 0, timestamp: 25700485 }, // 2018-11-12T13:25:00.000Z
// { hash: 0, timestamp: 25700475 }, // 2018-11-12T13:15:00.000Z
// { hash: 0, timestamp: 25700465 }, // 2018-11-12T13:05:00.000Z
// { hash: 0, timestamp: 25700445 }, // 2018-11-12T12:45:00.000Z
// { hash: 0, timestamp: 25700405 }, // 2018-11-12T12:05:00.000Z
// { hash: 0, timestamp: 25700325 }, // 2018-11-12T10:45:00.000Z
// { hash: 0, timestamp: 25700165 }, // 2018-11-12T08:05:00.000Z
// { hash: 0, timestamp: 25699845 }, // 2018-11-12T02:45:00.000Z
// { hash: 0, timestamp: 25699205 }, // 2018-11-11T16:05:00.000Z
// { hash: 0, timestamp: 25697925 }, // 2018-11-10T18:45:00.000Z
// { hash: 0, timestamp: 25695365 }, // 2018-11-09T00:05:00.000Z
// { hash: 0, timestamp: 25690245 }, // 2018-11-05T10:45:00.000Z
// { hash: 0, timestamp: 25680005 }, // 2018-10-29T08:05:00.000Z
// { hash: 0, timestamp: 25659525 }, // 2018-10-15T02:45:00.000Z
// { hash: 0, timestamp: 25618565 }, // 2018-09-16T16:05:00.000Z
// { hash: 0, timestamp: 25536645 }, // 2018-07-21T18:45:00.000Z
// { hash: 0, timestamp: 25372805 } ] } // 2018-03-30T00:05:00.000Z
//
// Note how it creates larger buckets as time goes back. This allows
// you to track recent changes more closely, while still detecting
// anamolous cases where something changed far back.
export function timestampToISOString(timestamp) {
return new Date(timestamp * 60 * 1000).toISOString();
}
function minutesFromTimestamp(timestamp) {
return (timestamp.millis() / 1000 / 60) | 0;
}
export function mapHashlist(hl, fn) {
return { clock: hl.clock, list: hl.list.map(fn) };
}
// Create an empty hashlist based off of a time
export function initHashlist(latestTimestamp) {
const currentTime = Math.ceil(minutesFromTimestamp(latestTimestamp) / 5) * 5;
let list = [];
list.push({
hash: 0,
timestamp: currentTime
});
let x = 10;
for (let i = 0; i < 16; i++) {
list.push({
hash: 0,
timestamp: currentTime - x
});
x *= 2;
}
return { clock: latestTimestamp.toString(), list };
}
// Re-partition the hashlist based off a new time. This will collapse
// time ranges as needed to bring it up to speed, while keeping the
// hashes in tact
export function updateHashlist(hashlist, latestTimestamp) {
const newList = initHashlist(latestTimestamp);
if (hashlist === null) {
return newList;
} else if (newList.clock === hashlist.clock) {
// No need to re-partition if they are based off the same time
return hashlist;
} else if (hashlist.clock > newList.clock) {
throw new Error(
'Cannot combine hashlist with one in the past (global clock must be running behind messages)'
);
}
return mapHashlist(newList, bucket2 => {
let bucket = bucket2;
hashlist.list.some(bucket1 => {
if (bucket1.timestamp <= bucket2.timestamp) {
bucket = { ...bucket, hash: bucket2.hash ^ bucket1.hash };
return true;
}
});
return bucket;
});
}
// Find the most recent time that are guaranteed to have the same
// messages applied
export function diffHashlists(hl1, hl2) {
if (hl1.clock.toString() !== hl2.clock.toString()) {
throw new Error("Cannot compare hashlists, time isn't the same");
}
if (hl1.list[0].hash === hl2.list[0].hash) {
// They are equal
return null;
}
for (let i = 0; i < hl1.list.length; i++) {
if (hl1.list[i].hash === hl2.list[i].hash) {
return timestampToISOString(hl1.list[i].timestamp);
}
}
throw new Error('No common history in hashlists');
}
// Add a message a hashlist, given the latest time. This will update
// all the hashes in the appropriate buckets
export function addMessage(latestTimestamp, hashlist, message) {
if (message.timestamp.toString() > latestTimestamp.toString()) {
throw new Error('Message time is greater than latest given time');
}
hashlist = updateHashlist(hashlist, latestTimestamp);
console.log('adding message', message.timestamp.toString());
return mapHashlist(hashlist, bucket => {
if (bucket.timestamp > minutesFromTimestamp(message.timestamp)) {
return {
...bucket,
hash: bucket.hash ^ message.timestamp.hash()
};
}
return bucket;
});
}
import {
initHashlist,
updateHashlist,
diffHashlists,
addMessage,
timestampToISOString
} from './hashlist';
import Timestamp from './timestamp';
import { format } from 'date-fns';
function pretty(n) {
if (n < 60) {
console.log(`${n} (${n} min)`);
} else if (n < 24 * 60) {
console.log(`${n} (${n / 60} hours)`);
} else {
console.log(`${n} (${n / (24 * 60)} days)`);
}
}
function message(timestamp, hash) {
timestamp = Timestamp.parse(timestamp);
timestamp.hash = () => hash;
return { timestamp };
}
function debug(hashlist) {
console.log({
clock: hashlist.clock.toString(),
list: hashlist.list.map(hl => ({
hash: hl.hash,
timestamp: timestampToISOString(hl.timestamp)
}))
});
}
describe('hashlist', () => {
test('hashlist is created correctly', () => {
let hl = initHashlist(
Timestamp.parse('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF')
);
expect(hl.clock.toString()).toBe(
'2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF'
);
expect(hl.list.length).toBe(17);
});
test('updating the current time works', () => {
let hl = initHashlist(
Timestamp.parse('2018-11-12T13:21:40.122Z-0000-0123456789ABCDEF')
);
// Set a few hashes
for (let i = 0; i < 14; i++) {
hl.list[i].hash = hl.list[i].hash ^ 1200;
}
for (let i = 0; i < 6; i++) {
hl.list[i].hash = hl.list[i].hash ^ 1000;
}
expect(hl.list.slice(0, 6).forEach(x => expect(x.hash).toBe(1880)));
expect(hl.list.slice(7, 14).forEach(x => expect(x.hash).toBe(1200)));
// The hashlist should be updated with new buckets since time has
// been moved forward
let updated = updateHashlist(
hl,
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF')
);
expect(
updated.list.slice(0, 10).forEach(x => {
expect(x.hash).toBe(1880);
expect(
timestampToISOString(x.timestamp) > '2018-11-12T11:00:00.000Z'
).toBeTruthy();
})
);
expect(
updated.list.slice(11, 13).forEach(x => {
expect(x.hash).toBe(1200);
expect(
timestampToISOString(x.timestamp) > '2018-10-29T07:00:00.000Z'
).toBeTruthy();
})
);
expect(
timestampToISOString(updated.list[14].timestamp) <=
'2018-10-29T07:00:00.000Z'
).toBeTruthy();
});
test('adding message works', () => {
let initHl = initHashlist(
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF')
);
// Adding a message that has the same timestamp as the latest time
// should work
let hl = addMessage(
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'),
initHl,
message('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF', 1000)
);
expect(hl.list[0].hash).toBe(1000);
expect(hl.list[1].hash).toBe(0);
// Adding a message that is in the future should throw
expect(() => {
addMessage(
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'),
initHl,
message('2018-11-14T13:21:41.122Z-0000-0123456789ABCDEF', 1000)
);
}).toThrow();
// Adding a message in the last 5 minutes should be the latest
// bucket only
hl = addMessage(
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'),
initHl,
message('2018-11-14T13:19:40.122Z-0000-0123456789ABCDEF', 1000)
);
expect(hl.list[0].hash).toBe(1000);
expect(hl.list[1].hash).toBe(0);
// Adding a message in the last 10 minutes should be the latest
// two buckets only
hl = addMessage(
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'),
initHl,
message('2018-11-14T13:14:40.122Z-0000-0123456789ABCDEF', 1000)
);
expect(hl.list[0].hash).toBe(1000);
expect(hl.list[1].hash).toBe(1000);
expect(hl.list[2].hash).toBe(0);
// Adding an old message should update many buckets
hl = addMessage(
Timestamp.parse('2018-11-14T13:21:40.122Z-0000-0123456789ABCDEF'),
initHl,
message('2018-06-14T13:14:40.122Z-0000-0123456789ABCDEF', 1000)
);
for (let i = 0; i < 16; i++) {
expect(hl.list[i].hash).toBe(1000);
}
expect(hl.list[16].hash).toBe(0);
});
test('diff returns the correct time difference', () => {
let hl1 = initHashlist(
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF')
);
let hl2 = initHashlist(
Timestamp.parse('2018-11-20T05:21:40.122Z-0000-0123456789ABCDEF')
);
const messages = [
// First client messages
message('2018-11-13T13:20:40.122Z-0000-0123456789ABCDEF', 1000),
message('2018-11-14T13:05:35.122Z-0000-0123456789ABCDEF', 1100),
message('2018-11-15T22:19:00.122Z-0000-0123456789ABCDEF', 1200),
// Second client messages
message('2018-11-20T13:19:40.122Z-0000-0123456789ABCDEF', 1300),
message('2018-11-25T13:19:40.122Z-0000-0123456789ABCDEF', 1400)
];
// Add some messages to the first list. Note that we've moved the
// clock forward
let timestamp1 = Timestamp.parse(
'2018-11-16T13:21:40.122Z-0000-0123456789ABCDEF'
);
hl1 = addMessage(timestamp1, hl1, messages[0]);
hl1 = addMessage(timestamp1, hl1, messages[1]);
hl1 = addMessage(timestamp1, hl1, messages[2]);
expect(hl1.list[0].hash).toBe(788);
// Add some messages to the second list
let timestamp2 = Timestamp.parse(
'2018-11-25T13:21:40.122Z-0000-0123456789ABCDEF'
);
hl2 = addMessage(timestamp2, hl2, messages[3]);
hl2 = addMessage(timestamp2, hl2, messages[4]);
expect(hl2.list[0].hash).toBe(108);
// Move both hashlists forward to the same latest time
let lastTimestamp = timestamp2;
hl1 = updateHashlist(hl1, lastTimestamp);
hl2 = updateHashlist(hl2, lastTimestamp);
// This is correct - this is the most recent timestamp (which is
// before all 5 new messages)
expect(diffHashlists(hl1, hl2)).toBe('2018-11-11T08:05:00.000Z');
// Add the messages to the other client and verify the hash
hl1 = addMessage(lastTimestamp, hl1, messages[3]);
hl1 = addMessage(lastTimestamp, hl1, messages[4]);
hl2 = addMessage(lastTimestamp, hl2, messages[0]);
hl2 = addMessage(lastTimestamp, hl2, messages[1]);
hl2 = addMessage(lastTimestamp, hl2, messages[2]);
expect(hl1.list[0].hash).toBe(hl2.list[0].hash);
});
test('diff returns the correct time with same messages applied', () => {
let hl1 = initHashlist(
Timestamp.parse('2018-11-13T05:21:40.122Z-0000-0123456789ABCDEF')
);
let hl2 = initHashlist(
Timestamp.parse('2018-11-20T05:21:40.122Z-0000-0123456789ABCDEF')
);
let messages = [
message('2018-11-13T13:20:40.122Z-0000-0123456789ABCDEF', 1000),
message('2018-11-14T13:05:35.122Z-0000-0123456789ABCDEF', 1100),
message('2018-11-21T22:19:00.122Z-0000-0123456789ABCDEF', 1200),
message('2018-11-23T22:19:40.122Z-0000-0123456789ABCDEF', 1300),
message('2018-11-24T13:19:40.122Z-0000-0123456789ABCDEF', 1400)
];
let timestamp1 = Timestamp.parse(
'2018-11-24T23:21:40.122Z-0000-0123456789ABCDEF'
);
hl1 = addMessage(timestamp1, hl1, messages[0]);
hl1 = addMessage(timestamp1, hl1, messages[1]);
hl1 = addMessage(timestamp1, hl1, messages[2]);
hl1 = addMessage(timestamp1, hl1, messages[3]);
let timestamp2 = Timestamp.parse(
'2018-11-25T13:21:40.122Z-0000-0123456789ABCDEF'
);
hl2 = addMessage(timestamp2, hl2, messages[0]);
hl2 = addMessage(timestamp2, hl2, messages[1]);
hl2 = addMessage(timestamp2, hl2, messages[2]);
hl2 = addMessage(timestamp2, hl2, messages[4]);
// Move both hashlists forward to the same latest time
let lastTimestamp = timestamp2;
hl1 = updateHashlist(hl1, lastTimestamp);
hl2 = updateHashlist(hl2, lastTimestamp);
expect(diffHashlists(hl1, hl2)).toBe('2018-11-23T18:45:00.000Z');
});
});
import { addMessage, updateHashlist } from './hashlist';
import Timestamp from './timestamp';
import expect from 'expect';
// prettier-ignore
let messages = [
{ timestamp: Timestamp.parse('2018-01-01T01:01:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-03-01T01:01:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-05-01T01:02:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-01T01:02:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T01:02:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:02:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:21:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:35:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:42:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:46:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:53:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:54:00.000Z-0000-0123456789ABCDEF') },
{ timestamp: Timestamp.parse('2018-06-02T05:57:00.000Z-0000-0123456789ABCDEF') }
];
let latestTime = Timestamp.parse(
'2018-06-02T05:57:00.000Z-0000-0123456789ABCDEF'
);
function makeHashlistAtTime(time) {
let hl = null;
for (let msg of messages) {
hl = addMessage(time, hl, msg);
}
return hl;
}
let baseMillis = latestTime.millis();
let hl = makeHashlistAtTime(latestTime);
// Move forward in 55 seconds increments through 3 months worth of
// time and make sure that re-partitioning the hashlist results in the
// same thing as a new one created at that time
for (var i = 0; i < 3 * 30 * 24 * 60 * 60 * 1000; i += 55 * 1000) {
let newTime = new Timestamp(baseMillis + i, 0, '0123456789ABCDEF');
console.log('Moving forward to ' + newTime);
let correct = makeHashlistAtTime(newTime);
let moved = updateHashlist(hl, newTime);
expect(correct).toEqual(moved);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.