Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Save jonasschneider/baa38821d1718962ce19 to your computer and use it in GitHub Desktop.
Save jonasschneider/baa38821d1718962ce19 to your computer and use it in GitHub Desktop.
From ce4f2a8c91510d7032ab441c37f3b78de7cc72cb Mon Sep 17 00:00:00 2001
From: Jonas Schneider <mail@jonasschneider.com>
Date: Tue, 10 Mar 2015 23:48:42 +0100
Subject: [PATCH] backup_creator: naively buffer a couple of instructions to
avoid using the slow StringOutputStream
---
backup_creator.cc | 36 +++++++++++++++++++++++++++++++++++-
backup_creator.hh | 4 ++++
2 files changed, 39 insertions(+), 1 deletion(-)
diff --git a/backup_creator.cc b/backup_creator.cc
index 4151b7f..ae82bc1 100644
--- a/backup_creator.cc
+++ b/backup_creator.cc
@@ -169,6 +169,8 @@ void BackupCreator::finish()
if ( chunkToSaveFill )
saveChunkToSave();
+
+ flushInstructions();
}
void BackupCreator::moveFromRingBufferToChunkToSave( unsigned toMove )
@@ -269,7 +271,39 @@ void BackupCreator::outputInstruction( BackupInstruction const & instr )
// TODO: once backupData becomes large enough, spawn another BackupCreator and
// feed data to it. This way we wouldn't have to store the entire backupData
// in RAM
- Message::serialize( instr, *backupDataStream );
+
+ unflushedInstructions.push_back(instr);
+ unflushedInstructionsSerializedSize += 4 + instr.ByteSize();
+
+ // if the vector is too large, flush it
+ if(unflushedInstructions.size() > 1024) {
+ flushInstructions();
+ }
+}
+
+/// Serializes every buffered instruction into one contiguous scratch buffer and
+/// appends the result to backupData, then resets the buffer and its size counter.
+void BackupCreator::flushInstructions() {
+  if(unflushedInstructions.empty()) {
+    CHECK(unflushedInstructionsSerializedSize == 0, "no unflushed instructions but nonzero size");
+    return;
+  }
+
+  dPrintf( "flush buffer size: %u\n", unflushedInstructionsSerializedSize);
+
+  // The vector owns the scratch space: allocation failure is reported instead of
+  // yielding a null pointer, and the memory is released even on an exceptional exit.
+  std::vector<char> scratch(unflushedInstructionsSerializedSize);
+  google::protobuf::io::ArrayOutputStream aos(scratch.data(), static_cast<int>(scratch.size()));
+
+  for(const BackupInstruction & pending : unflushedInstructions) {
+    Message::serialize(pending, aos);
+  }
+
+  CHECK( aos.ByteCount() <= unflushedInstructionsSerializedSize, "serialized size prediction didn't match" );
+  backupData.append(scratch.data(), aos.ByteCount()); // append first: a failed append keeps the instructions buffered
+  unflushedInstructions.clear();
+  unflushedInstructionsSerializedSize = 0;
}
void BackupCreator::getBackupData( string & str )
diff --git a/backup_creator.hh b/backup_creator.hh
index 42d590d..be07350 100644
--- a/backup_creator.hh
+++ b/backup_creator.hh
@@ -48,6 +48,8 @@ class BackupCreator: ChunkIndex::ChunkInfoInterface, NoCopy
RollingHash rollingHash;
string backupData;
+ vector< BackupInstruction > unflushedInstructions;
+ unsigned unflushedInstructionsSerializedSize = 0;
sptr< google::protobuf::io::StringOutputStream > backupDataStream;
/// Sees if the current block in the ring buffer exists in the chunk store.
@@ -57,6 +59,8 @@ class BackupCreator: ChunkIndex::ChunkInfoInterface, NoCopy
/// Outputs data contained in chunkToSave as a new chunk
void saveChunkToSave();
+ void flushInstructions();
+
/// Move the given amount of bytes from the ring buffer to the chunk to save.
/// Ring buffer must have at least that many bytes
void moveFromRingBufferToChunkToSave( unsigned bytes );
--
2.2.1
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment