-
-
Save ajdavis/f511a63dd607acbce82b9d0635eb346d to your computer and use it in GitHub Desktop.
WiredTiger import example from Dan Gottlieb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string>
#include <utility>
#include <vector>
#include <wiredtiger.h>

#include "mongo/bson/bsonmisc.h"
#include "mongo/bson/bsonobj.h"
#include "mongo/bson/bsonobjbuilder.h"
#include "mongo/db/operation_context.h"
#include "mongo/db/storage/bson_collection_catalog_entry.h"
#include "mongo/db/storage/durable_catalog.h"
#include "mongo/db/storage/wiredtiger/wiredtiger_util.h"
namespace mongo {
/**
 * Everything needed to describe one WiredTiger table (a collection or an index) that is being
 * exported from a backup so it can be imported elsewhere.
 */
struct WTImportArguments {
    // Location of the table's data file on disk (the exporter builds it as
    // "<importingDbpath>/<ident>.wt").
    std::string filepath;

    // Just the base name, no "table:" nor "file:" prefix. No ".wt" suffix.
    std::string ident;

    // Result of querying WT metadata for "table:<ident>".
    std::string tableMetadata;

    // Result of querying WT metadata for "file:<ident>.wt".
    std::string fileMetadata;
};
struct CollectionMetadata { | |
WTImportArguments collection; | |
std::string namespaceString; | |
BSONObj catalogObject; | |
BSONObj sizeStorerObject; | |
std::vector<WTImportArguments> indexes; | |
}; | |
/**
 * Appends one field to `toAddTo` describing a single WiredTiger table.
 *
 * The field is named after `fields.ident` and its value is the subdocument
 * `{tableMetadata: <fields.tableMetadata>, fileMetadata: <fields.fileMetadata>}`.
 * `fields.filepath` is not written.
 */
void buildStorageMetadata(BSONObjBuilder& toAddTo, const WTImportArguments& fields) {
    toAddTo << fields.ident
            << BSON("tableMetadata" << fields.tableMetadata << "fileMetadata"
                                    << fields.fileMetadata);
}
std::vector<CollectionMetadata> rollbackToStableAndExportConfigs(std::string importingDbpath); | |
void importFromBackupCursor(OperationContext* opCtxFromRealDatabase, std::string importingDbpath) { | |
std::vector<CollectionMetadata> collectionsToImport = | |
rollbackToStableAndExportConfigs(importingDbpath); | |
for (const auto& coll : collectionsToImport) { | |
BSONObjBuilder storageMetadata; | |
// I kept things structured on the exporting side. The import though takes WT's metadata in | |
// as a single BSONObj blob per collection with what I believe is the following format (via | |
// quick reading of `WTKVENgine::importRecordStore`, `WTKVENgine::importSortedDataInterface` | |
// -> `WiredTigerUtil::generateImportString`): | |
// | |
// { "collection-123...": { "tableMetadata": "<tableMetadata>", "fileMetadata": | |
// "<fileMetadata>" }, | |
// "index-124...": { "tableMetadata": "<tableMetadata>", "fileMetadata": "<fileMetadata>" | |
// }, ... } | |
buildStorageMetadata(storageMetadata, coll.collection); | |
for (const auto& indexImportArgs : coll.indexes) { | |
buildStorageMetadata(storageMetadata, indexImportArgs); | |
} | |
uassertStatusOK( | |
DurableCatalog::get(opCtxFromRealDatabase) | |
->importCollection( | |
opCtxFromRealDatabase, | |
NamespaceString(coll.namespaceString), | |
coll.catalogObject, | |
storageMetadata.done(), | |
DurableCatalog::ImportCollectionUUIDOption::kKeepOld /* or kGenerateNew? */)); | |
} | |
// At this point we also need to construct collection objects and initialize counts. See | |
// `live_import/import_collection.cpp::importCollection()` for how we construct/initializes | |
// those MDB classes. | |
} | |
std::vector<CollectionMetadata> rollbackToStableAndExportConfigs(std::string importingDbpath) { | |
WT_CONNECTION* conn; | |
// Open WT where the backed up data files were placed. Tell WT that the journal files are | |
// compressed with snappy and inside "<importingDbpath>/journal". | |
// | |
// Note that by not passing any configuration regarding `checkpoint=`, WT defaults to not taking | |
// checkpoints outside of opening and closing. | |
invariantWTOK( | |
wiredtiger_open(importingDbpath.c_str(), | |
nullptr, | |
"config_base=false,log=(enabled=true,path=journal,compressor=snappy)", | |
&conn)); | |
// WT is open. `WiredTiger.backup` is converted into a fresh `WiredTiger.wt` | |
// file. RollbackToStable was implicitly performed and a checkpoint was taken on completion. We | |
// rely on checkpoints being disabled to make exporting the WT metadata (byte offset to the root | |
// node) consistent with the new file that was written out. | |
WT_SESSION* session; | |
invariantWTOK(conn->open_session(conn, nullptr, nullptr, &session)); | |
WT_CURSOR* mdbCatalogCursor; | |
WT_CURSOR* sizeStorerCursor; | |
invariantWTOK( | |
session->open_cursor(session, "table:_mdb_catalog", nullptr, nullptr, &mdbCatalogCursor)); | |
invariantWTOK( | |
session->open_cursor(session, "table:sizeStorer", nullptr, nullptr, &sizeStorerCursor)); | |
while (true) { | |
int ret = mdbCatalogCursor->next(mdbCatalogCursor); | |
if (ret == WT_NOTFOUND) { | |
break; | |
} | |
invariantWTOK(ret); | |
WT_ITEM value; | |
invariantWTOK(mdbCatalogCursor->get_value(mdbCatalogCursor, &value)); | |
BSONObj rawCatalogEntry(value.data); | |
if (DurableCatalogImpl::isFeatureDocument(rawCatalogEntry)) { | |
// Not actually a public method. | |
// | |
// Re: this "feature document", we may "technically" need to copy its flag bits over, | |
// but realistically it's not meaningful. I defer to storage execution on the best thing | |
// to do. | |
continue; | |
} | |
// Lots of options here. I'm providing something that demonstrates we can avoid some of the | |
// higher level classes such as `Collection` or `IndexCatalog` that `exportCollection` | |
// otherwise depends on. I'm pessimistic those classes will work out of the box when we're | |
// trying to point them at catalog stuff in a different mdb_catalog file. | |
// | |
// Can also attempt to make a version of `enterprise/live_import/exportCollection` that | |
// doesn't depend on these classes and share the code. | |
// Please refactor this BSONCollectionCatalogEntry stuff if you feel the urge. | |
auto catalogEntry = BSONCollectionCatalogEntry::MetaData::parse(rawCatalogEntry); | |
// If exportCollection() is refactored so this code can hook into that -- the "output" of | |
// this loop would presumably become the bson object we can pass into the `importCollection` | |
// command. | |
// | |
// The `importCollection` command though (unfortunately) does all the replication bits of | |
// importing. I presume secondaries on a merging recipient will be independently copying the | |
// donor files and getting them consistent with a rollback to stable step. The primaries WT | |
// metadata cannot be correctly used by a secondary in this case. | |
// | |
// Instead what I'll opt for is calling `DurableCatalog::importCollection()` which basically | |
// takes these inputs. | |
CollectionMetadata toAdd; | |
toAdd.collection.filepath = | |
importingDbpath + "/" + rawCatalogEntry["ident"].String() + ".wt"; | |
// Ident === `collection-123-456`. | |
toAdd.collection.ident = rawCatalogEntry["ident"].String(); | |
toAdd.namespaceString = catalogEntry.ns; | |
// I'm pretty sure it's fine to use the same session for querying WT's metadata (i.e: | |
// WiredTiger.wt). | |
toAdd.collection.tableMetadata = uassertStatusOK(WiredTigerUtil::getMetadata( | |
session, "table:" + toAdd.collection.ident)); // table:collection-... | |
toAdd.collection.fileMetadata = uassertStatusOK(WiredTigerUtil::getMetadata( | |
session, "file:" + toAdd.collection.ident)); // file:collection-... | |
toAdd.catalogObject = rawCatalogEntry.getOwned(); | |
/* do a sizeStorerCursor->search on I believe the `toAdd.collUri` string, get an owned bson | |
* obj and shove it in here: | |
* toAdd.sizeStorerObject = ownedSizeBSONObj; | |
*/ | |
for (const BSONCollectionCatalogEntry::IndexMetaData& index : catalogEntry.indexes) { | |
// At this point I learned that IndexMetaData does not provide the "ident" string for | |
// indexes either. E.g: `index-123-456`. Thus my plea to refactor the | |
// BSONCollectionCatalogEntry file... | |
// | |
// For now we'll pretend it does... | |
WTImportArguments indexImport; | |
indexImport.filepath = importingDbpath + "/" + index.ident + ".wt"; | |
// Ident === `index-124-456`. | |
indexImport.ident = index.ident; | |
indexImport.tableMetadata = | |
WiredTigerUtil::getMetadata(session, "table:" + index.ident); | |
indexImport.fileMetadata = | |
WiredTigerUtil::getMetadata(session, "file:" + index.ident + ".wt"); | |
} | |
// Closing WT I believe will take a checkpoint. Even though we did no logical writes, it's | |
// not obvious to me that the root node offsets we saved as part of the WT metadata sit | |
// still. The most conservative thing to do is copy the files at this step before closing | |
// WT. Obviously that's less desirable than just moving the files. Moving the files | |
// underneath WT while running will almost certainly result WT taking down the whole mongod | |
// process. | |
// | |
// Worth clarifying with WT how these files can be safely and optimally transfered to the | |
// "real" dbpath. If my hunch is right that moving files after closing is not safe today -- | |
// my guess is it's easy for WT to add configuration string to `WT_CONNECTION::close` that | |
// skips taking a checkpoint or find some equivalent solution. | |
conn->close(conn, nullptr); | |
} | |
} | |
}  // namespace mongo
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment