Skip to content

Instantly share code, notes, and snippets.

@j-coll
Last active January 23, 2017 14:11
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save j-coll/cb523ba937a9efb47f9a4f4acd23f82f to your computer and use it in GitHub Desktop.
Save j-coll/cb523ba937a9efb47f9a4f4acd23f82f to your computer and use it in GitHub Desktop.
Repair opencga-storage-mongodb variants database due to opencb/opencga#504
/**
 * Generic migration utility method.
 *
 * Iterates over the documents of `collection` that match `query`, lets
 * `migrateFunc` queue update operations on an ordered bulk operation, and
 * executes the bulk each time enough operations have been queued. Progress
 * (the running total of executed update operations) is reported with print().
 *
 * @param {string} collection - Name of the collection to migrate.
 * @param {Object} query - Filter selecting the documents to visit.
 * @param {Object} projection - Projection applied to the visited documents.
 * @param {function(Object, Object)} migrateFunc - Invoked as
 *     migrateFunc(bulk, doc) for every visited document; it is expected to
 *     queue its updates on `bulk`.
 * @param {number} [bulkSize=500] - Number of queued update operations that
 *     triggers a flush. The threshold is checked once per document, so a
 *     batch can exceed it when migrateFunc queues several operations for a
 *     single document.
 */
function migrateCollection(collection, query, projection, migrateFunc, bulkSize) {
    // Fall back to the historical batch size when the argument is absent or invalid.
    var flushThreshold = (typeof bulkSize === "number" && bulkSize > 0) ? bulkSize : 500;
    var target = db.getCollection(collection);
    var bulk = target.initializeOrderedBulkOp();
    var count = 0;

    // Executes the pending batch and reports the running total.
    function flush() {
        count += bulk.nUpdateOps;
        print("Execute bulk! " + count);
        bulk.execute();
    }

    target.find(query, projection).forEach(function (doc) {
        migrateFunc(bulk, doc);
        if (bulk.nUpdateOps >= flushThreshold) {
            flush();
            // A fresh bulk is created after every flush instead of reusing the executed one.
            bulk = target.initializeOrderedBulkOp();
        }
    });

    // Flush whatever is left over; no new bulk is needed afterwards.
    if (bulk.nUpdateOps > 0) {
        flush();
    }
    if (count === 0) {
        print("Nothing to do!");
    }
}
// Remove extra secondary alternate and fix type on variants with reference.length == 50.
//
// Selects variants typed "SV" whose reference matches "^.{50}$". For every
// study of such a variant one update is queued that:
//   - sets the variant's top-level type to "INDEL";
//   - pulls from that study's `alts` array the entries whose `ref` equals the
//     variant's own reference.
// The `stats` and `annotation` fields are excluded from the fetched documents;
// the callback does not read them.
migrateCollection("variants", {reference: {$regex: "^.{50}$"}, type: "SV"}, {stats: 0, annotation: 0}, function (bulk, doc) {
    var variantId = doc._id;
    var reference = doc.reference;
    // Index loop instead of for...in: `studies` is an array (it is addressed
    // with the positional `$` operator below), and a missing field is simply
    // treated as "no studies".
    var studies = doc.studies || [];
    print("Updating variant: _id : '" + variantId + "'");
    for (var i = 0; i < studies.length; i++) {
        // The "studies.sid" condition lets the positional `$` resolve to the
        // matching study inside the update.
        bulk.find({_id: variantId, "studies.sid": studies[i].sid}).update({
            $set: {type: "INDEL"},
            $pull: {"studies.$.alts": {ref: reference}}
        });
    }
});
// Change type of SV secondary alternates with reference.length = 50.
//
// Selects variants having at least one study alternate typed "SV" whose `ref`
// matches "^.{50}$". For each study, every such alternate is retyped to
// "INDEL" in the fetched copy, and when anything changed the study's whole
// `alts` array is written back with a single $set.
// The `stats` and `annotation` fields are excluded from the fetched documents;
// the callback does not read them.
migrateCollection("variants", {"studies.alts": {$elemMatch: {type: "SV", ref: {$regex: "^.{50}$"}}}}, {stats: 0, annotation: 0}, function (bulk, doc) {
    var variantId = doc._id;
    var studies = doc.studies || [];
    print("Updating variant: _id : '" + variantId + "'");
    for (var s = 0; s < studies.length; s++) {
        var study = studies[s];
        var alts = study.alts;
        // Studies without an `alts` array have nothing to fix.
        if (!Array.isArray(alts)) {
            continue;
        }
        var modified = false;
        for (var a = 0; a < alts.length; a++) {
            var alt = alts[a];
            // Guard on the type of `ref` so an alternate lacking it is skipped
            // instead of aborting the whole migration with a TypeError.
            if (alt.type === "SV" && typeof alt.ref === "string" && alt.ref.length === 50) {
                alt.type = "INDEL";
                modified = true;
            }
        }
        if (modified) {
            // The "studies.sid" condition lets the positional `$` resolve to
            // this study; its alternates are replaced with the corrected copy.
            bulk.find({_id: variantId, "studies.sid": study.sid}).update({$set: {"studies.$.alts": alts}});
        }
    }
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment