Skip to content

Instantly share code, notes, and snippets.

@pfurio
Last active September 15, 2017 12:11
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pfurio/2ca0cb2da46eac9e309101066f8758f5 to your computer and use it in GitHub Desktop.
Save pfurio/2ca0cb2da46eac9e309101066f8758f5 to your computer and use it in GitHub Desktop.
OpenCGA migration scripts to move from release 1.1.x to 1.2.0
// 1. Load the function from https://gist.github.com/pfurio/f7cd90af08e0073699f0beeeef1958ba
// 2. Run all the scripts present in this gist against the CATALOG database
// #618 - Remove the sessions array from every user entry.
db.user.update(
    {},
    {$unset: {sessions: ""}},
    {multi: 1}
);
// Same clean-up for the admin section of the metadata collection; the same update also sets
// the "admin.secretKey" and "admin.algorithm" fields.
db.metadata.update(
    {},
    {
        $unset: {"admin.sessions": ""},
        $set: {
            "admin.secretKey": "dummy",
            "admin.algorithm": "HS256"
        }
    }
);
// Move list of longs to list of object where needed. #621
//
// Converts a list of ids into a list of {"id": NumberLong(id)} objects.
//   originalList: array of ids (plain numbers or NumberLong values). Anything that is not an
//                 array (for instance a missing field) produces an empty list.
//   returns:      a new array; the input is never modified. Falsy entries (null, undefined, 0)
//                 are skipped.
function listLong2ListObject(originalList) {
    var newList = [];
    if (!Array.isArray(originalList)) {
        return newList;
    }
    // Walk the indexes explicitly: for...in would also visit any enumerable non-index property
    // attached to the array (or to Array.prototype) and turn it into a bogus id.
    for (var i = 0; i < originalList.length; i++) {
        if (originalList[i]) {
            newList.push({
                "id": NumberLong(originalList[i])
            });
        }
    }
    return newList;
}
// Files: the "sampleIds" field is renamed to "samples" in every document of the file collection.
// Its content (list<long>) is converted to list<object> by the migration that follows.
db.file.update(
    {},
    {$rename: {"sampleIds": "samples"}},
    {multi: 1}
);
// Convert the "samples" list of every file that has one from list<long> to list<object>.
migrateCollection("file", {"samples" : { $exists: true, $ne: [] } }, {samples: 1}, function(bulk, doc) {
    // Tells whether a list element is still a raw id: either a plain JS number or a value
    // exposing toNumber() (a NumberLong). Objects already converted have no such method.
    function isRawId(value) {
        if (typeof value === "number") {
            return true;
        }
        try {
            value.toNumber();
            return true;
        } catch (notALong) {
            return false;
        }
    }

    // Only the first element is inspected to decide whether the document was already migrated.
    if (!isRawId(doc.samples[0])) {
        return;
    }

    var converted = listLong2ListObject(doc.samples);
    bulk.find({"_id": doc._id}).updateOne({"$set": {"samples": converted}});
});
// Move list<long> to list<object> from input and output fields
//
// For every job with a non-empty "input" or "output" list, the first element of the first
// non-empty list (input, then output) is inspected: if it is still a raw id (plain number or
// NumberLong) both lists are converted with listLong2ListObject; otherwise the job is assumed to
// be already migrated and is left untouched.
migrateCollection("job",
    {$or: [{"input" : {$exists: true, $ne: []}}, {"output" : {$exists: true, $ne: []}}]},
    {input: 1, output: 1}, function(bulk, doc) {
        // Tells whether a list element is still a raw id. toNumber() only works on NumberLong
        // values; on an already converted object the call throws and we report false.
        function isRawId(value) {
            if (typeof value === "number") {
                return true;
            }
            try {
                value.toNumber();
                return true;
            } catch (error) {
                return false;
            }
        }

        // The $or query also matches jobs where only one of the two fields exists.
        var hasInput = Array.isArray(doc.input);
        var hasOutput = Array.isArray(doc.output);

        var firstElement;
        if (hasInput && doc.input.length > 0) {
            firstElement = doc.input[0];
        } else if (hasOutput && doc.output.length > 0) {
            // Inspect the output list itself: with an empty input there is nothing to look at in
            // doc.input, and looking there made these jobs be skipped.
            firstElement = doc.output[0];
        } else {
            return;
        }

        if (!isRawId(firstElement)) {
            return; // Already changed to object
        }

        // Only rewrite the fields the job actually has, so no new field is created.
        var changes = {};
        if (hasInput) {
            changes["input"] = listLong2ListObject(doc.input);
        }
        if (hasOutput) {
            changes["output"] = listLong2ListObject(doc.output);
        }
        bulk.find({"_id": doc._id}).updateOne({"$set": changes});
    });
// Cohorts: convert the "samples" list from list<long> to list<object>.
migrateCollection("cohort", {"samples" : { $exists: true, $ne: [] } }, {samples: 1}, function(bulk, doc) {
    var head = doc.samples[0];

    var needsProbe = (typeof head !== "number");
    if (needsProbe) {
        // A NumberLong answers toNumber(); an element that was already turned into an object
        // does not, the call throws and the cohort is skipped.
        try {
            head.toNumber();
        } catch (alreadyConverted) {
            return;
        }
    }

    var sampleObjects = listLong2ListObject(doc.samples);
    bulk.find({"_id": doc._id}).updateOne({"$set": {"samples": sampleObjects}});
});
// Migration to support tickets #632 and #642
// A new _acl array will be built from the current permissions stored in acl (#632)
// A new @members group will be created containing all the existing users in the studies (#642)
// Returns the member id to use in the migrated data: "anonymous" is replaced by "*",
// every other value is returned as received.
function getUser(user) {
    return (user === "anonymous") ? "*" : user;
}
// Flattens a list of ACL entries into the list of "<member>_<permission>" strings.
//   aclEntryList: array of entries with a "member" and a "permissions" array. Anything that is
//                 not an array (for instance a missing acl field) produces an empty list.
//   returns:      for every entry, in order, one "<member>_<permission>" string per permission
//                 followed by "<member>_NONE". Members go through getUser first.
function buildNewPermissions(aclEntryList) {
    var permissions = [];
    if (!Array.isArray(aclEntryList)) {
        return permissions;
    }
    // forEach only visits real array elements; for...in would also visit any other enumerable
    // property attached to the array.
    aclEntryList.forEach(function(acl) {
        var member = getUser(acl.member);
        // An entry without a permissions array is handled like an entry with an empty one.
        (acl.permissions || []).forEach(function(permission) {
            permissions.push(member + "_" + permission);
        });
        // Every member gets the NONE marker, with or without permissions.
        permissions.push(member + "_NONE");
    });
    return permissions;
}
// We will create a new internal group called @members.
//
// For every study that has no _acl yet:
//   - _acl is built from the acl entries with buildNewPermissions.
//   - a "@members" group is appended to the study groups, containing every user found in the
//     existing groups plus every non-group member (name not starting with "@") of the acl.
// Throws (a string, as before) when the study already has a group named "@members".
migrateCollection("study", {_acl: {$exists: false}}, {acl: 1, groups: 1}, function(bulk, doc) {
    var acl = buildNewPermissions(doc.acl);

    // A study without groups or acl is handled as a study with empty lists instead of failing.
    var groups = Array.isArray(doc.groups) ? doc.groups : [];
    var aclEntries = Array.isArray(doc.acl) ? doc.acl : [];

    // We will obtain all the users with any permission in the study. A Set keeps them unique and
    // in insertion order.
    var users = new Set();

    // Look for users in groups
    groups.forEach(function(group) {
        if (group.name === "@members") {
            // TODO: We need to take this group out. At the moment, we will assume that group does not exist.
            throw "A group called @members has been found in study " + doc._id + ". Please, remove that group before running this script"
                + " as that group name is going to be used for internal purposes.";
        }
        // Add all the users belonging to the group
        (group.userIds || []).forEach(function(userId) {
            users.add(getUser(userId));
        });
    });

    // Look for users with permissions (entries starting with "@" are groups, not users)
    aclEntries.forEach(function(entry) {
        if (!entry.member.startsWith("@")) {
            users.add(getUser(entry.member));
        }
    });

    // Add new group members containing all the users registered in the study. A new array is
    // built so the document received from the cursor is not modified.
    var newGroups = groups.concat([{
        "name": "@members",
        "userIds": Array.from(users),
        "syncedFrom": null
    }]);

    bulk.find({"_id": doc._id}).updateOne({"$set": {"_acl": acl, "groups": newGroups}});
});
// Build the _acl array from the acl entries of every document that has no _acl yet. The same
// migration is applied, in this order, to each of the collections listed below.
["cohort", "family", "file", "individual", "job", "panel", "sample"].forEach(function(collection) {
    migrateCollection(collection, {_acl: {$exists: false}}, {acl: 1}, function(bulk, doc) {
        var flattenedAcl = buildNewPermissions(doc.acl);
        bulk.find({"_id": doc._id}).updateOne({"$set": {"_acl": flattenedAcl}});
    });
});
// Studies get an _ownerId field holding the _id of the user that owns their project.
// Every user with at least one project is visited; only the project ids are fetched.
db.user.find({projects: {$exists: true, $ne: []}}, {"projects.id": 1}).forEach(function(owner) {
    var ownedProjectIds = owner.projects.map(function(project) {
        return project.id;
    });

    // Tag all the studies hanging from any of the projects of this user
    db.study.update(
        {"_projectId": {$in: ownedProjectIds}},
        {$set: {_ownerId: owner["_id"]}},
        {multi: 1}
    );
});
// #616 - Add release information to all the entries.
// Projects live embedded in the user documents: every project lacking "currentRelease" gets
// currentRelease = 1. Users whose projects all have it already are not updated.
migrateCollection("user", {projects: {$exists: true, $ne: []}}, {projects: 1}, function(bulk, doc) {
    var updatedProjects = [];
    var anyChange = false;

    doc.projects.forEach(function(project) {
        if (!project.hasOwnProperty("currentRelease")) {
            project["currentRelease"] = 1;
            anyChange = true;
        }
        updatedProjects.push(project);
    });

    if (!anyChange) {
        return;
    }
    // The whole embedded array is written back, not just the modified projects
    bulk.find({"_id": doc._id}).updateOne({$set: {projects: updatedProjects}});
});
// Set release = 1 in every document of the study, cohort, family, file, individual, job, panel
// and sample collections (in that order).
["study", "cohort", "family", "file", "individual", "job", "panel", "sample"].forEach(function(collection) {
    db[collection].update({}, {$set: {release: 1}}, {multi: 1});
});
// Variable sets are embedded in the studies: every variable set lacking "release" gets
// release = 1 and confidential = false. Studies with nothing to change are not updated.
migrateCollection("study", {variableSets: {$exists: true, $ne: []}}, {variableSets: 1}, function(bulk, doc) {
    var pending = doc.variableSets.filter(function(varSet) {
        return !varSet.hasOwnProperty("release");
    });
    if (pending.length === 0) {
        return;
    }

    pending.forEach(function(varSet) {
        varSet["release"] = 1;
        varSet["confidential"] = false;
    });

    // Write back the complete list (modified and untouched variable sets, original order)
    bulk.find({"_id": doc._id}).updateOne({$set: {variableSets: doc.variableSets.slice()}});
});
// Migration callback: sets release = 1 in every annotation set of the document that has no
// "release" yet.
//   bulk: bulk operation where the update is queued.
//   doc:  document being migrated; its annotationSets array is read (and its elements modified).
// When at least one annotation set changed, the complete annotationSets list is written back;
// otherwise nothing is queued.
function addReleaseToAnnotationSets(bulk, doc) {
    var updatedSets = [];
    var anyChange = false;

    doc.annotationSets.forEach(function(annSet) {
        if (!annSet.hasOwnProperty("release")) {
            annSet["release"] = 1;
            anyChange = true;
        }
        updatedSets.push(annSet);
    });

    if (!anyChange) {
        return;
    }
    bulk.find({"_id": doc._id}).updateOne({$set: {annotationSets: updatedSets}});
}
// Apply addReleaseToAnnotationSets to every document with a non-empty annotationSets list in
// the collections below, in this order.
["sample", "cohort", "individual", "family"].forEach(function(collection) {
    migrateCollection(collection, {annotationSets: {$exists: true, $ne: []}}, {annotationSets: 1},
        addReleaseToAnnotationSets);
});
// Create, in background, an index over the _acl array of each of these collections.
// NOTE(review): "panel" also receives an _acl array in this script but is not indexed here,
// while "clinical" is indexed without being migrated - confirm this is intended.
["study", "cohort", "family", "file", "individual", "job", "sample", "clinical"].forEach(function(collection) {
    db[collection].createIndex({"_acl": 1}, {"background": true});
});

// Files: the index over the old "sampleIds" field is dropped and a new one is created over the
// ids of the "samples" objects.
db.file.dropIndex({"sampleIds": 1});
db.file.createIndex({"samples.id": 1}, {"background": true});
// Variable set ids were stored as integers instead of longs: rewrite them as NumberLong.
migrateCollection("study", {"variableSets" : {$exists: true, $ne: []}}, {variableSets: 1}, function(bulk, doc) {
    // Only the first variable set is checked: when its id is not a plain number the study is
    // considered already converted.
    var firstIsPlainNumber = (typeof doc.variableSets[0].id === "number");
    if (!firstIsPlainNumber) {
        return;
    }

    var converted = doc.variableSets.reduce(function(result, variableSet) {
        variableSet.id = NumberLong(variableSet.id);
        result.push(variableSet);
        return result;
    }, []);

    bulk.find({"_id": doc._id}).updateOne({"$set": {"variableSets": converted}});
});
// Migration callback: rewrites the variableSetId of every annotation set of the document as a
// NumberLong.
//   bulk: bulk operation where the update is queued.
//   doc:  document being migrated; its annotationSets array must not be empty, as the first
//         element is read to decide whether there is anything to do.
// Nothing is queued when the first variableSetId is not a plain number (already converted).
function int2LongAnnotationSets(bulk, doc) {
    var firstIsPlainNumber = (typeof doc.annotationSets[0].variableSetId === "number");
    if (!firstIsPlainNumber) {
        return;
    }

    var converted = doc.annotationSets.reduce(function(result, annotationSet) {
        annotationSet.variableSetId = NumberLong(annotationSet.variableSetId);
        result.push(annotationSet);
        return result;
    }, []);

    bulk.find({"_id": doc._id}).updateOne({"$set": {"annotationSets": converted}});
}
// Apply int2LongAnnotationSets to every document with a non-empty annotationSets list in the
// collections below, in this order.
["sample", "individual", "cohort", "family"].forEach(function(collection) {
    migrateCollection(collection, {"annotationSets" : {$exists: true, $ne: []}}, {annotationSets: 1},
        int2LongAnnotationSets);
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment