Last active
September 15, 2017 12:11
-
-
Save pfurio/2ca0cb2da46eac9e309101066f8758f5 to your computer and use it in GitHub Desktop.
OpenCGA migration scripts to move from release 1.1.x to 1.2.0
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. Load the function from https://gist.github.com/pfurio/f7cd90af08e0073699f0beeeef1958ba
// 2. Load all the scripts present in this gist in the CATALOG database
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Remove the sessions array from every user entry. #618
db.user.update({}, {$unset: {sessions: ""}}, {multi: true});
// Remove "admin.sessions" from the metadata document and set "admin.secretKey" to
// "dummy" and "admin.algorithm" to "HS256". No `multi` option is passed, so only the
// first matching metadata document is updated.
// NOTE(review): "dummy" looks like a placeholder secret - confirm it is replaced after migrating.
db.metadata.update({}, {$unset: {"admin.sessions": ""}, $set: {"admin.secretKey": "dummy", "admin.algorithm": "HS256"}});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Convert lists of longs into lists of objects where needed. #621
/**
 * Converts a list of ids into a list of objects of the form {id: NumberLong(id)}.
 *
 * Falsy entries (null, undefined, 0, ...) are skipped, as before. A null or undefined
 * list yields an empty list.
 *
 * @param {Array} originalList - list of ids (plain numbers or NumberLong values).
 * @returns {Array} new list with one {id: NumberLong} object per non-falsy entry.
 */
function listLong2ListObject(originalList) {
    var newList = [];
    if (originalList == null) {
        return newList;
    }
    // Index loop instead of for...in, so enumerable properties inherited from
    // Array.prototype are never mistaken for ids.
    for (var i = 0; i < originalList.length; i++) {
        if (originalList[i]) {
            newList.push({
                "id": NumberLong(originalList[i])
            });
        }
    }
    return newList;
}
// Rename sampleIds to samples in every file that still carries the old field, then
// move list<long> to list<object>.
db.file.update({"sampleIds": {$exists: true}}, {$rename: {"sampleIds": "samples"}}, {multi: true});
migrateCollection("file", {"samples": {$exists: true, $ne: []}}, {samples: 1}, function (bulk, doc) {
    var first = doc.samples[0];
    // A raw id is either a plain number or a NumberLong (which exposes toNumber()).
    // Anything else means the list was already converted to objects.
    var isRawId = typeof first === "number" || (first != null && typeof first.toNumber === "function");
    if (!isRawId) {
        return;
    }
    var samples = listLong2ListObject(doc.samples);
    bulk.find({"_id": doc._id}).updateOne({"$set": {"samples": samples}});
});
// Move list<long> to list<object> in the input and output fields of every job.
migrateCollection("job",
    {$or: [{"input": {$exists: true, $ne: []}}, {"output": {$exists: true, $ne: []}}]},
    {input: 1, output: 1}, function (bulk, doc) {
        // A raw id is either a plain number or a NumberLong (which exposes toNumber()).
        function isRawId(value) {
            return typeof value === "number" || (value != null && typeof value.toNumber === "function");
        }

        // The $or query also matches jobs where only one of the two fields exists,
        // so a missing field is treated as an empty list.
        var input = doc.input || [];
        var output = doc.output || [];

        // Probe the first element of the first non-empty list. When input is empty
        // the probe must come from output (the previous code probed input again, so
        // jobs with only output were never migrated).
        var probe = input.length > 0 ? input[0] : output[0];
        if (!isRawId(probe)) {
            return; // Both lists are empty or they were already changed to objects
        }

        bulk.find({"_id": doc._id}).updateOne({"$set": {
            "input": listLong2ListObject(input),
            "output": listLong2ListObject(output)
        }});
    });
// Move list<long> to list<object> in the samples of every cohort.
migrateCollection("cohort", {"samples": {$exists: true, $ne: []}}, {samples: 1}, function (bulk, doc) {
    var first = doc.samples[0];
    // A raw id is either a plain number or a NumberLong (which exposes toNumber()).
    // Anything else means the list was already converted to objects.
    var isRawId = typeof first === "number" || (first != null && typeof first.toNumber === "function");
    if (!isRawId) {
        return;
    }
    var samples = listLong2ListObject(doc.samples);
    bulk.find({"_id": doc._id}).updateOne({"$set": {"samples": samples}});
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Migration to support tickets #632 and #642
// An _acl array will be created from the current permissions stored in acl (#632)
// A new @members group will be created containing all the existing users of each study (#642)
/**
 * Maps a user id to the id used in the new permission entries.
 *
 * @param {string} user - user id as currently stored.
 * @returns {string} "*" when the user is "anonymous", the same id otherwise.
 */
function getUser(user) {
    return user === "anonymous" ? "*" : user;
}
/**
 * Flattens a list of acl entries into a list of "<member>_<permission>" strings.
 *
 * For every entry, one string is produced per permission, followed by
 * "<member>_NONE". Member ids are passed through getUser() first.
 *
 * @param {Array} aclEntryList - acl entries of the form {member, permissions}.
 *        A null or undefined list yields an empty result.
 * @returns {Array} list of permission strings.
 */
function buildNewPermissions(aclEntryList) {
    var permissions = [];
    if (aclEntryList == null) {
        return permissions;
    }
    // Index loops instead of for...in, so enumerable properties inherited from
    // Array.prototype are never treated as entries.
    for (var i = 0; i < aclEntryList.length; i++) {
        var entry = aclEntryList[i];
        var member = getUser(entry.member);
        // An entry without a permissions list contributes only its _NONE marker.
        var granted = entry.permissions || [];
        for (var j = 0; j < granted.length; j++) {
            permissions.push(member + "_" + granted[j]);
        }
        permissions.push(member + "_NONE");
    }
    return permissions;
}
// Build the _acl array of every study that does not have one yet and create a new
// internal group called @members containing every user found in the study.
migrateCollection("study", {_acl: {$exists: false}}, {acl: 1, groups: 1}, function (bulk, doc) {
    var acl = buildNewPermissions(doc.acl);

    // Studies stored without groups or acl are treated as having empty lists.
    var groups = doc.groups || [];
    var aclEntries = doc.acl || [];

    // We will obtain all the users with any permission in the study
    var users = new Set();

    // Look for users in groups
    groups.forEach(function (group) {
        if (group.name === "@members") {
            // TODO: We need to take this group out. At the moment, we will assume that group does not exist.
            throw new Error("A group called @members has been found in study " + doc._id + ". Please, remove that group before running this script"
                + " as that group name is going to be used for internal purposes.");
        }
        // Add all the users belonging to the group
        (group.userIds || []).forEach(function (userId) {
            users.add(getUser(userId));
        });
    });

    // Look for users with permissions; members starting with "@" are skipped.
    aclEntries.forEach(function (entry) {
        if (!entry.member.startsWith("@")) {
            users.add(getUser(entry.member));
        }
    });

    // Add new group members containing all the users registered in the study.
    // A new array is built so the document handed to the callback is not mutated.
    var newGroups = groups.concat([{
        "name": "@members",
        "userIds": Array.from(users),
        "syncedFrom": null
    }]);

    bulk.find({"_id": doc._id}).updateOne({"$set": {"_acl": acl, "groups": newGroups}});
});
// Build the _acl array from acl for every entry that does not have one yet, in each
// of the collections below (processed in this order).
["cohort", "family", "file", "individual", "job", "panel", "sample"].forEach(function (collection) {
    migrateCollection(collection, {_acl: {$exists: false}}, {acl: 1}, function (bulk, doc) {
        var acl = buildNewPermissions(doc.acl);
        bulk.find({"_id": doc._id}).updateOne({"$set": {"_acl": acl}});
    });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Add _ownerId to every study.
// The cursor is consumed with forEach so users are processed one at a time instead of
// being loaded into an array first, and no loop variables leak into the global scope.
db.user.find({projects: {$exists: true, $ne: []}}, {"projects.id": 1}).forEach(function (user) {
    var projectIds = user.projects.map(function (project) {
        return project.id;
    });

    // Add _ownerId to all the studies belonging to these projects
    db.study.update({"_projectId": {$in: projectIds}}, {$set: {_ownerId: user["_id"]}}, {multi: true});
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Add release to all the entries. #616
// Add currentRelease = 1 to every project (embedded in the user documents) that does
// not have it yet. The user is only updated when at least one project changed.
migrateCollection("user", {projects: {$exists: true, $ne: []}}, {projects: 1}, function (bulk, doc) {
    var changed = false;
    doc.projects.forEach(function (project) {
        if (!Object.prototype.hasOwnProperty.call(project, "currentRelease")) {
            project["currentRelease"] = 1;
            changed = true;
        }
    });

    if (changed) {
        bulk.find({"_id": doc._id}).updateOne({$set: {projects: doc.projects}});
    }
});
// Add release = 1 to studies, cohorts, families, files, individuals, jobs, panels and samples.
// Only entries without a release are touched, so running the script again does not
// reset a release that was set later on.
["study", "cohort", "family", "file", "individual", "job", "panel", "sample"].forEach(function (collection) {
    db.getCollection(collection).update({release: {$exists: false}}, {$set: {release: 1}}, {multi: true});
});
// Add release = 1 and confidential = false to every variable set that has no release yet.
// The study is only updated when at least one variable set changed.
migrateCollection("study", {variableSets: {$exists: true, $ne: []}}, {variableSets: 1}, function (bulk, doc) {
    var changed = false;
    doc.variableSets.forEach(function (varSet) {
        if (!Object.prototype.hasOwnProperty.call(varSet, "release")) {
            varSet["release"] = 1;
            varSet["confidential"] = false;
            changed = true;
        }
    });

    if (changed) {
        bulk.find({"_id": doc._id}).updateOne({$set: {variableSets: doc.variableSets}});
    }
});
// Add release to annotation sets
/**
 * migrateCollection callback: adds release = 1 to every annotation set of the
 * document that has no release yet, and queues an update only when something changed.
 *
 * @param bulk - bulk operation object; find(...).updateOne(...) is called on it.
 * @param doc - document being migrated; its annotationSets list is read and updated in place.
 */
function addReleaseToAnnotationSets(bulk, doc) {
    // A document without annotationSets has nothing to migrate.
    var annotationSets = doc.annotationSets || [];
    var changed = false;
    annotationSets.forEach(function (annSet) {
        if (!Object.prototype.hasOwnProperty.call(annSet, "release")) {
            annSet["release"] = 1;
            changed = true;
        }
    });

    if (changed) {
        bulk.find({"_id": doc._id}).updateOne({$set: {annotationSets: annotationSets}});
    }
}
// Run addReleaseToAnnotationSets over every collection holding annotation sets
// (processed in this order), restricted to documents with a non-empty annotationSets list.
["sample", "cohort", "individual", "family"].forEach(function (collection) {
    migrateCollection(collection, {annotationSets: {$exists: true, $ne: []}}, {annotationSets: 1}, addReleaseToAnnotationSets);
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Create a background index on _acl in each of the collections below.
// NOTE(review): "panel" receives an _acl array elsewhere in this migration but gets no
// index here, while "clinical" gets an index without being migrated - confirm this is intended.
["study", "cohort", "family", "file", "individual", "job", "sample", "clinical"].forEach(function (collection) {
    db.getCollection(collection).createIndex({"_acl": 1}, {"background": true});
});

// Files now keep their samples under samples.id instead of sampleIds, so the old
// index is dropped and a new one is created on the new field.
db.file.dropIndex({"sampleIds": 1});
db.file.createIndex({"samples.id": 1}, {"background": true});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Variable set ids were not being stored as long but as integers.
// Every variable set whose id is still a plain number gets it replaced by a NumberLong.
// Each element is checked on its own, so partially converted lists are completed too,
// and the study is only updated when at least one id changed.
migrateCollection("study", {"variableSets": {$exists: true, $ne: []}}, {variableSets: 1}, function (bulk, doc) {
    var changed = false;
    doc.variableSets.forEach(function (variableSet) {
        if (typeof variableSet.id === "number") {
            variableSet.id = NumberLong(variableSet.id);
            changed = true;
        }
    });

    if (!changed) {
        // It is already converted
        return;
    }
    bulk.find({"_id": doc._id}).updateOne({"$set": {"variableSets": doc.variableSets}});
});
/**
 * migrateCollection callback: replaces every variableSetId that is still a plain
 * number by a NumberLong in the annotation sets of the document.
 *
 * Each annotation set is checked on its own, so partially converted lists are
 * completed too. An update is queued only when at least one id changed.
 *
 * @param bulk - bulk operation object; find(...).updateOne(...) is called on it.
 * @param doc - document being migrated; its annotationSets list is read and updated in place.
 */
function int2LongAnnotationSets(bulk, doc) {
    // A document without annotationSets has nothing to migrate.
    var annotationSets = doc.annotationSets || [];
    var changed = false;
    annotationSets.forEach(function (annotationSet) {
        if (typeof annotationSet.variableSetId === "number") {
            annotationSet.variableSetId = NumberLong(annotationSet.variableSetId);
            changed = true;
        }
    });

    if (!changed) {
        // It is already converted
        return;
    }
    bulk.find({"_id": doc._id}).updateOne({"$set": {"annotationSets": annotationSets}});
}
// Run int2LongAnnotationSets over every collection holding annotation sets
// (processed in this order), restricted to documents with a non-empty annotationSets list.
["sample", "individual", "cohort", "family"].forEach(function (collection) {
    migrateCollection(collection, {"annotationSets": {$exists: true, $ne: []}}, {annotationSets: 1}, int2LongAnnotationSets);
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment