Last active
October 2, 2019 12:19
-
-
Save fintanmm/ebcd2ce615867c6dd4e2532e258e1f8e to your computer and use it in GitHub Desktop.
Fixing metadata within Alfresco.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Summary. Attempts to fix metadata that is in the Alfresco Repo.
 * @param {object} args
 */

/**
 * Flags the current response as a bad request.
 *
 * Writes code 400, the supplied message and `redirect = true` onto the
 * `status` object (presumably the web script `status` root object; it is not
 * defined in this file).
 *
 * @param {string} message - Text stored in `status.message`.
 */
const invalidArg = function (message) {
  status.code = 400
  status.message = message
  status.redirect = true
}
// check that search siteId has been provided
if (args.siteId === undefined || !siteService.hasSite(args.siteId)) {
  invalidArg('A valid siteId has not been provided.')
} else {
  importClass(java.util.concurrent.ArrayBlockingQueue)
  importClass(java.util.Arrays)
  importClass(java.util.concurrent.ExecutorService)
  importClass(java.util.concurrent.Executors)
  importClass(java.lang.Thread)
  importClass(org.alfresco.repo.security.authentication.AuthenticationUtil)

  // Flags are compared against the literal string 'true' everywhere, so a
  // value such as 'false' no longer enables debug output by being truthy.
  const debugEnabled = args.debug === 'true'
  const dryRun = args.dryrun === 'true'

  // Maximum number of file-name clauses drained into a single search.
  const batchSize = 5000

  // Comma-separated aspect names. A missing argument means "no aspects"
  // instead of a TypeError on split(); blank entries are dropped.
  const aspectsToApply = (args.aspects === undefined || args.aspects === null)
    ? []
    : String(args.aspects).split(',').map(function (asp) {
      return asp.trim()
    }).filter(function (asp) {
      return asp.length > 0
    })

  /**
   * Runs an fts-alfresco query against workspace://SpacesStore.
   *
   * @param {string} forFiles - Query text; an empty string skips the search.
   * @returns {Array} The result of search.query, or [] for an empty query.
   */
  const preformSearch = function (forFiles) {
    if (forFiles.length < 1) {
      return []
    }
    return search.query({
      query: forFiles,
      store: 'workspace://SpacesStore',
      language: 'fts-alfresco'
    })
  }

  const nodeRefRegex = /^workspace:\/\/SpacesStore\/[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$/

  /**
   * Loads the text of the metadata file named by the request.
   *
   * The argument is treated as a nodeRef when it matches nodeRefRegex,
   * otherwise as a cm:name to search for. Every failure path calls invalidArg
   * and returns null so the caller never parses undefined content.
   *
   * @param {string} metadataFile - nodeRef or file name of the metadata file.
   * @returns {?string} File content, or null when it could not be loaded.
   */
  const readMetadataContents = function (metadataFile) {
    if (metadataFile === undefined || metadataFile === null) {
      invalidArg('A metadataFile has not been provided.')
      return null
    }
    var fileRef = String(metadataFile)

    if (nodeRefRegex.test(fileRef)) {
      var metadataNode = utils.getNodeFromString(fileRef)
      var nodeExists = metadataNode != null && metadataNode.exists()
      if (debugEnabled) {
        logger.warn(nodeExists)
      }
      if (!nodeExists) {
        invalidArg('Metadata file could not be found.')
        return null
      }
      return String(metadataNode.content)
    }

    // indexOf returns -1 when 'json' is absent; the previous `!indexOf(...)`
    // test only fired when the name *started* with 'json'.
    if (fileRef.indexOf('json') === -1) {
      invalidArg('Metadata file is not in JSON format')
      return null
    }

    var matches = preformSearch('cm:name:' + fileRef)
    if (matches.length < 1) {
      invalidArg('Metadata file could not be found.')
      return null
    }
    return String(matches[0].content)
  }

  // Metadata entries keyed by the search clause for their file:
  // 'cm:name:' + SourceFile + '.pdf'.
  const fileNameAsKey = {}

  /**
   * Drains up to batchSize clauses from the queue, searches for those files
   * and applies the requested aspects and the matching metadata to each hit.
   *
   * @param {java.util.concurrent.ArrayBlockingQueue} listOfFilesNames - Queue
   *   of 'cm:name:<file>' clauses still to be processed; drained in place.
   */
  const fixMetadata = function (listOfFilesNames) {
    var searchForFiles = new java.util.concurrent.ArrayBlockingQueue(batchSize)
    listOfFilesNames.drainTo(searchForFiles, batchSize)
    // NOTE(review): clauses are not quoted, so a file name containing spaces
    // may not parse as a single cm:name term - confirm against real data.
    var query = searchForFiles.toArray().join(' or ')

    preformSearch(query).forEach(function (f) {
      // The key format mirrors the one used when fileNameAsKey was filled.
      var fileName = f.properties['cm:name']
      var metadataFileName = fileNameAsKey['cm:name:' + fileName]
      if (debugEnabled) {
        logger.warn('File name is ' + fileName)
        logger.warn('Aspects are ' + args.aspects)
        logger.warn('Path to file ' + f.displayPath)
        logger.warn('NodeRef ' + f.nodeRef)
      }

      aspectsToApply.forEach(function (asp) {
        if (f.hasAspect(asp)) {
          return
        }
        // addAspect is the call that changes the node, so a dry run has to
        // skip it entirely rather than only skipping save().
        if (dryRun) {
          if (debugEnabled) {
            logger.warn('Dry run, aspect not added: ' + asp)
          }
          return
        }
        f.addAspect(asp)
        f.save()
      })

      if (metadataFileName !== undefined) {
        Object.keys(metadataFileName).forEach(function (t) {
          if (t !== 'SourceFile') {
            f.properties[t] = metadataFileName[t]
            if (debugEnabled) {
              logger.warn(t + ': ' + metadataFileName[t])
            }
          }
        })
        if (!dryRun) {
          f.save()
        }
      }
    })
  }

  const contents = readMetadataContents(args.metadataFile)
  if (contents !== null) {
    var parsedJSON = null
    try {
      parsedJSON = JSON.parse(contents)
    } catch (e) {
      // Reported through the "not in the correct format" branch below.
      logger.warn('Metadata file could not be parsed as JSON: ' + e)
    }

    if (parsedJSON !== null && typeof parsedJSON === 'object' && Array.isArray(parsedJSON.metadata)) {
      parsedJSON.metadata.forEach(function (p) {
        // Skip null and primitive entries; the `in` operator throws on them.
        if (p === null || typeof p !== 'object') {
          return
        }
        if ('SourceFile' in p) {
          fileNameAsKey['cm:name:' + p.SourceFile + '.pdf'] = p
        } else {
          logger.warn("No SourceFile key found.")
        }
      })

      var fileNameClauses = Object.keys(fileNameAsKey)
      // ArrayBlockingQueue rejects a capacity below 1, so keep at least one
      // slot even when no usable entries were found.
      var listOfFilesNames = new java.util.concurrent.ArrayBlockingQueue(Math.max(1, fileNameClauses.length))
      listOfFilesNames.addAll(fileNameClauses)

      // Batches are processed sequentially on the request thread. The thread
      // pool that used to be created here never received a task, so it has
      // been removed.
      logger.warn('Submitting Task ...')
      while (!listOfFilesNames.isEmpty()) {
        fixMetadata(listOfFilesNames)
        logger.warn('There are ' + listOfFilesNames.size() + ' documents remaining')
      }
    } else {
      invalidArg('Metadata is not in the correct format.')
      logger.warn('Metadata is not in the correct format.')
    }
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment