Instantly share code, notes, and snippets.

Embed
What would you like to do?
Script that uses SPARQL to detect HMDB identifiers in the old format and creates QuickStatements to replace them with the same identifier in the new scheme
// SPARQL query against Wikidata: select every item with an HMDB ID (P2057)
// whose value is exactly nine characters long, which is how this script
// recognises the old identifier format. substr(..., 32) trims the item IRI
// down to its trailing part (presumably the bare Q-identifier, given the
// 31-character "http://www.wikidata.org/entity/" prefix -- confirm).
// At most 500 hits are returned per run, ordered by identifier.
sparql = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
SELECT (substr(str(?cmp),32) as ?wd) ?hmdb WHERE {
?cmp wdt:P2057 ?hmdb .
FILTER (STRLEN(STR(?hmdb)) = 9)
} ORDER BY ?hmdb LIMIT 500
"""

// Fail fast when there is no network connection. Without this check
// `results` would stay undefined and the script would only break later,
// after the output file has already been deleted and recreated, with an
// error that does not mention the real cause.
if (!bioclipse.isOnline()) {
  throw new IllegalStateException(
    "Bioclipse is offline: cannot query https://query.wikidata.org/sparql"
  )
}

// Run the query; `results` is read by the statements further down.
results = rdf.sparqlRemote(
  "https://query.wikidata.org/sparql", sparql
)
/**
 * Replaces whatever currently exists at the given path with a newly
 * created file.
 *
 * If ui.fileExists reports the path as present it is removed first, then
 * ui.newFile is called for it.
 *
 * @param file path handed to the Bioclipse ui manager
 * @return the same path that was passed in
 */
def renewFile(file) {
  def alreadyPresent = ui.fileExists(file)
  if (alreadyPresent) {
    ui.remove(file)
  }
  ui.newFile(file)
  file
}
// File that receives the generated QuickStatements commands; it is
// recreated on every run.
mappingsFile = "/HMDB/patternUpdates.txt"
renewFile(mappingsFile)

// Each result row yields two tab-separated lines for property P2057:
//   1. a line prefixed with "-" carrying the identifier as currently stored
//      (the QuickStatements removal form), and
//   2. a line carrying the identifier with "HMDB" rewritten to "HMDB00".
// Rows are read from index 1 up to and including results.rowCount.
def commands = []
def row = 1
while (row <= results.rowCount) {
  def cells = results.getRow(row)
  def item = cells[0]
  def oldId = cells[1]
  def newId = oldId.replace("HMDB", "HMDB00")
  commands << "-${item}\tP2057\t\"${oldId}\"\n"
  commands << "${item}\tP2057\t\"${newId}\"\n"
  row++
}

// Write everything in one go.
mappingContent = commands.join("")
ui.append(mappingsFile, mappingContent)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment