Skip to content

Instantly share code, notes, and snippets.

@egonw
Created June 30, 2018 21:37
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save egonw/16c017da5b763ecc5674547f40942d16 to your computer and use it in GitHub Desktop.
Save egonw/16c017da5b763ecc5674547f40942d16 to your computer and use it in GitHub Desktop.
Type endocannabinoids as such in Wikidata.
// Bioclipse (Groovy) script, part 1: fetch the LIPID MAPS records and ask
// Wikidata which compounds already carry an identifier for property `propID`.

// LIPID MAPS REST endpoint; the LMFA0804 in the path selects the records to download.
restAPI = "http://www.lipidmaps.org/rest/compound/lm_id/LMFA0804/all/download"
// Wikidata property interpolated into the SPARQL query below
// (P2063 -- presumably the LIPID MAPS ID property; verify on Wikidata).
propID = "P2063"

// Store the download in the workspace; the processing loop later re-reads
// this same path line by line.
allData = bioclipse.downloadAsFile(
  restAPI, "/LipidMaps/endocannabinoids.txt"
)

// Select every item that has both a P235 value (?key) and a ${propID} value
// (?lmid), excluding items already typed with P31 = Q55282178.
// substr(str(?compound),32) drops the 31-character prefix
// "http://www.wikidata.org/entity/" so that ?wd is the bare Q-id.
// Column order matters: consumers read ?wd at index 0 and ?key at index 1.
sparql = """
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wd: <http://www.wikidata.org/entity/>
SELECT (substr(str(?compound),32) as ?wd) ?key ?lmid WHERE {
?compound wdt:P235 ?key ; wdt:${propID} ?lmid .
MINUS { ?compound wdt:P31 wd:Q55282178 . }
}
"""

// Fail fast when offline. Previously `results` was simply left unassigned in
// that case and the script died later with an unrelated missing-property
// error on `results.rowCount`.
if (!bioclipse.isOnline()) {
  throw new IllegalStateException(
    "Cannot query Wikidata: Bioclipse reports that it is offline"
  )
}
results = rdf.sparqlRemote(
  "https://query.wikidata.org/sparql", sparql
)
/**
 * Replaces a workspace file with a new one of the same path.
 *
 * If `ui.fileExists(file)` reports the path as present it is passed to
 * `ui.remove` first; `ui.newFile(file)` is then called unconditionally.
 *
 * @param file workspace path handed to the Bioclipse `ui` manager
 * @return the same `file` value that was passed in
 */
def renewFile(file) {
  def alreadyPresent = ui.fileExists(file)
  if (alreadyPresent) {
    // remove the previous copy before newFile is called for the same path
    ui.remove(file)
  }
  ui.newFile(file)
  file
}
// Bioclipse (Groovy) script, part 2: walk the downloaded LIPID MAPS file and
// write (a) one statement row per compound found in the Wikidata result and
// (b) the InChIKeys that have no match in that result.

// Output files in the workspace.
mappingsFile = "/LipidMaps/endocannabinoidMappings.txt"
missingCompoundFile = "/LipidMaps/endocannabinoidMissing.txt"

// Wikidata items (Q-ids) that must NOT receive the P31 statement written below.
ignores = new java.util.HashSet();
// ignores.add("Q37111097")

// Build a lookup from the SPARQL result: column 1 (?key) -> column 0 (?wd, the Q-id).
// Rows are read from 1 to rowCount inclusive.
map = new HashMap()
for (i=1;i<=results.rowCount;i++) {
  rowVals = results.getRow(i)
  map.put(rowVals[1], rowVals[0])
}

inchikey = ""
// Output is buffered in memory and appended to the files every `batchSize`
// accepted input lines, to limit the number of ui.append calls.
batchSize = 500
batchCounter = 0
mappingContent = ""
missingContent = ""

// Tab-separated tail appended after the Q-id on every mapping row: the P31
// statement plus its S143/S854/S813 source fields (this looks like the
// QuickStatements row format -- TODO confirm). The S813 date is fixed to
// 2018-06-30 and must be edited by hand if the script is re-run later.
statementTail = "\tP31\tQ55282178\tS143\tQ20968889\tS854\t\"${restAPI}\"\tS813\t+2018-06-30T00:00:00Z/11\n"

// Start from empty output files. (A stray `print "Saved a batch"` used to sit
// here; it ran once, before anything had been written, and was removed.)
renewFile(mappingsFile)
renewFile(missingCompoundFile)

new File(bioclipse.fullPath("/LipidMaps/endocannabinoids.txt")).eachLine{ line ->
  fields = line.split("\t")
  // Only lines with more than 15 tab-separated fields are considered.
  if (fields.length > 15) {
    lmid = fields[1]       // read but not used further in this script
    inchikey = fields[15]  // used as the lookup key into `map`
    // Skip values of 10 characters or fewer in this column.
    if (inchikey != null && inchikey.length() > 10) {
      batchCounter++
      if (map.containsKey(inchikey)) {
        wdid = map.get(inchikey)
        if (!ignores.contains(wdid)) {
          mappingContent += "${wdid}${statementTail}"
        }
      } else {
        // No item in the query result has this key.
        missingContent += "${inchikey}\n"
      }
    }
  }
  // Flush a full batch and print one dot as a progress indicator.
  if (batchCounter >= batchSize) {
    ui.append(mappingsFile, mappingContent)
    ui.append(missingCompoundFile, missingContent)
    batchCounter = 0
    mappingContent = ""
    missingContent = ""
    print "."
  }
}

// Flush whatever is left over from the last, partial batch.
ui.append(mappingsFile, mappingContent)
ui.append(missingCompoundFile, missingContent)
println "\n"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment