Skip to content

Instantly share code, notes, and snippets.

@egonw
Created January 28, 2017 11:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save egonw/7c8a2bdba6d9d4b66f4efe0d9a9dbcb3 to your computer and use it in GitHub Desktop.
Save egonw/7c8a2bdba6d9d4b66f4efe0d9a9dbcb3 to your computer and use it in GitHub Desktop.
Script that checks the status of HMDB identifiers
// Script-wide state (declared without 'def' so the values live in the script binding).
//   outdated    - HMDB identifiers the scan below records as outdated
//   secondaries - secondary accession number -> HMDB identifier whose page listed it
outdated = new ArrayList<String>()
secondaries = new HashMap<String, String>()
// Page address prefix; the scan appends an HMDB identifier to it before downloading.
urlPrefix = "http://www.hmdb.ca/metabolites/"
/**
 * Replaces the given workspace file with a freshly created one.
 *
 * If ui.fileExists reports the file, it is removed first; ui.newFile is then
 * called for the same path.
 *
 * @param file path of the file to (re)create
 * @return the same path that was passed in
 */
def renewFile(file) {
    if (ui.fileExists(file)) {
        ui.remove(file)
    }
    ui.newFile(file)
    file
}
// Start each run with fresh output files. renewFile hands back the path it was
// given, so creation and assignment are combined into one statement per file.
outdatedFile = renewFile("/HMDB/outdated-10k.txt")
secondariesFile = renewFile("/HMDB/secondaries-10k.txt")
// Scan HMDB00001 .. HMDB10000. For every identifier the metabolite page is
// downloaded and parsed:
//   - secondary accession numbers listed on the page are stored in 'secondaries'
//     and appended to secondariesFile as "<secondary>, <primary>" lines;
//   - identifiers whose page has no content table, or whose download/parsing
//     throws, are stored in 'outdated' and appended to outdatedFile.
for (int i = 1; i <= 10000; i++) {
    def hmdbid = "HMDB" + String.format("%05d", i)
    println "checking $hmdbid ..."

    // Already seen as a secondary accession of an earlier entry: skip it BEFORE
    // downloading, so no request (and no throttling pause) is spent on it.
    if (secondaries.containsKey(hmdbid)) {
        continue
    }

    try {
        def pageContent = bioclipse.download(urlPrefix + hmdbid)
        def contentModel = jsoup.parseString(pageContent)
        def tableContent = jsoup.select(contentModel, "table.content-table th")

        // jsoup's select yields an empty collection (not null) when nothing matches,
        // so both cases have to be treated as "no content table on this page".
        if (tableContent == null || tableContent.isEmpty()) {
            // Outdated HMDB ID ?
            println "Oudated HMDB? $hmdbid"
            outdated.add(hmdbid)
            // Persist it as well, consistent with the exception path below.
            ui.append(outdatedFile, hmdbid + "\n")
        } else {
            // OK, valid HMDB ID, it seems
            for (element in tableContent) {
                if ("Secondary Accession Numbers" != element.ownText()) {
                    continue
                }
                // The accession numbers are list items in the cell(s) next to the header.
                for (sibling in element.siblingElements()) {
                    for (secid in jsoup.select(sibling, "ul li")) {
                        def secondaryId = secid.ownText()
                        secondaries.put(secondaryId, hmdbid)
                        println secondaryId + ", $hmdbid"
                        ui.append(secondariesFile, secondaryId + ", ${hmdbid}\n")
                    }
                }
            }
        }
    } catch (Exception exception) {
        // NOTE(review): any failure (including a transient network error) is
        // recorded as "outdated" here; the message is printed to help tell them apart.
        println "Oudated HMDB? $hmdbid (${exception.message})"
        outdated.add(hmdbid)
        ui.append(outdatedFile, hmdbid + "\n")
    }

    // Pause between requests to avoid hammering the HMDB server.
    sleep(500)
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment