Skip to content

Instantly share code, notes, and snippets.

@peterk
Created October 27, 2015 14:04
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save peterk/052aa53e96d5144dd53a to your computer and use it in GitHub Desktop.
Save peterk/052aa53e96d5144dd53a to your computer and use it in GitHub Desktop.
Map local authority records to Libris URIs
@Grab('com.github.albaker:GroovySparql:0.9.0')
import groovy.sparql.*
@Grab('commons-lang#commons-lang;2.4')
import org.apache.commons.lang.WordUtils
import groovy.json.StringEscapeUtils
// Input: tab-separated file with one record per line. The first column is the
// local record id and the second column is the name to look up in Libris.
// Example line (<TAB> stands for the tab character):
//   20384204<TAB>Strindberg, August, 1849-1912
//
// NOTE: fname and spep are assigned without `def` or a type, which places them
// in the script binding. lookup() relies on this to reach spep; declaring them
// with `def` would make them locals of the script body and hide them from it.
fname = 'cleaned.txt'
// Libris SPARQL endpoint, queried through GroovySparql.
spep = new Sparql(endpoint:"http://libris.kb.se/sparql")
/**
 * Tries to find Libris URIs for a person with exactly the given name.
 *
 * The name is normalised with WordUtils.capitalizeFully and escaped before it
 * is embedded as a string literal in the SPARQL query, so only persons whose
 * foaf:name equals the normalised form exactly are returned.
 *
 * @param name Name to look for, e.g. "Strindberg, August, 1849-1912"
 * @return a list of matching URIs as Strings; empty when nothing matched
 */
def lookup(name) {
    // Declared with `def` so they stay local to this call instead of being
    // written into the shared script binding.
    def result = []
    def normalised = WordUtils.capitalizeFully(name)
    def escapedName = StringEscapeUtils.escapeJavaScript(normalised)
    // escapeJavaScript also rewrites "/" as "\/", which is not a valid SPARQL
    // string escape. "/" needs no escaping in SPARQL, so undo that one. Every
    // "/" in the escaped text is preceded by its own backslash, so this cannot
    // disturb an escaped backslash.
    escapedName = escapedName?.replace('\\/', '/')
    def query = """
PREFIX owl: <http://www.w3.org/2002/07/owl#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dbpedia: <http://dbpedia.org/ontology/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
select ?s {
?s a foaf:Person .
?s foaf:name "${escapedName}" .
}
"""
    spep.each query, {
        // `s` is the ?s column of the current result row (presumably exposed by
        // GroovySparql through the closure delegate). Store a plain String
        // rather than a GString.
        result << "${s}".toString()
    }
    return result
}
// Reads the input file line by line, looks up each usable name in Libris and
// prints "<id>\t<name>\t<first matching URI>" to stdout for every hit.
//
// A line is counted in `skipped` (and not looked up) when it has no second
// tab-separated column, when the name is empty, 50 characters or longer, or
// when the name contains "/" or a double quote.
//
// lineno and skipped stay undeclared (script binding) so that code after the
// loop can still read them.
File csv = new File(fname)
lineno = 1
skipped = 0
csv.eachLine { String line ->
    def fields = line.split("\t")
    // split() drops trailing empty fields, so a line without a tab (or with an
    // empty name column) yields fewer than two elements.
    def name = fields.length > 1 ? fields[1] : null
    def usable = name && name.size() < 50 && !name.contains("/") && !name.contains("\"")
    if (usable) {
        def librisUri = lookup(name)
        if (librisUri) {
            // Only the first match is reported, even if several URIs were found.
            println "${fields[0]}\t${name}\t${librisUri[0]}"
        }
    } else {
        skipped++
    }
    lineno++
}
//println "Checked ${lineno} names."
//println "Skipped ${skipped} lines."
//println "Matched ${lineno-skipped} lines."
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment