Skip to content

Instantly share code, notes, and snippets.

@tts
Last active December 15, 2015 09:09
Construct RDF triples from the Europeana SPARQL endpoint for different AV resource types, selected both via GeoNameID enrichments and by the name of the municipality
#######################################################################
#
# Construct RDF triples to an in-memory triple store
# from the Europeana SPARQL endpoint on
# different AV resource types a) enriched with GeoNameIDs and
# b) based on the name of the municipality.
#
# Save triple store as RDF/XML.
#
# For getting the names of the municipalities (object comms and Ncomms below),
# and querying DBpedia on GeoNameIDs (object geo.df below), see
# https://gist.github.com/tts/5237811
#
# More on this in
# http://tts2.blogspot.fi/2013/03/some-europeana-av-resources-related-to.html
#
# Tuija Sonkkila 25.3.2013
#
#######################################################################
library(rrdf)
# Europeana SPARQL endpoint that every CONSTRUCT query below is sent to.
eu_endpoint <- "http://europeana.ontotext.com/sparql"

# Accumulates the triples constructed for all municipalities (name search).
bigstoreNames <- new.rdf()

# For each municipality name in `comms`, CONSTRUCT edm:type, dc:title and
# dc:spatial triples for resources whose dc:language is 'fi', whose type is
# not 'TEXT', and that match the name in at least one of five ways:
#   - regex match in dc:description
#   - regex match in dc:subject
#   - regex match in dcterms:alternative
#   - regex match in dc:title
#   - exact literal match in dcterms:spatial
# The constructed dc:spatial value is always the municipality name itself.
#
# `comms` and `Ncomms` are expected to exist already (see the header comment).
# seq_len() is used instead of 1:Ncomms so that nothing is queried when
# Ncomms is 0 (1:0 would iterate over c(1, 0)).
#
# NOTE(review): the name is interpolated into the query unescaped. A name
# containing a single quote or regex metacharacters would alter the query.
for (i in seq_len(Ncomms)) {
  # Named `comm` rather than `c` to avoid masking base::c().
  comm <- comms[i]
  cat("Next constructing ", comm, " (", i, "/", Ncomms, ")\n", sep = "")

  # The query text is kept flush-left because it is part of the string that
  # is sent to the endpoint.
  query <- paste0("CONSTRUCT { ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/elements/1.1/spatial> '", comm, "' . }
WHERE {
{
{
SELECT ?resource ?title ?type WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/elements/1.1/description> ?d .
FILTER ( regex(?d, '", comm, "') )
FILTER ( ?type != 'TEXT' )
}
}
}
UNION
{
{
SELECT ?resource ?title ?type WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/elements/1.1/subject> ?s .
FILTER ( regex(?s, '", comm, "') )
FILTER ( ?type != 'TEXT' )
}
}
UNION
{
{
SELECT ?resource ?title ?type WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/terms/alternative> ?a .
FILTER ( regex(?a, '", comm, "') )
FILTER ( ?type != 'TEXT' )
}
}
}
UNION
{
{
SELECT ?resource ?title ?type WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/title> ?title .
FILTER ( regex(?title, '", comm, "') )
FILTER ( ?type != 'TEXT' )
}
}
}
UNION
{
{
SELECT ?resource ?title ?type WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/terms/spatial> '", comm, "' .
FILTER ( ?type != 'TEXT' )
}
}
}
}
}")

  # Fetch the triples for this municipality and merge them into the
  # accumulated store.
  store <- construct.remote(eu_endpoint, query)
  bigstoreNames <- combine.rdf(store, bigstoreNames)
}
#####################################
#
# CONSTRUCT triples from Europeana
# by the GeoNameID of the municipality
#
#####################################
# Keep only municipalities for which a GeoNameID was found. An id of 0 is
# treated as "no id"; NA ids are dropped too, because an NA in the logical
# index would otherwise yield all-NA rows.
geoids <- geo.df[!is.na(geo.df$id) & geo.df$id != 0, ]
# Excluding Rauma because it generates lots of Norwegian false positives
geoids <- geoids[!(geoids$c %in% "Rauma"), ]
Ngeoid <- nrow(geoids)

# Accumulates the triples constructed for all municipalities (GeoNameID
# search).
bigstoreGeoids <- new.rdf()

# For each remaining municipality, CONSTRUCT edm:type, dc:title and dc:spatial
# triples for resources whose Europeana proxy has edm:hasMet equal to the
# municipality's GeoNameID. Title and type are read from the provider proxy
# reached through the aggregation chain
# (euProxy -> euAggr -> providerAggr <- providerProxy). Resources of type
# 'TEXT' are excluded. The constructed dc:spatial value is the municipality
# name.
#
# seq_len() is used instead of 1:Ngeoid so that nothing is queried when no
# rows are left (1:0 would iterate over c(1, 0)).
for (i in seq_len(Ngeoid)) {
  # as.character() so that cat() prints the name even if the column is a
  # factor; paste0() would convert it the same way.
  comm <- as.character(geoids$c[i])
  # The id of the current row. Previously `geoid` was never assigned inside
  # the loop, so the query used an undefined (or stale global) value.
  # NOTE(review): assumes `id` already holds the term exactly as it must
  # appear in SPARQL (e.g. a <...>-bracketed URI) -- confirm against the
  # code that builds geo.df.
  geoid <- as.character(geoids$id[i])
  cat("Next constructing ", comm, " (", i, "/", Ngeoid, ")\n", sep = "")

  # The query text is kept flush-left because it is part of the string that
  # is sent to the endpoint.
  query <- paste0("CONSTRUCT { ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://purl.org/dc/elements/1.1/spatial> '", comm, "' . }
WHERE {
?euProxy <http://www.openarchives.org/ore/terms/proxyFor> ?resource;
<http://www.europeana.eu/schemas/edm/hasMet> ", geoid, " ;
<http://www.openarchives.org/ore/terms/proxyIn> ?euAggr .
?euAggr <http://www.openarchives.org/ore/terms/aggregates> ?providerAggr .
?providerProxy <http://www.openarchives.org/ore/terms/proxyIn> ?providerAggr ;
<http://purl.org/dc/elements/1.1/title> ?title ;
<http://www.europeana.eu/schemas/edm/type> ?type .
FILTER ( ?type != 'TEXT' )
}")

  # Fetch the triples for this municipality and merge them into the
  # accumulated store.
  store <- construct.remote(eu_endpoint, query)
  bigstoreGeoids <- combine.rdf(store, bigstoreGeoids)
}
# Merge the name-based and the GeoNameID-based stores into one store and
# write it to disk as RDF/XML.
rdf_output_file <- "europeana_rdf_store.xml"
bigstoreAll <- combine.rdf(bigstoreNames, bigstoreGeoids)
# summarize.rdf(bigstoreAll)
save.rdf(bigstoreAll, rdf_output_file, format = "RDF/XML")
# Convert from RDF/XML to Turtle/N3 with Jena rdfcat
# http://jena.apache.org/documentation/javadoc/jena/jena/rdfcat.html
#
# $JENAROOT/bin/rdfcat -out ttl -in RDF/XML -x europeana_rdf_store.xml >europeana_rdf_store.ttl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment