Skip to content

Instantly share code, notes, and snippets.

@tts
Last active December 15, 2015 09:09
Show Gist options
  • Save tts/5236036 to your computer and use it in GitHub Desktop.
Save tts/5236036 to your computer and use it in GitHub Desktop.
Construct RDF triples from the Europeana SPARQL endpoint for different AV resource types, found both via GeoNames ID enrichment and via the name of the municipality
#######################################################################
#
# Construct RDF triples to an in-memory triple store
# from the Europeana SPARQL endpoint on
# different AV resource types a) enriched with GeoNameIDs and
# b) based on the name of the municipality.
#
# Save triple store as RDF/XML.
#
# For how the municipality names (objects comms and Ncomms below) are
# obtained, and how DBpedia is queried for GeoNameIDs (object geo.df below), see
# https://gist.github.com/tts/5237811
#
# More on this in
# http://tts2.blogspot.fi/2013/03/some-europeana-av-resources-related-to.html
#
# Tuija Sonkkila 25.3.2013
#
#######################################################################
library(rrdf)
# SPARQL endpoint that serves the Europeana data.
eu_endpoint <- "http://europeana.ontotext.com/sparql"

# Accumulates the triples constructed for every municipality name.
bigstoreNames <- new.rdf()

# `comms` (municipality names) and `Ncomms` (their count) are created
# elsewhere; see https://gist.github.com/tts/5237811
#
# seq_len() instead of 1:Ncomms: with zero names, 1:0 would iterate over
# c(1, 0) and fire bogus queries; seq_len(0) runs the loop zero times.
for (i in seq_len(Ncomms)) {
  # Named `municipality` rather than `c` so that base::c() is not masked.
  municipality <- comms[i]
  cat("Next constructing ", municipality, " (", i, "/", Ncomms, ")\n", sep = "")

  # The name is pasted verbatim into single-quoted SPARQL literals and into
  # regex() patterns.
  # NOTE(review): a name containing a single quote or regex metacharacters
  # would need escaping first; regex() also matches substrings, so short names
  # can produce false positives.
  #
  # NOTE(review): the Dublin Core 1.1 element set defines no `spatial` term
  # (it exists as http://purl.org/dc/terms/spatial, which the last branch
  # below queries). The predicate in the CONSTRUCT template is kept as-is
  # because consumers of the saved store may rely on it -- confirm.
  #
  # The query is a UNION of five sub-selects matching the name in, in order:
  # dc:description, dc:subject, dcterms:alternative, dc:title (all by regex)
  # and dcterms:spatial (exact literal). Branches 2-5 sit inside one enclosing
  # group; the indentation below mirrors the actual brace nesting.
  query <- paste0(
    "CONSTRUCT { ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
      <http://purl.org/dc/elements/1.1/title> ?title ;
      <http://purl.org/dc/elements/1.1/spatial> '", municipality, "' . }
    WHERE {
      {
        {
          SELECT ?resource ?title ?type WHERE {
            ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
              <http://purl.org/dc/elements/1.1/language> 'fi' ;
              <http://purl.org/dc/elements/1.1/title> ?title ;
              <http://purl.org/dc/elements/1.1/description> ?d .
            FILTER ( regex(?d, '", municipality, "') )
            FILTER ( ?type != 'TEXT' )
          }
        }
      }
      UNION
      {
        {
          SELECT ?resource ?title ?type WHERE {
            ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
              <http://purl.org/dc/elements/1.1/language> 'fi' ;
              <http://purl.org/dc/elements/1.1/title> ?title ;
              <http://purl.org/dc/elements/1.1/subject> ?s .
            FILTER ( regex(?s, '", municipality, "') )
            FILTER ( ?type != 'TEXT' )
          }
        }
        UNION
        {
          {
            SELECT ?resource ?title ?type WHERE {
              ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
                <http://purl.org/dc/elements/1.1/title> ?title ;
                <http://purl.org/dc/elements/1.1/language> 'fi' ;
                <http://purl.org/dc/terms/alternative> ?a .
              FILTER ( regex(?a, '", municipality, "') )
              FILTER ( ?type != 'TEXT' )
            }
          }
        }
        UNION
        {
          {
            SELECT ?resource ?title ?type WHERE {
              ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
                <http://purl.org/dc/elements/1.1/language> 'fi' ;
                <http://purl.org/dc/elements/1.1/title> ?title .
              FILTER ( regex(?title, '", municipality, "') )
              FILTER ( ?type != 'TEXT' )
            }
          }
        }
        UNION
        {
          {
            SELECT ?resource ?title ?type WHERE {
              ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
                <http://purl.org/dc/elements/1.1/language> 'fi' ;
                <http://purl.org/dc/elements/1.1/title> ?title ;
                <http://purl.org/dc/terms/spatial> '", municipality, "' .
              FILTER ( ?type != 'TEXT' )
            }
          }
        }
      }
    }"
  )

  # construct.remote() returns a fresh store, so no new.rdf() is needed before
  # it; the result is merged into the running total.
  store <- construct.remote(eu_endpoint, query)
  bigstoreNames <- combine.rdf(store, bigstoreNames)
}
#####################################
#
# CONSTRUCT triples from Europeana
# by the GeoNameID of the municipality
#
#####################################
# `geo.df` (columns `c` = municipality name, `id` = GeoNames identifier) is
# created elsewhere; see https://gist.github.com/tts/5237811
# Rows whose id equals 0 are dropped -- presumably 0 marks "no GeoNames ID
# found"; verify against the code that builds geo.df.
geoids <- geo.df[geo.df$id != 0, ]
# Excluding Rauma because it generates lots of Norwegian false positives
geoids <- geoids[geoids$c != "Rauma", ]
Ngeoid <- nrow(geoids)

# Accumulates the triples constructed for every GeoNames ID.
bigstoreGeoids <- new.rdf()

# seq_len() instead of 1:Ngeoid: if no row survives the filters above, 1:0
# would iterate over c(1, 0) and index non-existent rows.
for (i in seq_len(Ngeoid)) {
  # as.character() so that factor columns yield their labels (cat() would
  # otherwise print the integer codes). Named `municipality` rather than `c`
  # so that base::c() is not masked.
  municipality <- as.character(geoids$c[i])

  # The query below interpolates `geoid`, which the original script never
  # assigned; take it from the current row.
  # NOTE(review): the value is pasted into the query unquoted, so this assumes
  # geo.df$id already holds a complete SPARQL term for the GeoNames resource
  # (e.g. an angle-bracketed URI) -- TODO confirm against the geo.df builder.
  geoid <- as.character(geoids$id[i])

  cat("Next constructing ", municipality, " (", i, "/", Ngeoid, ")\n", sep = "")

  # Walk from the Europeana proxy that carries the edm:hasMet enrichment to
  # its aggregation, then to the provider proxy holding the title and type.
  # NOTE(review): the Dublin Core 1.1 element set defines no `spatial` term
  # (it exists as http://purl.org/dc/terms/spatial); the predicate is kept
  # as-is because consumers of the saved store may rely on it -- confirm.
  query <- paste0(
    "CONSTRUCT { ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;
      <http://purl.org/dc/elements/1.1/title> ?title ;
      <http://purl.org/dc/elements/1.1/spatial> '", municipality, "' . }
    WHERE {
      ?euProxy <http://www.openarchives.org/ore/terms/proxyFor> ?resource;
        <http://www.europeana.eu/schemas/edm/hasMet> ", geoid, " ;
        <http://www.openarchives.org/ore/terms/proxyIn> ?euAggr .
      ?euAggr <http://www.openarchives.org/ore/terms/aggregates> ?providerAggr .
      ?providerProxy <http://www.openarchives.org/ore/terms/proxyIn> ?providerAggr ;
        <http://purl.org/dc/elements/1.1/title> ?title ;
        <http://www.europeana.eu/schemas/edm/type> ?type .
      FILTER ( ?type != 'TEXT' )
    }"
  )

  # construct.remote() returns a fresh store, so no new.rdf() is needed before
  # it; the result is merged into the running total.
  store <- construct.remote(eu_endpoint, query)
  bigstoreGeoids <- combine.rdf(store, bigstoreGeoids)
}
# Merge the name-based and the GeoNames-based stores into a single store.
bigstoreAll <- combine.rdf(
  bigstoreNames,
  bigstoreGeoids
)
# summarize.rdf(bigstoreAll)

# Write the merged store to disk as RDF/XML.
save.rdf(
  bigstoreAll,
  "europeana_rdf_store.xml",
  format = "RDF/XML"
)

# The saved RDF/XML can be converted to Turtle/N3 with Jena rdfcat:
# http://jena.apache.org/documentation/javadoc/jena/jena/rdfcat.html
#
# $JENAROOT/bin/rdfcat -out ttl -in RDF/XML -x europeana_rdf_store.xml >europeana_rdf_store.ttl
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment