Last active
December 15, 2015 09:09
-
-
Save tts/5236036 to your computer and use it in GitHub Desktop.
Construct RDF triples from the Europeana SPARQL endpoint on different AV resource types both enriched with GeoNameIDs and based on the name of the municipality
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#######################################################################
#
# Construct RDF triples to an in-memory triple store
# from the Europeana SPARQL endpoint on
# different AV resource types a) enriched with GeoNameIDs and
# b) based on the name of the municipality.
#
# Save triple store as RDF/XML.
#
# For getting the names of the municipalities (objects comms and Ncomms
# below), and querying DBpedia on GeoNameIDs (object geo.df below), see
# https://gist.github.com/tts/5237811
#
# http://tts2.blogspot.fi/2013/03/some-europeana-av-resources-related-to.html
#
# Tuija Sonkkila 25.3.2013
#
#######################################################################
library(rrdf)

# SPARQL endpoint that every CONSTRUCT query in this script is sent to.
eu_endpoint <- "http://europeana.ontotext.com/sparql"

# Build the name-based CONSTRUCT query for one municipality.
#
# The query collects non-TEXT resources with language 'fi' whose description,
# subject, alternative title or title matches `name` as a regular expression,
# or whose dcterms:spatial value equals `name` exactly. For every hit it emits
# the resource's type and title plus a 'spatial' triple carrying `name`.
#
# `name` is spliced verbatim into single-quoted SPARQL literals and regex
# patterns, so it is assumed to contain no quotes, backslashes or regex
# metacharacters -- TODO confirm against the code that builds `comms`.
#
# NOTE(review): the WHERE clause matches <http://purl.org/dc/terms/spatial>,
# while the emitted triple uses <http://purl.org/dc/elements/1.1/spatial>.
# Kept as in the original because consumers of the saved store may rely on it.
build_name_query <- function(name) {
  # Wrap branch-specific patterns in the sub-select shared by all branches.
  branch <- function(extra) {
    paste0(
      "{ { SELECT ?resource ?title ?type WHERE {\n",
      "  ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;\n",
      "    <http://purl.org/dc/elements/1.1/language> 'fi' ;\n",
      "    <http://purl.org/dc/elements/1.1/title> ?title",
      extra, "\n",
      "  FILTER ( ?type != 'TEXT' )\n",
      "} } }"
    )
  }

  # Branch that regex-matches `name` against the values of `predicate`.
  regex_branch <- function(predicate) {
    branch(paste0(
      " ;\n",
      "    <", predicate, "> ?v .\n",
      "  FILTER ( regex(?v, '", name, "') )"
    ))
  }

  branches <- c(
    regex_branch("http://purl.org/dc/elements/1.1/description"),
    regex_branch("http://purl.org/dc/elements/1.1/subject"),
    regex_branch("http://purl.org/dc/terms/alternative"),
    # The title is already bound, so filter on it directly.
    branch(paste0(" .\n  FILTER ( regex(?title, '", name, "') )")),
    # Exact match on the spatial coverage instead of a regex.
    branch(paste0(
      " ;\n",
      "    <http://purl.org/dc/terms/spatial> '", name, "' ."
    ))
  )

  paste0(
    "CONSTRUCT {\n",
    "  ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;\n",
    "    <http://purl.org/dc/elements/1.1/title> ?title ;\n",
    "    <http://purl.org/dc/elements/1.1/spatial> '", name, "' .\n",
    "}\n",
    "WHERE {\n",
    paste(branches, collapse = "\nUNION\n"),
    "\n}"
  )
}

# Accumulates the triples constructed for every municipality name.
bigstoreNames <- new.rdf()

# seq_len() makes the loop a no-op when Ncomms is 0 (1:0 would run twice).
for (i in seq_len(Ncomms)) {
  comm_name <- comms[i]
  cat("Next constructing ", comm_name, " (", i, "/", Ncomms, ")\n", sep = "")
  store <- construct.remote(eu_endpoint, build_name_query(comm_name))
  bigstoreNames <- combine.rdf(store, bigstoreNames)
}
#####################################
#
# CONSTRUCT triples from Europeana
# by the GeoNameID of the municipality
#
#####################################

# Keep only municipalities that have a GeoNameID: id 0 marks "none", and rows
# with a missing id cannot be queried either.
geoids <- geo.df[!is.na(geo.df$id) & geo.df$id != 0, ]
# Excluding Rauma because it generates lots of Norwegian false positives
geoids <- geoids[geoids$c != "Rauma", ]
Ngeoid <- nrow(geoids)

# Build the GeoNameID-based CONSTRUCT query for one municipality.
#
# Finds resources whose Europeana proxy is linked through edm:hasMet to
# `geoid`, takes type and title from the provider proxy in the same
# aggregation, skips TEXT resources, and tags each hit with `name`.
#
# `geoid` is spliced verbatim as the object of edm:hasMet, so it presumably
# already holds a complete SPARQL term (e.g. an IRI in angle brackets) --
# TODO confirm against the code that builds `geo.df`.
build_geoid_query <- function(name, geoid) {
  paste0(
    "CONSTRUCT {\n",
    "  ?resource <http://www.europeana.eu/schemas/edm/type> ?type ;\n",
    "    <http://purl.org/dc/elements/1.1/title> ?title ;\n",
    "    <http://purl.org/dc/elements/1.1/spatial> '", name, "' .\n",
    "}\n",
    "WHERE {\n",
    "  ?euProxy <http://www.openarchives.org/ore/terms/proxyFor> ?resource ;\n",
    "    <http://www.europeana.eu/schemas/edm/hasMet> ", geoid, " ;\n",
    "    <http://www.openarchives.org/ore/terms/proxyIn> ?euAggr .\n",
    "  ?euAggr <http://www.openarchives.org/ore/terms/aggregates>",
    " ?providerAggr .\n",
    "  ?providerProxy <http://www.openarchives.org/ore/terms/proxyIn>",
    " ?providerAggr ;\n",
    "    <http://purl.org/dc/elements/1.1/title> ?title ;\n",
    "    <http://www.europeana.eu/schemas/edm/type> ?type .\n",
    "  FILTER ( ?type != 'TEXT' )\n",
    "}"
  )
}

# Accumulates the triples constructed for every GeoNameID.
bigstoreGeoids <- new.rdf()

# seq_len() makes the loop a no-op when no municipality has a GeoNameID.
for (i in seq_len(Ngeoid)) {
  comm_name <- geoids$c[i]
  # The original referenced an undefined `geoid`; use this row's id.
  geoid <- geoids$id[i]
  cat("Next constructing ", comm_name, " (", i, "/", Ngeoid, ")\n", sep = "")
  store <- construct.remote(eu_endpoint, build_geoid_query(comm_name, geoid))
  bigstoreGeoids <- combine.rdf(store, bigstoreGeoids)
}
# Merge the name-based and the GeoNameID-based stores into a single model.
bigstoreAll <- combine.rdf(bigstoreNames, bigstoreGeoids)
# summarize.rdf(bigstoreAll)

# Write the merged store to disk as RDF/XML.
save.rdf(bigstoreAll, "europeana_rdf_store.xml", format = "RDF/XML")

# Convert from RDF/XML to Turtle/N3 with Jena rdfcat
# http://jena.apache.org/documentation/javadoc/jena/jena/rdfcat.html
#
# $JENAROOT/bin/rdfcat -out ttl -in RDF/XML -x europeana_rdf_store.xml >europeana_rdf_store.ttl
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment