Last active
December 14, 2015 17:48
-
-
Save tts/5124316 to your computer and use it in GitHub Desktop.
Querying the Europeana SPARQL endpoint on videos related to Finnish municipalities
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
######################################################## | |
# | |
# Querying Europeana on the amount of VIDEO objects | |
# related to Finnish municipalities. | |
# | |
# Consonant gradation is not applied, | |
# i.e. only the base name forms are queried | |
# (plus endings if applicable). | |
# | |
# Tuija Sonkkila 10.3.2013 | |
# | |
######################################################## | |
library(sorvi) | |
# Municipality border polygons from Land Survey Finland (MML) data
# (C) MML 2011
sp <- LoadMML(data.id = "kunta1_p", resolution = "1_milj_Shape_etrs_shape")
# Pass the Finnish municipality names through ConvertMunicipalityNames() and
# re-encode the result from UTF-8 to ISO 8859-1; these are the names that get
# interpolated into the SPARQL queries below.
comms <- iconv(
  ConvertMunicipalityNames(sp$Kunta.FI),
  from = "UTF-8",
  to = "ISO_8859-1"
)
###################################################### | |
# | |
# Querying the Europeana SPARQL endpoint | |
# | |
# Linked Open Data (pilot data) by Europeana | |
# http://pro.europeana.eu/web/guest/linked-open-data | |
# | |
# Creative Commons CC0 | |
# | |
###################################################### | |
library(SPARQL) | |
eu_endpoint <- "http://europeana.ontotext.com/sparql"

# Build the SPARQL query that counts the distinct Finnish-language ('fi')
# VIDEO resources related to one municipality.
#
# A resource is counted when `name` matches (as a regex) its dc:description,
# dc:subject, dcterms:alternative or dc:title, or when its dcterms:spatial
# equals `name` exactly.
#
# `name` is pasted into the query verbatim: it must not contain a single
# quote, and any regex metacharacters in it are interpreted as such.
#
# Returns the query as a single string.
build_video_count_query <- function(name) {
  paste0("SELECT (COUNT(DISTINCT ?r) AS ?count) WHERE {
{
{
SELECT (?resource AS ?r) WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/description> ?d .
FILTER(regex(?d, '", name, "'))
}
}
}
UNION
{
{
SELECT (?resource AS ?r) WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/subject> ?s .
FILTER(regex(?s, '", name, "'))
}
}
}
UNION
{
{
SELECT (?resource AS ?r) WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/terms/alternative> ?a .
FILTER(regex(?a, '", name, "'))
}
}
}
UNION
{
{
SELECT (?resource AS ?r) WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/elements/1.1/title> ?t .
FILTER(regex(?t, '", name, "'))
}
}
}
UNION
{
{
SELECT (?resource AS ?r) WHERE {
?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;
<http://purl.org/dc/elements/1.1/language> 'fi' ;
<http://purl.org/dc/terms/spatial> '", name, "' .
}
}
}
}")
}

# One result row per municipality, preallocated and filled in by the loop.
N <- length(comms)
res.v.df <- data.frame(c = character(N),
                       count = integer(N),
                       stringsAsFactors = FALSE)

# seq_len() keeps the loop from running at all when there are no names
# (1:N would iterate over c(1, 0)).
for (i in seq_len(N)) {
  kunta <- comms[i]
  cat("Next querying ", kunta, " (", i, "/", N, ")\n", sep = "")
  eures <- SPARQL(url = eu_endpoint, build_video_count_query(kunta))$results
  res.v.df$c[i] <- kunta
  # The query returns a single aggregate; take it from the first cell.
  res.v.df$count[i] <- eures[1, 1]
}

write.csv(res.v.df, file = "europeana_result_video.csv", row.names = FALSE)
############################### | |
# | |
# Store counts by matching | |
# with the municipality name | |
# | |
############################### | |
# Find, for every polygon, the row of the query result that carries the same
# municipality name, and copy that row's count into a new "video" column.
# NOTE(review): res.v.df$c holds the names after ConvertMunicipalityNames()
# and iconv(); any name those calls altered will not match the raw
# sp$Kunta.FI here and falls through to 0 below - verify.
result_row <- match(sp$Kunta.FI, res.v.df$c)
sp@data$video <- res.v.df$count[result_row]
# Municipalities without a matching result row get a count of 0
no_count <- is.na(sp[["video"]])
sp[["video"]][no_count] <- 0
# The 30 municipalities with the highest counts, largest first
head(res.v.df[order(-res.v.df$count), ], n = 30)
###################################### | |
# | |
# Plot only those municipalities with | |
# max 100 videos, ie exclude Helsinki | |
# | |
######################################## | |
# Keep the municipalities with at most 100 videos and draw their counts on a
# map, written to europeana_fi_videos_max100.png.
max100 <- sp[sp@data$video <= 100, ]
png("europeana_fi_videos_max100.png", width = 1024, height = 768, res = 72)
# The column is named explicitly: `varname` is only assigned further down the
# script, so referring to it here fails (or silently plots a stale column in
# an interactive session).
q <- PlotShape(max100, "video", type = "oneway",
               palette = colorRampPalette(c("white", "red", "blue"),
                                          space = "Lab"),
               main = "Europeana video items per municipality excluding Helsinki")
dev.off()
###################################### | |
# | |
# Plot only those municipalities with | |
# max 20 videos | |
# | |
######################################## | |
# Keep the municipalities with at most 20 videos and draw their counts on a
# map, written to europeana_fi_videos_max20.png. The threshold matches the
# file name and the plot title.
max20 <- sp[sp@data$video <= 20, ]
png("europeana_fi_videos_max20.png", width = 1024, height = 768, res = 72)
# The column is named explicitly: `varname` is only assigned further down the
# script, so it cannot be relied on at this point.
q <- PlotShape(max20, "video", type = "oneway",
               palette = colorRampPalette(c("white", "red", "blue"),
                                          space = "Lab"),
               main = "Europeana video items (max 20) per municipality")
dev.off()
################################## | |
# | |
# Count of videos depending on the | |
# number of inhabitants | |
# | |
################################### | |
municipality.info <- GetMunicipalityInfo()
# Column 1 is used as the municipality name and column 4 as the population
# ("Väkiluku 31.12.2011", i.e. population on 31 Dec 2011).
#
# The number of municipalities went down in 2013, but that change is ignored
# here: both the maps and the municipality data describe the 2012 situation.
info_row <- match(sp$Kunta.FI, municipality.info[, 1])
sp@data$ppl <- municipality.info[, 4][info_row]
# Municipalities not found in the info table get a population of 0 for now
ppl_unknown <- is.na(sp[["ppl"]])
sp[["ppl"]][ppl_unknown] <- 0
# List the municipalities that are still at ppl = 0
sp[["Kunta.FI"]][sp[["ppl"]] == 0]
# Population figures for the municipalities listed below, entered by hand
# (source: Wikipedia). Each value is written to the "ppl" column of every row
# whose Kunta.FI equals the name, in the order given.
manual_ppl <- c(
  "Nilsiä" = 6528,
  "Töysä" = 3160,
  "Suomenniemi" = 763,
  "Nummi-Pusula" = 6175,
  "Karjalohja" = 1474,
  "Ristiina" = 4856,
  "Kerimäki" = 5526,
  "Kesälahti" = 2326,
  "Punkaharju" = 3702,
  "Koski Tl" = 2449,
  "Kiiminki" = 13320,
  "Yli-Ii" = 2179,
  "Vihanti" = 3020,
  "Vähäkyrö" = 4727,
  "Hämeenkyrö" = 10529,
  "Kiikoinen" = 1245,
  "Haukipudas" = 19053,
  "Oulunsalo" = 9897
)
for (kunta_name in names(manual_ppl)) {
  sp@data$ppl[sp[["Kunta.FI"]] == kunta_name] <- manual_ppl[[kunta_name]]
}
# Videos per 100 inhabitants, multiplied by 100 and rounded to a whole number.
sp@data$vidper100ppl <- round((sp@data$video / (sp@data$ppl / 100)) * 100)
# A population of 0 makes the ratio NaN (0 videos) or Inf (some videos).
# is.na() alone would let Inf through to the plot, so reset every non-finite
# value to 0.
sp[["vidper100ppl"]][!is.finite(sp[["vidper100ppl"]])] <- 0
varname <- "vidper100ppl"
png("europeana_fi_videos_per100inhab.png", width = 1024, height = 768, res = 72)
q <- PlotShape(sp, varname, type = "oneway",
               main = "Europeana video items per 100 inhabitant (x 100)")
dev.off()
################################################# | |
# | |
# Plotting frequencies as in | |
# https://github.com/louhos/sorvi/wiki/Datavaalit | |
# | |
################################################# | |
library(ggplot2) | |
# Sort a data frame by its `count` column, largest first, and add a running
# rank column `ind` (1 = largest count). Returns the sorted data frame.
rank_by_count <- function(df) {
  df <- df[rev(order(df$count)), ]
  df$ind <- seq_len(nrow(df))
  df
}

# Draw the first `n` rows of a ranked data frame as a text plot and write it
# to the PNG file `file`.
#
# df   data frame with columns `c` (label), `count` and `ind` (rank), already
#      sorted so that the rows to show come first
# file path of the PNG file to write
# n    number of rows to show; also fixes the extent of the rank axis
#
# Each label is printed at its count, with the highest-ranked row at the top.
# Returns the ggplot object invisibly.
plot_top_counts <- function(df, file, n = 40) {
  # head() never pads with NA rows when df has fewer than n rows
  top <- head(df, n)
  png(file, width = 1024, height = 768, res = 72)
  # Close the device even if building or printing the plot fails
  on.exit(dev.off(), add = TRUE)
  p <- ggplot(data = top, aes(x = rev(ind), y = count)) +
    geom_text(aes(label = c), size = 4) +
    scale_x_continuous(limits = c(1, n)) +
    scale_y_continuous(limits = c(0.6 * min(top$count),
                                  1.04 * max(top$count) + 20)) +
    coord_flip() +
    ylab("Videoita") +
    xlab("Kunta")
  print(p)
  invisible(p)
}

n <- 40

# Absolute counts, all municipalities
res.v.df <- rank_by_count(res.v.df)
pics_all <- plot_top_counts(res.v.df, "europeana_fi_videos_all.png", n)

# Without Helsinki; the rows are already sorted, so only the rank is renewed
res.v.nohki.df <- res.v.df[res.v.df$c != "Helsinki", ]
res.v.nohki.df$ind <- seq_len(nrow(res.v.nohki.df))
pics_nohki <- plot_top_counts(res.v.nohki.df, "europeana_fi_videos_nohki.png", n)

# Proportional
v.p.df <- data.frame(c = sp[["Kunta.FI"]], count = sp[["vidper100ppl"]])
v.p.df <- rank_by_count(v.p.df)
pics_p <- plot_top_counts(v.p.df, "europeana_fi_videos_prop.png", n)
################################################################# | |
# | |
# How many objects have been enriched with the GeoNameID of Finland? | |
# | |
# Group by object type and provider | |
# | |
################################################################# | |
# SPARQL query: for every combination of object type and contributor, count
# the distinct objects whose Europeana proxy has edm:hasMet pointing at
# GeoNames resource 660013. Groups with a count of 1 or less are dropped and
# the rest are returned largest first.
query <- paste(
  c(
    "SELECT ?type ?contrib (COUNT(DISTINCT ?object) AS ?count)",
    "WHERE {",
    "?euProxy <http://www.openarchives.org/ore/terms/proxyFor> ?object ;",
    "<http://www.europeana.eu/schemas/edm/hasMet> <http://sws.geonames.org/660013/> ;",
    "<http://www.openarchives.org/ore/terms/proxyIn> ?euAggr .",
    "?euAggr <http://www.openarchives.org/ore/terms/aggregates> ?providerAggr .",
    "?resourceMap <http://www.openarchives.org/ore/terms/describes> ?euAggr ;",
    "<http://purl.org/dc/elements/1.1/contributor> ?contrib .",
    "?providerProxy <http://www.openarchives.org/ore/terms/proxyIn> ?providerAggr ;",
    "<http://www.europeana.eu/schemas/edm/type> ?type .",
    "} GROUP BY ?type ?contrib",
    "HAVING (?count > 1)",
    "ORDER BY DESC(?count)"
  ),
  collapse = "\n"
)
# Run the query against the Europeana endpoint and keep the result table
answer <- SPARQL(url = eu_endpoint, query)$results
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment