Skip to content

Instantly share code, notes, and snippets.

@tts
Last active December 14, 2015 17:48
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save tts/5124316 to your computer and use it in GitHub Desktop.
Save tts/5124316 to your computer and use it in GitHub Desktop.
Querying the Europeana SPARQL endpoint on videos related to Finnish municipalities
########################################################
#
# Querying Europeana on the amount of VIDEO objects
# related to Finnish municipalities.
#
# Consonant gradation is not applied,
# i.e. only the base name forms are queried
# (plus endings if applicable).
#
# Tuija Sonkkila 10.3.2013
#
########################################################
library(sorvi)
# Municipality border polygons from the Land Survey of Finland (MML) data
# (C) MML 2011
sp <- LoadMML(data.id = "kunta1_p", resolution = "1_milj_Shape_etrs_shape")
# Pass the Finnish municipality names through sorvi's name conversion and
# re-encode the result from UTF-8 to ISO 8859-1. `comms` is the list of
# search terms used when querying the endpoint further down.
comms <- iconv(ConvertMunicipalityNames(sp$Kunta.FI),
               from = "UTF-8", to = "ISO_8859-1")
######################################################
#
# Querying the Europeana SPARQL endpoint
#
# Linked Open Data (pilot data) by Europeana
# http://pro.europeana.eu/web/guest/linked-open-data
#
# Creative Commons CC0
#
######################################################
library(SPARQL)
eu_endpoint <- "http://europeana.ontotext.com/sparql"

# Build the SPARQL query that counts the distinct Finnish-language ('fi')
# VIDEO resources related to one municipality name.
#
# A resource is counted when `name` matches (as a regex) its dc:description,
# dc:subject, dcterms:alternative or dc:title, or when its dcterms:spatial
# literal equals `name` exactly. Each criterion is its own sub-select and the
# five sub-selects are combined with UNION.
#
# NOTE(review): `name` is pasted into the query verbatim. A name containing a
# single quote or regex metacharacters would change the query; escape it here
# if the name list ever comes from an untrusted source.
#
# name: a single municipality name (character scalar).
# Returns the query as a character scalar.
build_video_count_query <- function(name) {
  # Triple-pattern prefix shared by all five branches
  shared <- paste0(
    "?resource <http://www.europeana.eu/schemas/edm/type> 'VIDEO' ;\n",
    "<http://purl.org/dc/elements/1.1/language> 'fi' ;\n"
  )
  # Predicates whose literal value is searched with regex()
  regex_predicates <- c(
    "http://purl.org/dc/elements/1.1/description",
    "http://purl.org/dc/elements/1.1/subject",
    "http://purl.org/dc/terms/alternative",
    "http://purl.org/dc/elements/1.1/title"
  )
  # ?v is private to each sub-select (only ?r is projected), so the same
  # variable name can be reused in every branch.
  regex_patterns <- paste0(
    shared,
    "<", regex_predicates, "> ?v .\n",
    "FILTER(regex(?v, '", name, "'))\n"
  )
  spatial_pattern <- paste0(
    shared,
    "<http://purl.org/dc/terms/spatial> '", name, "' .\n"
  )
  branches <- paste0(
    "{\n{\nSELECT (?resource AS ?r) WHERE {\n",
    c(regex_patterns, spatial_pattern),
    "}\n}\n}"
  )
  paste0(
    "SELECT (COUNT(DISTINCT ?r) AS ?count) WHERE {\n",
    paste(branches, collapse = "\nUNION\n"),
    "\n}"
  )
}

# One row per municipality: queried name (c) and number of videos (count).
N <- length(comms)
res.v.df <- data.frame(c = character(N),
                       count = integer(N),
                       stringsAsFactors = FALSE)
# seq_len() keeps the loop from running when there are no names (1:0 would
# iterate over c(1, 0)).
for (i in seq_len(N)) {
  # Not called `c`, so base::c is not masked by a character variable
  comm <- comms[i]
  euq <- build_video_count_query(comm)
  cat("Next querying ", comm, " (", i, "/", N, ")\n", sep = "")
  eures <- SPARQL(url = eu_endpoint, euq)$results
  res.v.df$c[i] <- comm
  # The result holds a single cell: the ?count value
  res.v.df$count[i] <- eures[1, 1]
}
write.csv(res.v.df, file = "europeana_result_video.csv", row.names = FALSE)
###############################
#
# Store counts by matching
# with the municipality name
#
###############################
# Look up each polygon's video count by municipality name.
# NOTE(review): res.v.df$c holds the converted/re-encoded names, so a name
# that was altered by the conversion will not match sp$Kunta.FI here and
# ends up with 0 below -- confirm this is intended.
video_counts <- res.v.df$count[match(sp$Kunta.FI, res.v.df$c)]
# Municipalities without a match get 0 videos
video_counts[is.na(video_counts)] <- 0
sp@data$video <- video_counts
# Show the 30 municipalities with the highest counts
head(res.v.df[order(-res.v.df$count), ], n = 30)
######################################
#
# Plot only those municipalities with
# at most 100 videos, i.e. excluding Helsinki
#
########################################
# Keep only the municipalities with at most 100 videos (this drops Helsinki)
max100 <- sp[sp@data$video <= 100, ]
png("europeana_fi_videos_max100.png", width = 1024, height = 768, res = 72)
# Plot the raw video count. The column name is given literally because
# `varname` is not assigned until the per-inhabitant section later in the
# script, where it refers to a different column.
q <- PlotShape(max100, "video", type = "oneway",
               palette = colorRampPalette(c("white", "red", "blue"),
                                          space = "Lab"),
               main = "Europeana video items per municipality excluding Helsinki")
dev.off()
######################################
#
# Plot only those municipalities with
# max 20 videos
#
########################################
# Keep only the municipalities with at most 20 videos, matching the output
# file name and the plot title
max20 <- sp[sp@data$video <= 20, ]
png("europeana_fi_videos_max20.png", width = 1024, height = 768, res = 72)
# Plot the raw video count. The column name is given literally because
# `varname` is not assigned until the per-inhabitant section later in the
# script, where it refers to a different column.
q <- PlotShape(max20, "video", type = "oneway",
               palette = colorRampPalette(c("white", "red", "blue"),
                                          space = "Lab"),
               main = "Europeana video items (max 20) per municipality")
dev.off()
##################################
#
# Count of videos depending on the
# number of inhabitants
#
###################################
municipality.info <- GetMunicipalityInfo()
# Column 4 = Väkiluku 31.12.2011 (population on 31 Dec 2011).
#
# Note that the number of municipalities diminished in 2013.
# However, the data is used here as if this had not occurred.
# In other words, both the maps and the municipality data reflect
# the situation in 2012.
sp@data$ppl <- municipality.info[, 4][match(sp$Kunta.FI, municipality.info[, 1])]
# Municipalities not found in municipality.info get a population of 0 for now
sp[["ppl"]][is.na(sp[["ppl"]])] <- 0
# Which ones have ppl=0?
sp[["Kunta.FI"]][sp[["ppl"]] == 0]
# Fill in the missing numbers of inhabitants, here from Wikipedia...
sp@data$ppl[sp[["Kunta.FI"]] == "Nilsiä"] <- 6528
sp@data$ppl[sp[["Kunta.FI"]] == "Töysä"] <- 3160
sp@data$ppl[sp[["Kunta.FI"]] == "Suomenniemi"] <- 763
sp@data$ppl[sp[["Kunta.FI"]] == "Nummi-Pusula"] <- 6175
sp@data$ppl[sp[["Kunta.FI"]] == "Karjalohja"] <- 1474
sp@data$ppl[sp[["Kunta.FI"]] == "Ristiina"] <- 4856
sp@data$ppl[sp[["Kunta.FI"]] == "Kerimäki"] <- 5526
sp@data$ppl[sp[["Kunta.FI"]] == "Kesälahti"] <- 2326
sp@data$ppl[sp[["Kunta.FI"]] == "Punkaharju"] <- 3702
sp@data$ppl[sp[["Kunta.FI"]] == "Koski Tl"] <- 2449
sp@data$ppl[sp[["Kunta.FI"]] == "Kiiminki"] <- 13320
sp@data$ppl[sp[["Kunta.FI"]] == "Yli-Ii"] <- 2179
sp@data$ppl[sp[["Kunta.FI"]] == "Vihanti"] <- 3020
sp@data$ppl[sp[["Kunta.FI"]] == "Vähäkyrö"] <- 4727
sp@data$ppl[sp[["Kunta.FI"]] == "Hämeenkyrö"] <- 10529
sp@data$ppl[sp[["Kunta.FI"]] == "Kiikoinen"] <- 1245
sp@data$ppl[sp[["Kunta.FI"]] == "Haukipudas"] <- 19053
sp@data$ppl[sp[["Kunta.FI"]] == "Oulunsalo"] <- 9897
# Videos per 100 inhabitants, multiplied by 100 and rounded to a whole number
sp@data$vidper100ppl <- round((sp@data$video / (sp@data$ppl / 100)) * 100)
# A population that is still 0 gives NaN (0 videos) or Inf (> 0 videos);
# is.na() alone would let Inf through to the plot, so reset every
# non-finite value to 0.
sp[["vidper100ppl"]][!is.finite(sp[["vidper100ppl"]])] <- 0
varname <- "vidper100ppl"
png("europeana_fi_videos_per100inhab.png", width = 1024, height = 768, res = 72)
q <- PlotShape(sp, varname, type = "oneway",
               main = "Europeana video items per 100 inhabitant (x 100)")
dev.off()
#################################################
#
# Plotting frequencies as in
# https://github.com/louhos/sorvi/wiki/Datavaalit
#
#################################################
library(ggplot2)
# Draw the first `n` rows of `df` as a text plot (municipality names placed
# at their count) and write it to the PNG file `file`.
#
# df:   data frame with columns `c` (label), `count` and `ind` (rank, 1 = top),
#       already sorted in descending order of `count`.
# n:    number of rows to show; capped at nrow(df) so that indexing past the
#       end does not add all-NA rows.
# file: path of the PNG file to write.
# Returns the ggplot object, invisibly.
plot_top_municipalities <- function(df, n, file) {
  top <- df[seq_len(min(n, nrow(df))), ]
  p <- ggplot(data = top, aes(x = rev(ind), y = count)) +
    geom_text(aes(label = c), size = 4) +
    scale_x_continuous(limits = c(1, n)) +
    scale_y_continuous(limits = c(0.6 * min(top$count),
                                  1.04 * max(top$count) + 20)) +
    coord_flip() +
    ylab("Videoita") +
    xlab("Kunta")
  png(file, width = 1024, height = 768, res = 72)
  # Close the device even if rendering the plot fails
  on.exit(dev.off(), add = TRUE)
  print(p)
  invisible(p)
}

# Sort by count, largest first, and number the rows by rank
res.v.df <- res.v.df[rev(order(res.v.df$count)), ]
res.v.df$ind <- seq_len(nrow(res.v.df))
# Without Helsinki
res.v.nohki.df <- res.v.df[res.v.df$c != "Helsinki", ]
res.v.nohki.df$ind <- seq_len(nrow(res.v.nohki.df))
n <- 40
pics_all <- plot_top_municipalities(res.v.df, n, "europeana_fi_videos_all.png")
pics_nohki <- plot_top_municipalities(res.v.nohki.df, n,
                                      "europeana_fi_videos_nohki.png")
# Proportional: the same plot for the per-inhabitant figure
v.p.df <- data.frame(c = sp[["Kunta.FI"]], count = sp[["vidper100ppl"]])
v.p.df <- v.p.df[rev(order(v.p.df$count)), ]
v.p.df$ind <- seq_len(nrow(v.p.df))
n <- 40
pics_p <- plot_top_municipalities(v.p.df, n, "europeana_fi_videos_prop.png")
#################################################################
#
# How many objects have been enriched with the GeoNameID of Finland?
#
# Group by object type and provider
#
#################################################################
# SPARQL query: for every (?type, ?contrib) pair, count the distinct ?object
# values reached through a proxy (?euProxy) that carries
# edm:hasMet <http://sws.geonames.org/660013/>. The ?contrib value is read from
# the resource map describing the proxy's aggregation (?euAggr), and ?type from
# a proxy (?providerProxy) in the aggregation that ?euAggr aggregates.
# Only groups with a count above 1 are kept, sorted by count in descending order.
query <- "SELECT ?type ?contrib (COUNT(DISTINCT ?object) AS ?count)
WHERE {
?euProxy <http://www.openarchives.org/ore/terms/proxyFor> ?object ;
<http://www.europeana.eu/schemas/edm/hasMet> <http://sws.geonames.org/660013/> ;
<http://www.openarchives.org/ore/terms/proxyIn> ?euAggr .
?euAggr <http://www.openarchives.org/ore/terms/aggregates> ?providerAggr .
?resourceMap <http://www.openarchives.org/ore/terms/describes> ?euAggr ;
<http://purl.org/dc/elements/1.1/contributor> ?contrib .
?providerProxy <http://www.openarchives.org/ore/terms/proxyIn> ?providerAggr ;
<http://www.europeana.eu/schemas/edm/type> ?type .
} GROUP BY ?type ?contrib
HAVING (?count > 1)
ORDER BY DESC(?count)"
# Send the query to the endpoint stored in eu_endpoint and keep the $results
# element of the value returned by SPARQL().
answer <- SPARQL(url = eu_endpoint, query)$results
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment