Skip to content

Instantly share code, notes, and snippets.

@ColinMaudry
Last active August 29, 2015 14:25
Show Gist options
  • Save ColinMaudry/bf595abbee81284a594a to your computer and use it in GitHub Desktop.
Save ColinMaudry/bf595abbee81284a594a to your computer and use it in GitHub Desktop.
Examples of queries on data.gouv.fr and data.gov.uk metadata.
# Per-resource report (dcat:Distribution): publishing organization, parent
# dataset, resource title, download URL, availability flag, HTTP status code,
# response time, date of the availability check and format.
# Every property below is mandatory (no OPTIONAL), so a resource lacking any
# of them is left out. Only the first 10 rows, sorted by organization label,
# are returned.
PREFIX dct: <http://purl.org/dc/terms/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dgfr: <http://colin.maudry.com/ontologies/dgfr#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>

SELECT
  ?organizationLabel
  ?datasetLabel
  ?datasetUrl
  ?resourceTitle
  ?url
  ?available
  ?httpResponse
  ?responseTime
  ?availabilityCheckDate
  ?format
WHERE {
  GRAPH <http://www.data.maudry.com> {
    # The organization and the datasets it published.
    ?organization a foaf:Organization .
    ?organization rdfs:label ?organizationLabel .
    ?organization dgfr:published ?dataset .

    # The dataset, its landing page and its resources.
    ?dataset a dcat:Dataset .
    ?dataset rdfs:label ?datasetLabel .
    ?dataset dcat:landingPage ?datasetUrl .
    ?dataset dcat:distribution ?resource .

    # The resource and the outcome of its availability check.
    ?resource a dcat:Distribution .
    ?resource rdfs:label ?resourceTitle .
    ?resource dcat:downloadUrl ?url .
    ?resource dgfr:format ?format .
    ?resource dgfr:available ?available .
    ?resource dgfr:responseStatusCode ?httpResponse .
    ?resource dgfr:responseTime ?responseTime .
    ?resource dgfr:availabilityCheckedOn ?availabilityCheckDate .
  }
}
ORDER BY ?organizationLabel
LIMIT 10
# Every RDF class used in the main named graph, together with the number of
# typed instances it has, sorted by class.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dgfr: <http://colin.maudry.com/ontologies/dgfr#>

SELECT DISTINCT
  ?class
  (COUNT(?thing) AS ?numInstances)
WHERE {
  GRAPH <http://www.data.maudry.com> {
    # "a" is shorthand for rdf:type: one match per (instance, class) pair.
    ?thing a ?class
  }
}
GROUP BY ?class
ORDER BY ?class
# Resources whose download URL is malformed, listed with their dataset and
# their publishing organization, sorted by publisher name.
# A URL is considered malformed here when it contains a percent-encoded
# scheme separator ("http%3A%2F%2F" or "https%3A%2F%2F", case-insensitive).
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dcterms: <http://purl.org/dc/terms/>

SELECT ?publisherName ?datasetName ?datasetUrl ?resourceTitle ?resourceUrl
WHERE {
  GRAPH <http://www.data.maudry.com> {
    ?dataset a dcat:Dataset ;
             dcat:distribution ?resource ;
             dcat:landingPage ?datasetUrl ;
             rdfs:label ?datasetName .

    ?resource a dcat:Distribution ;
              rdfs:label ?resourceTitle ;
              dcat:downloadUrl ?url ;
              dcterms:publisher ?publisher .

    ?publisher a foaf:Organization ;
               rdfs:label ?publisherName .

    # Keep only the URLs that contain a percent-encoded "http://" or "https://".
    FILTER REGEX(STR(?url), "https?%3A%2F%2F", "i")

    # The query engine prepends its own base URL to the URLs it does not
    # understand, so that prefix is removed. REPLACE takes a regular
    # expression: the pattern is anchored with "^" so that only the leading
    # prefix is stripped, never a later occurrence inside the URL.
    BIND(REPLACE(STR(?url), "^http://localhost:3030/datagouvfr/", "") AS ?resourceUrl)
  }
}
ORDER BY ?publisherName
# Enumerates the named graphs of the repository that hold at least one triple.
# <http://www.data.maudry.com> is the named graph where most of the data is
# stored.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dgfr: <http://colin.maudry.com/ontologies/dgfr#>

SELECT DISTINCT ?graph
WHERE {
  # Any triple at all: DISTINCT collapses the matches to one row per graph.
  GRAPH ?graph { ?subject ?predicate ?object }
}
# The 10 organizations with the highest total number of downloads, with their
# number of datasets and of distributions (resources).
# Rows are grouped by organization label. Only distributions that carry a
# dgfr:downloads value match, so the counts cover those distributions and
# their datasets only.
PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX foaf: <http://xmlns.com/foaf/0.1/>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dgfr: <http://colin.maudry.com/ontologies/dgfr#>

SELECT
  ?organisation
  (COUNT(DISTINCT ?publishedDataset) AS ?nombreDatasets)
  (COUNT(DISTINCT ?resource) AS ?nombreRessources)
  (SUM(?downloadCount) AS ?nombreTéléchargements)
WHERE {
  GRAPH <http://www.data.maudry.com> {
    ?publisher a foaf:Organization .
    ?publisher rdfs:label ?organisation .
    ?publisher dgfr:published ?publishedDataset .

    ?publishedDataset a dcat:Dataset .
    ?publishedDataset dcat:distribution ?resource .

    ?resource dgfr:downloads ?downloadCount .
  }
}
GROUP BY ?organisation
ORDER BY DESC(?nombreTéléchargements)
LIMIT 10
# Global statistics on resources (dcat:Distribution), returned as a single row:
#   ?countMRResource        - resources flagged dgfr:machineReadable true
#   ?countResource          - all resources
#   ?countAvailableResource - resources flagged dgfr:available false
#                             NOTE(review): despite its name, this column
#                             counts the UNAVAILABLE resources; the name is
#                             kept so existing consumers of the result keep
#                             working.
#   ?avgResponseTime        - average dgfr:responseTime of available resources
#   ?countResource3s        - available resources with a response time >= 3
#   ?countResource10s       - available resources with a response time >= 10
# The thresholds are presumably in seconds, per the "3s"/"10s" names - confirm.
#
# Each figure is computed by its own subquery. The subqueries share no
# variable and each returns exactly one row, so joining them gives one row
# holding all the figures.
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dgfr: <http://colin.maudry.com/ontologies/dgfr#>

SELECT ?countMRResource ?countResource ?countAvailableResource ?avgResponseTime ?countResource3s ?countResource10s
WHERE {
  # Total number of resources.
  {
    SELECT (COUNT(?resource) AS ?countResource)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution
      }
    }
  }

  # Number of unavailable resources (see the note on the column name above).
  {
    SELECT (COUNT(?resource) AS ?countAvailableResource)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution .
        ?resource dgfr:available false
      }
    }
  }

  # Number of machine-readable resources.
  {
    SELECT (COUNT(DISTINCT ?resource) AS ?countMRResource)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution .
        ?resource dgfr:machineReadable true
      }
    }
  }

  # Average response time, over available resources only.
  {
    SELECT (AVG(?responseTime) AS ?avgResponseTime)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution .
        ?resource dgfr:available true ;
                  dgfr:responseTime ?responseTime .
      }
    }
  }

  # Available resources whose response time is at least 3.
  {
    SELECT (COUNT(?resource) AS ?countResource3s)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution .
        ?resource dgfr:available true ;
                  dgfr:responseTime ?responseTime .
        FILTER (?responseTime >= 3)
      }
    }
  }

  # Available resources whose response time is at least 10.
  {
    SELECT (COUNT(?resource) AS ?countResource10s)
    WHERE {
      GRAPH <http://www.data.maudry.com> {
        ?resource a dcat:Distribution .
        ?resource dgfr:available true ;
                  dgfr:responseTime ?responseTime .
        FILTER (?responseTime >= 10)
      }
    }
  }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment