Skip to content

Instantly share code, notes, and snippets.

@cgueret
Last active August 29, 2015 14:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save cgueret/cf9f09be973cde7cf993 to your computer and use it in GitHub Desktop.
Save cgueret/cf9f09be973cde7cf993 to your computer and use it in GitHub Desktop.
CEDAR queries

This gist contains several SPARQL queries that can be run against the data published by the CEDAR project.

# Count the number of triples per named graph.
# ?graph is projected next to an aggregate, so it has to be a grouping key:
# SPARQL 1.1 does not allow projecting a non-grouped variable in an aggregate
# query, hence the explicit GROUP BY.
SELECT ?graph (COUNT(*) AS ?triples)
WHERE {
    GRAPH ?graph {
        ?s ?p ?o .
    }
}
GROUP BY ?graph
# Column headers whose parent cell is itself a column header with a value
# matching the seed text (case-insensitive regex).
# Returns the sheet, the header cell and the header's value.
# NOTE(review): "__seed_text__" looks like a placeholder to substitute before
# running the query — confirm.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cedar: <http://lod.cedar-project.nl:8888/cedar/resource/>
PREFIX tablink: <http://bit.ly/cedar-tablink#>

SELECT DISTINCT ?sheet ?cell ?value
FROM <urn:graph:cedar:raw-data>
WHERE {
    # The header cell being listed.
    ?cell a tablink:ColumnHeader ;
          tablink:parentCell ?parent ;
          tablink:value ?value ;
          tablink:sheet ?sheet .

    # Its parent header, whose value is what the regex is tested against.
    ?parent a tablink:ColumnHeader ;
            tablink:value ?target .

    FILTER regex(?target, "__seed_text__", "i")
}
# Row header values whose parent cell is a row property with a value matching
# the seed text (case-insensitive regex), listed with the label of the dataset
# that contains the header's sheet. Sorted by dataset label, then header value.
# Note: the dc: prefix below is bound to the DCMI *terms* namespace.
# NOTE(review): "__seed_text__" looks like a placeholder to substitute before
# running the query — confirm.
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX cedar: <http://lod.cedar-project.nl:8888/cedar/resource/>
PREFIX tablink: <http://bit.ly/cedar-tablink#>
PREFIX dc: <http://purl.org/dc/terms/>

SELECT DISTINCT ?ds ?value
FROM <urn:graph:cedar:raw-data>
WHERE {
    # The row header being listed.
    ?header a tablink:RowHeader ;
            tablink:parentCell ?property ;
            tablink:value ?value ;
            tablink:sheet ?sheet .

    # Its parent row property, whose value is what the regex is tested against.
    ?property a tablink:RowProperty ;
              tablink:value ?target .

    # Walk from the sheet up to the labelled dataset it is part of.
    ?sheet a tablink:Sheet .
    ?dataset dc:hasPart ?sheet ;
             rdfs:label ?ds .

    FILTER regex(?target, "__seed_text__", "i")
}
ORDER BY ?ds ?value
# Dimensions used by the harmonized dataset: every qb:dimension attached to a
# component of the data structure definition of a qb:DataSet in the release graph.
PREFIX qb: <http://purl.org/linked-data/cube#>

SELECT DISTINCT ?dimension
FROM <urn:graph:cedar:release>
WHERE {
    ?dataset a qb:DataSet ;
             qb:structure ?dsd .

    ?dsd a qb:DataStructureDefinition ;
         qb:component ?component .

    # ?component is not projected; it only links the structure to its dimension.
    ?component qb:dimension ?dimension .
}
# Evolution of the number of married women over the years.
# Sums cedarterms:population over observations marked as married and female,
# skipping observations flagged with cedarterms:ignore "1". The year comes from
# the refPeriod of the slice that contains the observation.
PREFIX qb: <http://purl.org/linked-data/cube#>
PREFIX maritalstatus: <http://bit.ly/cedar-maritalstatus#>
PREFIX sdmx-dimension: <http://purl.org/linked-data/sdmx/2009/dimension#>
PREFIX sdmx-code: <http://purl.org/linked-data/sdmx/2009/code#>
PREFIX cedarterms: <http://bit.ly/cedar#>

SELECT ?year (SUM(?pop) AS ?total)
FROM <urn:graph:cedar:release>
WHERE {
    ?obs a qb:Observation ;
         maritalstatus:maritalStatus maritalstatus:married ;
         sdmx-dimension:sex sdmx-code:sex-F ;
         cedarterms:population ?pop .

    ?slice a qb:Slice ;
           qb:observation ?obs ;
           sdmx-dimension:refPeriod ?year .

    FILTER NOT EXISTS { ?obs cedarterms:ignore "1" . }
}
# ?year is projected next to SUM(), so it must be the grouping key; SPARQL 1.1
# does not allow projecting a non-grouped variable in an aggregate query.
GROUP BY ?year
ORDER BY ?year
# Get a list of raw datasets and their location.
# One row per (dataset, source label, dump URL) with the number of distinct
# sheets linked to the dataset through dcterms:hasPart, sorted by source label.
# NOTE(review): the W3C DCAT class is spelled dcat:Dataset; dcat:DataSet is kept
# unchanged here because the published data may use this spelling — confirm
# against the store before changing it.
PREFIX prov: <http://www.w3.org/ns/prov#>
PREFIX dcat: <http://www.w3.org/ns/dcat#>
PREFIX dcterms: <http://purl.org/dc/terms/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>

# No DISTINCT needed: every projected non-aggregate variable is a grouping key,
# so each result row is already unique.
SELECT ?dataset ?source ?dump (COUNT(DISTINCT ?sheet) AS ?nb_sheets)
FROM <urn:graph:cedar:raw-data>
WHERE {
    ?dataset a dcat:DataSet ;
             # Label of a distribution of the resource this dataset was derived from.
             prov:wasDerivedFrom [ dcat:distribution [ rdfs:label ?source ] ] ;
             # Access URL of a distribution of the dataset itself.
             dcat:distribution [ dcterms:accessURL ?dump ] ;
             dcterms:hasPart ?sheet .
}
GROUP BY ?dataset ?source ?dump
ORDER BY ?source
# Top 100 most frequently occurring header values across all the raw datasets.
# Counts the resources typed as column header or row header that carry each value.
PREFIX tablink: <http://bit.ly/cedar-tablink#>

SELECT ?value (COUNT(?header) AS ?refs)
FROM <urn:graph:cedar:raw-data>
WHERE {
    ?header a ?type ;
            tablink:value ?value .
    FILTER (?type IN (tablink:ColumnHeader, tablink:RowHeader))
}
GROUP BY ?value
# ?value is a secondary sort key so that the LIMIT cut-off is deterministic
# when several values share the same count.
ORDER BY DESC(?refs) ?value
LIMIT 100
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment