Skip to content

Instantly share code, notes, and snippets.

@guillemcanal
Created June 24, 2021 15:27
Show Gist options
  • Save guillemcanal/053939cdcca17b9c4953582fc9b14184 to your computer and use it in GitHub Desktop.
Save guillemcanal/053939cdcca17b9c4953582fc9b14184 to your computer and use it in GitHub Desktop.
Extract Disney/Pixar movies from Wikidata
# Return the list of movies produced by Disney and Pixar, one row per film.
#
# Output columns:
#   ?item                  film entity IRI
#   ?moviesType            English labels of the film's "instance of" values, joined with ", "
#   ?wikidataID            entity IRI with the "http://www.wikidata.org/entity/" prefix stripped
#   ?name                  English label of the film
#   ?firstPublicationDate  earliest US publication date
#   ?imdbID / ?imdbUrl     IMDb identifier and the title URL built from it
#   ?wikipediaUrl          English Wikipedia article, when one exists
#   ?duration              value of P2047, when present (unit is not normalised here)
#
# Labels are read through rdfs:label with an explicit language filter, so the
# query does not depend on the wikibase:label service.
PREFIX wd: <http://www.wikidata.org/entity/>
PREFIX wds: <http://www.wikidata.org/entity/statement/>
PREFIX wdv: <http://www.wikidata.org/value/>
PREFIX wdt: <http://www.wikidata.org/prop/direct/>
PREFIX wikibase: <http://wikiba.se/ontology#>
PREFIX p: <http://www.wikidata.org/prop/>
PREFIX ps: <http://www.wikidata.org/prop/statement/>
PREFIX pq: <http://www.wikidata.org/prop/qualifier/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX bd: <http://www.bigdata.com/rdf#>
# Needed by the Wikipedia sitelink pattern below; declared explicitly so the
# query also parses on endpoints that do not predefine it.
PREFIX schema: <http://schema.org/>

# GROUP BY ?item already yields a single row per film, so no DISTINCT is needed.
# The variables bound in WHERE deliberately use different names from the
# projected aliases: SPARQL 1.1 does not allow "AS ?x" when ?x is already in scope.
SELECT
  ?item
  (GROUP_CONCAT(DISTINCT ?movieType; separator=", ") AS ?moviesType)
  (SAMPLE(?itemID) AS ?wikidataID)
  (SAMPLE(?title) AS ?name)
  (MIN(?publicationDate) AS ?firstPublicationDate)
  (SAMPLE(?imdbTitleID) AS ?imdbID)
  (SAMPLE(?imdbTitleUrl) AS ?imdbUrl)
  (SAMPLE(?article) AS ?wikipediaUrl)
  (SAMPLE(?runtime) AS ?duration)
WHERE {
  # Produced by "Disney" or "Pixar"
  # TODO: there are multiple Disney production companies; there may be a
  # better way to target all of them than an explicit list.
  VALUES ?productionCompany { wd:Q7414 wd:Q191224 wd:Q1047410 wd:Q1323594 wd:Q127552 }
  ?item wdt:P272 ?productionCompany .

  # Match instances of "film" or of any of its subclasses
  ?item wdt:P31/wdt:P279* wd:Q11424 .

  # Exclude "animated short film" and "short film".
  # The test is made on the item itself: filtering only the ?instanceOf binding
  # would still let through a short film that also has another "instance of" value.
  FILTER NOT EXISTS {
    VALUES ?shortFilmClass { wd:Q17517379 wd:Q24862 }
    ?item wdt:P31 ?shortFilmClass .
  }

  # English label of every "instance of" value (concatenated into ?moviesType)
  ?item wdt:P31 ?instanceOf .
  ?instanceOf rdfs:label ?movieType .
  FILTER(LANG(?movieType) = "en")

  # Title of the movie in English (films without an English label are dropped)
  ?item rdfs:label ?title .
  FILTER(LANG(?title) = "en")

  # Publication date in the US. A film may have several of them
  # (ex: Dumbo, wd:Q40895); the earliest one is kept by MIN() in the projection.
  ?item p:P577 ?publicationStatement .
  ?publicationStatement pq:P291 wd:Q30 ;
                        ps:P577 ?publicationDate .

  # Extract the Wikidata ID: the part of the entity IRI after the namespace
  BIND(STRAFTER(STR(?item), "http://www.wikidata.org/entity/") AS ?itemID)

  # IMDb ID (required: films without one are not returned) and its URL
  ?item wdt:P345 ?imdbTitleID .
  BIND(IRI(CONCAT("https://www.imdb.com/title/", ?imdbTitleID, "/")) AS ?imdbTitleUrl)

  # Associated Wikipedia article written in English, if any
  OPTIONAL {
    ?article schema:about ?item ;
             schema:inLanguage "en" ;
             schema:isPartOf <https://en.wikipedia.org/> .
  }

  # Duration of the feature film, if any
  OPTIONAL { ?item wdt:P2047 ?runtime . }
}
GROUP BY ?item
# Sort on the aggregate: ?publicationDate itself is not bound once the
# solutions have been grouped by ?item.
ORDER BY ASC(?firstPublicationDate)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment