Skip to content

Instantly share code, notes, and snippets.

@jackrusher
Last active January 8, 2022 10:47
Show Gist options
  • Save jackrusher/c41c979711378bfd5ecc4719849b7965 to your computer and use it in GitHub Desktop.
Save jackrusher/c41c979711378bfd5ecc4719849b7965 to your computer and use it in GitHub Desktop.
Quick example of using Clojure with the Wikidata API to grab some facts about the world.
;;assumes that this package is available:
;;[org.wikidata.wdtk/wdtk-wikibaseapi "0.7.0"]
(import org.wikidata.wdtk.datamodel.interfaces.EntityDocument)
(import org.wikidata.wdtk.datamodel.interfaces.ItemDocument)
(import org.wikidata.wdtk.datamodel.interfaces.PropertyIdValue)
(import org.wikidata.wdtk.wikibaseapi.WikibaseDataFetcher)
(import org.wikidata.wdtk.wikibaseapi.apierrors.MediaWikiApiErrorException)
;; Trying to automatically build a list of the presidents of the US
;; using the Wikidata Java API wrapped with a bit of Clojure
;; Memoized one-argument function: given an entity id string (e.g. "Q76"),
;; asks a shared WikibaseDataFetcher for the matching entity document.
(def get-entity-document
  (let [fetcher (WikibaseDataFetcher/getWikidataDataFetcher)
        fetch   (fn [id] (.getEntityDocument fetcher id))]
    ;; Configure the fetcher's filter once, up front: only the English
    ;; wiki's site links and only English-language terms for now.
    (doto (.getFilter fetcher)
      (.setSiteLinkFilter (java.util.Collections/singleton "enwiki"))
      (.setLanguageFilter (java.util.Collections/singleton "en")))
    ;; Cache results per id to improve performance and reduce network traffic.
    (memoize fetch)))
(defn get-property-id
  "Returns the id of the property that `thing` refers to, i.e. the result
  of calling `.getPropertyId` on `thing` and then `.getId` on that."
  [thing]
  (-> thing .getPropertyId .getId))
(defn get-value-id
  "Returns the id of `thing`'s value (`.getValue` followed by `.getId`).

  Returns nil, instead of throwing a NullPointerException, when `thing`
  itself is nil or when `.getValue` yields nil. Presumably the latter
  happens for Wikidata \"no value\"/\"unknown value\" snaks -- confirm
  against the WDTK documentation."
  [thing]
  ;; some-> stops at the first nil so a missing value ends the lookup
  ;; quietly rather than blowing up the surrounding filter/threading chain.
  (some-> thing .getValue .getId))
(defn find-claim
  "Returns the first qualifier of `statement`'s claim whose property id
  equals `id`, or nil when no qualifier matches."
  [id statement]
  (let [qualifiers (-> statement .getClaim .getAllQualifiers iterator-seq)]
    ;; `some` hands back the qualifier itself for the first match.
    (some (fn [qualifier]
            (when (= id (get-property-id qualifier))
              qualifier))
          qualifiers)))
(defn find-statement
  "Returns the first element of `statements` whose value id (as computed
  by `get-value-id`) equals `id`, or nil when there is none."
  [id statements]
  (->> statements
       (filter (fn [statement] (= id (get-value-id statement))))
       first))
(defn find-statement-group
  "Returns the statements of `document`'s statement group for property
  `id`, or nil when `.findStatementGroup` finds no group for that property.

  Arguments are ordered (id, document) so the function can be used
  directly in a `->>` / `some->>` pipeline that threads the document."
  [id document]
  ;; findStatementGroup may return nil; short-circuit instead of throwing
  ;; a NullPointerException so `some->>` callers can stop cleanly.
  (some-> (.findStatementGroup document id) .getStatements))
;; Step by step: whom did Barack Obama replace as POTUS?
(let [obama     (get-entity-document "Q76")           ;; Barack Obama
      positions (find-statement-group "P39" obama)    ;; position(s) held
      potus     (find-statement "Q11696" positions)   ;; POTUS
      replaces  (find-claim "P1365" potus)]           ;; "replaces" qualifier
  (get-value-id replaces))
;;=> "Q207"
;; oy, what's that then?
(defn get-label
  "Returns the text of the first label in `document`'s label map.

  Returns nil, instead of throwing a NullPointerException, when `document`
  is nil or when it carries no labels at all."
  [document]
  ;; `first` on the label map yields a map entry (or nil for an empty map);
  ;; its value is the label object whose text we want.
  (some-> document .getLabels first .getValue .getText))
;; Fetch the document for Q207 and read its label.
(-> "Q207" get-entity-document get-label)
;;=> "George W. Bush"
;; Ok, this looks like a job for recursion!
;; Walk backwards from Q22686 by repeatedly following the P1365 qualifier
;; on the "position held (P39) = Q11696" statement, collecting
;; {:id ... :name ...} maps along the way.
;;
;; `seen` records every id already visited: the data comes from a remote,
;; editable source, so a cycle in the predecessor links must end the walk
;; instead of looping forever.
(loop [id "Q22686" out [] seen #{}]
  (let [doc  (get-entity-document id)
        out  (conj out {:id id :name (get-label doc)})
        seen (conj seen id)
        prev (some->> doc
                      (find-statement-group "P39")
                      (find-statement "Q11696")
                      (find-claim "P1365")
                      get-value-id)]
    ;; Continue only while there is a predecessor we have not visited yet.
    (if (and prev (not (contains? seen prev)))
      (recur prev out seen)
      out)))
;;=>
[{:id "Q22686", :name "Donald Trump"}
{:id "Q76", :name "Barack Obama"}
{:id "Q207", :name "George W. Bush"}
{:id "Q1124", :name "Bill Clinton"}
{:id "Q23505", :name "George H. W. Bush"}
{:id "Q9960", :name "Ronald Reagan"}
{:id "Q23685", :name "Jimmy Carter"}
{:id "Q9582", :name "Gerald Ford"}
{:id "Q9588", :name "Richard Nixon"}
{:id "Q9640", :name "Lyndon B. Johnson"}
{:id "Q9696", :name "John F. Kennedy"}]
;; I'm not sure, but I suspect there were more presidents before
;; JFK. Unfortunately, it turns out the "replaces" property hasn't
;; been set on Kennedy's record. It has for Eisenhower, but not for
;; Truman, and so on.
;; one can also bring in RDF4J (formerly Sesame)
;; [org.eclipse.rdf4j/rdf4j-query "2.1.2"]
;; [org.eclipse.rdf4j/rdf4j-repository-api "2.1.2"]
;; [org.eclipse.rdf4j/rdf4j-runtime "2.1.2"]
;; An open connection to a SPARQLRepository pointed at the Wikidata
;; query service endpoint.
(def wikidata
  (let [repository (org.eclipse.rdf4j.repository.sparql.SPARQLRepository.
                    "https://query.wikidata.org/sparql")]
    ;; The repository is initialized before a connection is requested.
    (.initialize repository)
    (.getConnection repository)))
(defn query
  "Prepares `sparql-string` as a SPARQL tuple query on `conn`, evaluates
  it, and returns the result collected via `QueryResults/asList`."
  [conn sparql-string]
  (let [language org.eclipse.rdf4j.query.QueryLanguage/SPARQL
        prepared (.prepareTupleQuery conn language sparql-string)]
    (-> prepared
        .evaluate
        org.eclipse.rdf4j.query.QueryResults/asList)))
;; NB a two-clause SPARQL query:
;;
;; items that have held the position of POTUS
;; ?item wdt:P39 wd:Q11696.
;;
;; items that are human beings
;; ?item wdt:P31 wd:Q5.
;;
;; without the second one, we get loads of fictional POTUS
;; Result rows of the two-clause query: items that are human beings (P31 = Q5)
;; and have held the position of POTUS (P39 = Q11696), with English labels
;; requested from the label service.
(def results
  (let [sparql "SELECT ?item ?itemLabel WHERE {
?item wdt:P31 wd:Q5.
?item wdt:P39 wd:Q11696.
SERVICE wikibase:label { bd:serviceParam wikibase:language \"en\". }
}"]
    (query wikidata sparql)))
;; Turn every result row into a vector holding the string form of each
;; bound value, in the order the row reports its binding names.
(letfn [(row->strings [bindings]
          (mapv (fn [binding-name]
                  (.toString (.getValue bindings binding-name)))
                (.getBindingNames bindings)))]
  (mapv row->strings results))
;;=>
[["http://www.wikidata.org/entity/Q23" "\"George Washington\"@en"]
["http://www.wikidata.org/entity/Q76" "\"Barack Obama\"@en"]
["http://www.wikidata.org/entity/Q91" "\"Abraham Lincoln\"@en"]
["http://www.wikidata.org/entity/Q207" "\"George W. Bush\"@en"]
["http://www.wikidata.org/entity/Q1124" "\"Bill Clinton\"@en"]
["http://www.wikidata.org/entity/Q8007" "\"Franklin Delano Roosevelt\"@en"]
["http://www.wikidata.org/entity/Q8612" "\"Andrew Johnson\"@en"]
["http://www.wikidata.org/entity/Q9582" "\"Gerald Ford\"@en"]
["http://www.wikidata.org/entity/Q9588" "\"Richard Nixon\"@en"]
["http://www.wikidata.org/entity/Q9640" "\"Lyndon B. Johnson\"@en"]
["http://www.wikidata.org/entity/Q9696" "\"John F. Kennedy\"@en"]
["http://www.wikidata.org/entity/Q9916" "\"Dwight D. Eisenhower\"@en"]
["http://www.wikidata.org/entity/Q9960" "\"Ronald Reagan\"@en"]
["http://www.wikidata.org/entity/Q11613" "\"Harry S. Truman\"@en"]
["http://www.wikidata.org/entity/Q11806" "\"John Adams\"@en"]
["http://www.wikidata.org/entity/Q11812" "\"Thomas Jefferson\"@en"]
["http://www.wikidata.org/entity/Q11813" "\"James Madison\"@en"]
["http://www.wikidata.org/entity/Q11815" "\"James Monroe\"@en"]
["http://www.wikidata.org/entity/Q11816" "\"John Quincy Adams\"@en"]
["http://www.wikidata.org/entity/Q11817" "\"Andrew Jackson\"@en"]
["http://www.wikidata.org/entity/Q11820" "\"Martin Van Buren\"@en"]
["http://www.wikidata.org/entity/Q11869" "\"William Henry Harrison\"@en"]
["http://www.wikidata.org/entity/Q11881" "\"John Tyler\"@en"]
["http://www.wikidata.org/entity/Q11891" "\"James K. Polk\"@en"]
["http://www.wikidata.org/entity/Q11896" "\"Zachary Taylor\"@en"]
["http://www.wikidata.org/entity/Q12306" "\"Millard Fillmore\"@en"]
["http://www.wikidata.org/entity/Q12312" "\"Franklin Pierce\"@en"]
["http://www.wikidata.org/entity/Q12325" "\"James Buchanan\"@en"]
["http://www.wikidata.org/entity/Q23505" "\"George H. W. Bush\"@en"]
["http://www.wikidata.org/entity/Q23685" "\"Jimmy Carter\"@en"]
["http://www.wikidata.org/entity/Q33866" "\"Theodore Roosevelt\"@en"]
["http://www.wikidata.org/entity/Q34296" "\"Woodrow Wilson\"@en"]
["http://www.wikidata.org/entity/Q34597" "\"James A. Garfield\"@en"]
["http://www.wikidata.org/entity/Q34836" "\"Ulysses S. Grant\"@en"]
["http://www.wikidata.org/entity/Q35041" "\"William McKinley\"@en"]
["http://www.wikidata.org/entity/Q35171" "\"Grover Cleveland\"@en"]
["http://www.wikidata.org/entity/Q35236" "\"Herbert Hoover\"@en"]
["http://www.wikidata.org/entity/Q35286" "\"Warren G. Harding\"@en"]
["http://www.wikidata.org/entity/Q35498" "\"Chester A. Arthur\"@en"]
["http://www.wikidata.org/entity/Q35648" "\"William Howard Taft\"@en"]
["http://www.wikidata.org/entity/Q35678" "\"Benjamin Harrison\"@en"]
["http://www.wikidata.org/entity/Q35686" "\"Rutherford B. Hayes\"@en"]
["http://www.wikidata.org/entity/Q36023" "\"Calvin Coolidge\"@en"]]
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment