Generate a sorted list of awards from Wikidata
# Copyright 2018-2020 Sergey Leschina (mail@putnik.tech) | |
# | |
# Licensed under the Apache License, Version 2.0 (the "License"); | |
# you may not use this file except in compliance with the License. | |
# You may obtain a copy of the License at | |
# | |
# http://www.apache.org/licenses/LICENSE-2.0 | |
# | |
# Unless required by applicable law or agreed to in writing, software | |
# distributed under the License is distributed on an "AS IS" BASIS, | |
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
# See the License for the specific language governing permissions and | |
# limitations under the License. | |
import copy | |
from collections import deque | |
from SPARQLWrapper import SPARQLWrapper, JSON, POSTDIRECTLY | |
def append_value(graph, key, value): | |
if key is None or value is None: | |
return | |
if not key in graph: | |
graph[key] = [] | |
try: | |
graph[key].index(value) | |
except Exception as e: | |
graph[key].append(value) | |
def connected_components(graph): | |
components = [] | |
seen = set() | |
full_graph = copy.deepcopy(graph) | |
for key in graph: | |
for value in graph[key]: | |
append_value(full_graph, value, key) | |
def dfs(v): | |
vset = set([v]) | |
component = [] | |
while vset: | |
v = vset.pop() | |
seen.add(v) | |
vset |= set(full_graph.get(v, ())) - seen | |
component.append(v) | |
return component | |
for v in full_graph: | |
if v not in seen: | |
component_keys = dfs(v) | |
component = { key: graph.get(key, []) for key in component_keys } | |
components.append(component) | |
return components | |
def topological_sort(graph): | |
VISITED, FINISHED = 0, 1 | |
order = deque() | |
state = {} | |
stack = [] | |
def dfs(v): | |
stack.append(v) | |
state[v] = VISITED | |
for u in graph.get(v, ()): | |
if state.get(u, None) == VISITED: | |
path = stack[stack.index(u):stack.index(v)] | |
print(stack) | |
raise ValueError("Cycle (%s): " % v + " -> ".join(path)) | |
if state.get(u, None) == None: | |
dfs(u) | |
state[v] = FINISHED | |
order.append(v) | |
stack.pop(-1) | |
for v in graph: | |
if state.get(v, None) == None: | |
dfs(v) | |
return order | |
def raw_value(value): | |
if value is None: | |
return None | |
return value["value"].replace("http://www.wikidata.org/entity/", "") | |
def load_data(): | |
sparql = SPARQLWrapper("https://query.wikidata.org/sparql") | |
sparql.setQuery(""" | |
SELECT DISTINCT ?item ?lower ?higher ?country | |
WHERE { | |
{ ?item wdt:P31/wdt:P279* wd:Q618779 } | |
UNION | |
{ ?item wdt:P31/wdt:P279* wd:Q38033430 } | |
. | |
{ | |
?item p:P3729 ?pLower . | |
?pLower ps:P3729 ?lower . | |
FILTER NOT EXISTS { ?pLower pq:P582 ?end . } | |
} UNION { | |
?item p:P3730 ?pHigher . | |
?pHigher ps:P3730 ?higher . | |
FILTER NOT EXISTS { ?pHigher pq:P582 ?end . } | |
} | |
. | |
OPTIONAL { ?item wdt:P17 ?country . } | |
} | |
ORDER BY ?country | |
""") | |
sparql.setRequestMethod(POSTDIRECTLY) | |
sparql.setReturnFormat(JSON) | |
data = [] | |
for result in sparql.query().convert()["results"]["bindings"]: | |
data.append({ | |
"item": raw_value(result['item']), | |
"lower": raw_value(result.get('lower', None)), | |
"higher": raw_value(result.get('higher', None)), | |
}) | |
return data | |
def construct_graph(): | |
graph = {} | |
data = load_data() | |
for row in data: | |
append_value(graph, row["lower"], row["item"]) | |
append_value(graph, row["item"], row["higher"]) | |
return graph | |
graph = construct_graph() | |
components = connected_components(graph) | |
sorted_list = [] | |
for component in components: | |
sorted_list += topological_sort(component) | |
prefix = "\t\"data\": [\n\t\t[\n\t\t\t\"" | |
body = "\"\n\t\t],\n\t\t[\n\t\t\t\"".join(sorted_list) | |
postfix = "\"\n\t\t]\n\t]" | |
print(prefix + body + postfix) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment