Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Generate a sorted list of awards from Wikidata
# Copyright 2018-2020 Sergey Leschina (mail@putnik.tech)
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import copy
from collections import deque
from SPARQLWrapper import SPARQLWrapper, JSON, POSTDIRECTLY
def append_value(graph, key, value):
if key is None or value is None:
return
if not key in graph:
graph[key] = []
try:
graph[key].index(value)
except Exception as e:
graph[key].append(value)
def connected_components(graph):
components = []
seen = set()
full_graph = copy.deepcopy(graph)
for key in graph:
for value in graph[key]:
append_value(full_graph, value, key)
def dfs(v):
vset = set([v])
component = []
while vset:
v = vset.pop()
seen.add(v)
vset |= set(full_graph.get(v, ())) - seen
component.append(v)
return component
for v in full_graph:
if v not in seen:
component_keys = dfs(v)
component = { key: graph.get(key, []) for key in component_keys }
components.append(component)
return components
def topological_sort(graph):
VISITED, FINISHED = 0, 1
order = deque()
state = {}
stack = []
def dfs(v):
stack.append(v)
state[v] = VISITED
for u in graph.get(v, ()):
if state.get(u, None) == VISITED:
path = stack[stack.index(u):stack.index(v)]
print(stack)
raise ValueError("Cycle (%s): " % v + " -> ".join(path))
if state.get(u, None) == None:
dfs(u)
state[v] = FINISHED
order.append(v)
stack.pop(-1)
for v in graph:
if state.get(v, None) == None:
dfs(v)
return order
def raw_value(value):
if value is None:
return None
return value["value"].replace("http://www.wikidata.org/entity/", "")
def load_data():
sparql = SPARQLWrapper("https://query.wikidata.org/sparql")
sparql.setQuery("""
SELECT DISTINCT ?item ?lower ?higher ?country
WHERE {
{ ?item wdt:P31/wdt:P279* wd:Q618779 }
UNION
{ ?item wdt:P31/wdt:P279* wd:Q38033430 }
.
{
?item p:P3729 ?pLower .
?pLower ps:P3729 ?lower .
FILTER NOT EXISTS { ?pLower pq:P582 ?end . }
} UNION {
?item p:P3730 ?pHigher .
?pHigher ps:P3730 ?higher .
FILTER NOT EXISTS { ?pHigher pq:P582 ?end . }
}
.
OPTIONAL { ?item wdt:P17 ?country . }
}
ORDER BY ?country
""")
sparql.setRequestMethod(POSTDIRECTLY)
sparql.setReturnFormat(JSON)
data = []
for result in sparql.query().convert()["results"]["bindings"]:
data.append({
"item": raw_value(result['item']),
"lower": raw_value(result.get('lower', None)),
"higher": raw_value(result.get('higher', None)),
})
return data
def construct_graph():
graph = {}
data = load_data()
for row in data:
append_value(graph, row["lower"], row["item"])
append_value(graph, row["item"], row["higher"])
return graph
graph = construct_graph()
components = connected_components(graph)
sorted_list = []
for component in components:
sorted_list += topological_sort(component)
prefix = "\t\"data\": [\n\t\t[\n\t\t\t\""
body = "\"\n\t\t],\n\t\t[\n\t\t\t\"".join(sorted_list)
postfix = "\"\n\t\t]\n\t]"
print(prefix + body + postfix)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.