Create a gist now

Instantly share code, notes, and snippets.

Wikidata-based annotation research

This gist extends the previous one and consists of an interface for searching the annotations within a text. Since the annotations are marked with Wikidata resources, an index of their classes is built in order to enable searches. Two languages are supported: English and Italian.

<?php
/* Search proxy: looks up the Wikidata items matching the `text` query
   parameter in every configured language and prints the merged matches
   as a JSON array.
*/
header("Content-Type: application/json");

/* An absent or empty query cannot match anything: answer right away instead
   of sending a meaningless request to Wikidata.
*/
if (!isset($_GET['text']) || !is_string($_GET['text']) || $_GET['text'] === '') {
    http_response_code(400);
    echo json_encode(array());
    exit;
}

$text = urlencode($_GET['text']);
$final_result = array();

/* Making ASYNC calls would support even more languages
*/
$languages = array('en', 'it');

/* A call to the Wikidata API is performed for as long as the search-continue
   attribute is present in the API result
*/
foreach ($languages as $lang) {
    $offset = 0;
    do {
        /* Warnings are silenced so that a failed request cannot corrupt the
           JSON body; the failure itself is detected through the return value.
        */
        $response = @file_get_contents("https://www.wikidata.org/w/api.php?action=wbsearchentities&type=item&search=$text&limit=max&continue=$offset&language=$lang&format=json");
        $result = ($response === false) ? null : json_decode($response, true);

        /* A failed request, malformed JSON or an API error carries no `search`
           list: stop paging this language and keep what was collected so far.
        */
        if (!is_array($result) || !isset($result['search']) || !is_array($result['search'])) {
            break;
        }

        $final_result = array_merge($final_result, $result['search']);

        $has_more = isset($result['search-continue']);
        if ($has_more) {
            $offset = $result['search-continue'];
        }
    } while ($has_more);
}

/* array_unique leaves gaps in the numeric keys; array_values re-indexes them
   so that the output is encoded as a JSON array and not as a JSON object.
*/
echo json_encode(array_values(array_unique($final_result, SORT_REGULAR)));
?>
### Index creation

Builds window.class_index: each key is the id of a Wikidata class and each
value is the list of the Wikidata ids of the annotations belonging to that
class. An id is pushed once per annotation element, hence a resource which is
annotated twice within the text is listed twice.
###
window.class_index = {}

d3.selectAll('.annotation[data-wd-id]')[0].forEach (d) ->
  # The Wikidata SPARQL endpoint is called for retrieving all the classes of a certain Wikidata resource
  url = "https://query.wikidata.org/sparql?query=SELECT%20DISTINCT%20%3Fclass%20%7B%7B%20wd%3A#{d.dataset.wdId}%20wdt%3AP31%20%3Fsclass%20.%20%3Fsclass%20wdt%3AP279*%20%3Fclass%20.%20%7D%20UNION%20%7B%20wd%3A#{d.dataset.wdId}%20wdt%3AP279*%20%3Fclass%20.%20%7D%7D&format=json"

  d3.json url, (error, data) ->
    # A failed request provides no data: the annotation is skipped instead of
    # raising a TypeError, so that the other annotations are still indexed
    if error? or not data?.results?.bindings?
      console.warn "Unable to retrieve the classes of #{d.dataset.wdId}", error
      return

    data.results.bindings.forEach (c) ->
      # The class id is the last segment of the class URI
      key = c.class.value.split('/').slice(-1)[0]

      if key not of class_index
        class_index[key] = []

      class_index[key].push d.dataset.wdId
### Searching features

A search is started either by pressing Enter within the textbox or by
clicking the search button.
###
search_input = d3.select '#header input'
  .on 'keydown', () ->
    # d3.event is the event being handled by this listener; the bare `event`
    # global is a non-standard alias which is not available in every browser.
    # 13 is the key code of the Enter key
    if d3.event.keyCode is 13
      search()

d3.select '#search_button'
  .on 'click', () ->
    search()

search_results = d3.select '#search_results'
### Marks the clicked result as selected and, within the text, marks as
selected the annotations indexed under the given class while all the other
annotations are shown as simply highlighted
###
select = (node, id) ->
  annotations = d3.selectAll '.annotation[data-wd-id]'

  # The previous selection is dropped before marking the clicked node
  d3.select('.selected').classed 'selected', false
  d3.select(node).classed 'selected', true

  # Every annotation goes back to the default state...
  annotations.classed('highlighted', true).classed('selected', false)

  # ...then the instances of the chosen class are singled out
  for wd_id in class_index[id]
    d3.selectAll(".annotation[data-wd-id=#{wd_id}]").classed('selected', true).classed('highlighted', false)

  return
### Keeps, among the entities coming from Wikidata, only those whose id is a key of the index
###
filter = (data) ->
  (entity for entity in data when entity.id of class_index)
### Searches for the string inserted within the textbox

The entities returned by api.php are restricted to the classes found in the
index, sorted by decreasing number of annotated instances and listed inside
the search results container. Nothing happens when the textbox is empty.
###
search = () ->
  query_string = d3.select('#header input').node().value
  return if query_string is ''

  # The query, the labels and the descriptions are plain text: they are escaped
  # before being interpolated into markup, so that they cannot inject HTML.
  # Missing values (e.g. an entity without a description) become an empty string
  escape_html = (value) ->
    text = String(value ? '')
    text = text.replace(/&/g, '&amp;')
    text = text.replace(/</g, '&lt;')
    text = text.replace(/>/g, '&gt;')
    text = text.replace(/"/g, '&quot;')
    text.replace(/'/g, '&#39;')

  # The query is URL-encoded, otherwise characters such as & or # would
  # truncate or alter the text received by the server
  d3.json "api.php?text=#{encodeURIComponent(query_string)}", (error, data) ->
    # Handling failed requests: the current list is left untouched
    if error? or not Array.isArray(data)
      console.error "Search request failed for '#{query_string}'", error
      return

    data = filter data
    data.sort (a, b) -> d3.descending(class_index[a.id].length, class_index[b.id].length)

    # Handling no results
    search_results.select('.no_results').remove()

    if data.length is 0
      search_results.append('div')
        .attr('class', 'no_results')
        .html "No results found for <i>#{escape_html(query_string)}</i>."

    # Drawing results
    results = search_results.selectAll('.result')
      .data data, (d) -> d.id

    enter_results = results.enter().append('div')
      .attr('class', 'result')
      .on 'click', (d) -> select this, d.id

    # Keeps the DOM order aligned with the sorted data
    results.order()

    resource = enter_results.append('div')
      .attr('class', 'resource')

    resource.html (d) -> "<span class='label'>#{escape_html(d.label)}</span> <span>(<a target='_blank' class='link' href='#{escape_html(d.concepturi)}'>#{escape_html(d.id)}</a>)</span><div class='description'>#{escape_html(d.description)}</div>"

    count = enter_results.append('div')
      .attr('class', 'count')
      .text (d) -> class_index[d.id].length

    count.append('title')
      .text (d) -> "#{class_index[d.id].length} #{d.label} has been found"

    results.exit().remove()
/* PAGE
   The document fills the whole viewport.
*/
html, body {
  margin: 0;
  padding: 0;
  width: 100%;
  height: 100%;
  font-family: sans-serif;
  font-size: 15px;
}

/* HEADER
   Search textbox and search button.
*/
#header {
  padding: 5px;
}

#header input {
  width: 30%;
  height: 30px;
  font-size: 15px;
}

#header #search_button {
  width: 60px;
  height: 32px;
  background: #d0d0d0;
  font-size: 15px;
  border: 0;
  color: #fff;
  border-radius: 3px;
  cursor: pointer;
}

/* CORE
   Two columns of equal width: a zero base width plus the same flex-grow
   makes the columns share the available space evenly.
*/
#core {
  display: flex;
  flex-direction: row;
  width: 100%;
  height: 100%;
}

#core > * {
  height: 100%;
  width: 0;
  padding: 10px;
}

#search_results {
  flex-grow: 1;
}

#text {
  flex-grow: 1;
  font-family: serif;
  text-align: justify;
  line-height: 20px;
}

/* SEARCH RESULTS
   Each result is a row made of the resource details and of a counter.
*/
.result {
  display: flex;
  flex-direction: row;
  align-items: center;
  width: 80%;
  margin: 5px 0 5px 5px;
  padding: 5px;
  background: #e2e2e2;
  cursor: pointer;
}

.result .resource {
  width: 90%;
}

.result .count {
  width: 10%;
  text-align: center;
}

.result:hover {
  background: #fef2c9;
}

.result .link {
  color: steelblue;
}

.result .link:hover {
  text-decoration: none;
}

.result .description {
  color: #999;
  font-style: italic;
  font-size: 13px;
}

/* ANNOTATIONS
   .selected is declared after .highlighted, hence it prevails when both
   classes are set on the same element.
*/
.highlighted {
  background: #e2e2e2;
}

.selected {
  background: #fef2c9;
}

.annotation {
  padding: 1px;
}
<!DOCTYPE html>
<html>
<head>
  <meta charset="utf-8">
  <title>Wikidata-based annotation research</title>
  <link rel="stylesheet" href="index.css">
  <link rel="stylesheet" href="https://maxcdn.bootstrapcdn.com/font-awesome/4.5.0/css/font-awesome.min.css">
  <!-- D3 is loaded over HTTPS: a plain HTTP script is blocked as mixed content when the page itself is served over HTTPS -->
  <script src="https://d3js.org/d3.v3.min.js"></script>
</head>
<body>
<!-- Search bar: the textbox and the icon-only button get an accessible name, since neither has a visible text label -->
<div id="header">
  <input type="text" aria-label="Search the annotations">
  <button id="search_button" type="button" title="Find" aria-label="Find"><i class="fa fa-search" aria-hidden="true"></i></button>
</div>
<!-- Main area: #search_results is selected by the script, which appends the search results to it; #text holds the annotated text.
     Each .annotation span carries in data-wd-id the id of the Wikidata resource it refers to; the script reads this attribute to build its class index. -->
<div id="core">
<div id="search_results"></div>
<div id="text">
<span class="annotation highlighted" data-wd-id="Q13375">Pisa</span> (/ˈpiːzə/; Italian pronunciation: [ˈpiːsa][2]) is a city in <span class="annotation highlighted" data-wd-id="Q1273">Tuscany</span>, Central <span class="annotation highlighted" data-wd-id="Q38">Italy</span>, straddling the River <span class="annotation highlighted" data-wd-id="Q115457">Arno</span> just before it empties into the <span class="annotation highlighted" data-wd-id="Q38882">Tyrrhenian Sea</span>. It is the <span class="annotation highlighted" data-wd-id="Q5119">capital city</span> of the <span class="annotation highlighted" data-wd-id="Q16244">Province of Pisa</span>. Although <span class="annotation highlighted" data-wd-id="Q13375">Pisa</span> is known worldwide for its <span class="annotation highlighted" data-wd-id="Q39054">leaning tower</span> (the <span class="annotation highlighted" data-wd-id="Q200334">bell tower</span> of the city's cathedral), the city of over 89,940 residents (around 200,000 with the metropolitan area) contains more than <span class="annotation highlighted" data-wd-id="Q40292">20</span> other historic <span class="annotation highlighted" data-wd-id="Q16970">churches</span>, several medieval palaces and various <span class="annotation highlighted" data-wd-id="Q12280">bridges</span> across the River <span class="annotation highlighted" data-wd-id="Q115457">Arno</span>. Much of the city's architecture was financed from its history as one of the <span class="annotation highlighted" data-wd-id="Q62825">Italian maritime republics</span>.
The city is also home of the <span class="annotation highlighted" data-wd-id="Q645663">University of Pisa</span>, which has a history going back to the <span class="annotation highlighted" data-wd-id="Q7061">12th century</span> and also has the mythic Napoleonic <span class="annotation highlighted" data-wd-id="Q672416">Scuola Normale Superiore</span> and <span class="annotation highlighted" data-wd-id="Q774609">Sant'Anna School of Advanced Studies</span> as the best sanctioned Superior Graduate Schools in <span class="annotation highlighted" data-wd-id="Q38">Italy</span>.
</div>
</div>
<script src="index.js"></script>
</body>
</html>
// Generated by CoffeeScript 1.10.0
/* Index creation
*/
(function() {
var filter, search, search_input, search_results, select;
/* Builds window.class_index: each key is the id of a Wikidata class and each
   value is the list of the Wikidata ids of the annotations belonging to that
   class. An id is pushed once per annotation element, hence a resource which
   is annotated twice within the text is listed twice.
*/
window.class_index = {};

d3.selectAll('.annotation[data-wd-id]')[0].forEach(function(d) {
  var url;
  // The Wikidata SPARQL endpoint is called for retrieving all the classes of a certain Wikidata resource
  url = "https://query.wikidata.org/sparql?query=SELECT%20DISTINCT%20%3Fclass%20%7B%7B%20wd%3A" + d.dataset.wdId + "%20wdt%3AP31%20%3Fsclass%20.%20%3Fsclass%20wdt%3AP279*%20%3Fclass%20.%20%7D%20UNION%20%7B%20wd%3A" + d.dataset.wdId + "%20wdt%3AP279*%20%3Fclass%20.%20%7D%7D&format=json";
  return d3.json(url, function(error, data) {
    // A failed request provides no data: the annotation is skipped instead of
    // raising a TypeError, so that the other annotations are still indexed
    if (error != null || data == null || data.results == null || data.results.bindings == null) {
      console.warn("Unable to retrieve the classes of " + d.dataset.wdId, error);
      return;
    }
    return data.results.bindings.forEach(function(c) {
      var key;
      // The class id is the last segment of the class URI
      key = c["class"].value.split('/').slice(-1)[0];
      if (!(key in class_index)) {
        class_index[key] = [];
      }
      return class_index[key].push(d.dataset.wdId);
    });
  });
});
/* Searching features
   A search is started either by pressing Enter within the textbox or by
   clicking the search button.
*/
search_input = d3.select('#header input').on('keydown', function() {
  // d3.event is the event being handled by this listener; the bare `event`
  // global is a non-standard alias which is not available in every browser.
  // 13 is the key code of the Enter key
  if (d3.event.keyCode === 13) {
    return search();
  }
});

d3.select('#search_button').on('click', function() {
  return search();
});

search_results = d3.select('#search_results');
/* Marks the clicked result as selected and, within the text, marks as
   selected the annotations indexed under the given class while all the other
   annotations are shown as simply highlighted
*/
select = function(node, id) {
  var annotations, i, instance_ids, matching;

  annotations = d3.selectAll(".annotation[data-wd-id]");

  // The previous selection is dropped before marking the clicked node
  d3.select('.selected').classed('selected', false);
  d3.select(node).classed('selected', true);

  // Every annotation goes back to the default state...
  annotations.classed('highlighted', true).classed('selected', false);

  // ...then the instances of the chosen class are singled out
  instance_ids = class_index[id];
  for (i = 0; i < instance_ids.length; i++) {
    matching = d3.selectAll(".annotation[data-wd-id=" + instance_ids[i] + "]");
    matching.classed('selected', true).classed('highlighted', false);
  }
};
/* Keeps, among the entities coming from Wikidata, only those whose id is a
   key of the index
*/
filter = function(data) {
  var kept = [];
  data.forEach(function(entity) {
    if (entity.id in class_index) {
      kept.push(entity);
    }
  });
  return kept;
};
/* Searches for the string inserted within the textbox
   The entities returned by api.php are restricted to the classes found in the
   index, sorted by decreasing number of annotated instances and listed inside
   the search results container. Nothing happens when the textbox is empty.
*/
search = function() {
  var escape_html, query_string;

  query_string = d3.select('#header input').node().value;
  if (query_string === '') {
    return;
  }

  // The query, the labels and the descriptions are plain text: they are escaped
  // before being interpolated into markup, so that they cannot inject HTML.
  // Missing values (e.g. an entity without a description) become an empty string
  escape_html = function(value) {
    return String(value != null ? value : '')
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  // The query is URL-encoded, otherwise characters such as & or # would
  // truncate or alter the text received by the server
  return d3.json("api.php?text=" + encodeURIComponent(query_string), function(error, data) {
    var count, enter_results, resource, results;

    // Handling failed requests: the current list is left untouched
    if (error != null || !Array.isArray(data)) {
      console.error("Search request failed for '" + query_string + "'", error);
      return;
    }

    data = filter(data);
    data.sort(function(a, b) {
      return d3.descending(class_index[a.id].length, class_index[b.id].length);
    });

    // Handling no results
    search_results.select('.no_results').remove();
    if (data.length === 0) {
      search_results.append('div').attr({
        "class": 'no_results'
      }).html("No results found for <i>" + escape_html(query_string) + "</i>.");
    }

    // Drawing results
    results = search_results.selectAll('.result').data(data, function(d) {
      return d.id;
    });
    enter_results = results.enter().append('div').attr({
      "class": 'result'
    }).on('click', function(d) {
      return select(this, d.id);
    });

    // Keeps the DOM order aligned with the sorted data
    results.order();

    resource = enter_results.append('div').attr({
      "class": 'resource'
    });
    resource.html(function(d) {
      return "<span class='label'>" + escape_html(d.label) + "</span> <span>(<a target='_blank' class='link' href='" + escape_html(d.concepturi) + "'>" + escape_html(d.id) + "</a>)</span><div class='description'>" + escape_html(d.description) + "</div>";
    });

    count = enter_results.append('div').attr({
      "class": 'count'
    }).text(function(d) {
      return class_index[d.id].length;
    });
    count.append('title').text(function(d) {
      return class_index[d.id].length + " " + d.label + " has been found";
    });

    return results.exit().remove();
  });
};
}).call(this);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment