Skip to content

Instantly share code, notes, and snippets.

@kleem
Last active August 29, 2015 14:02
Show Gist options
  • Save kleem/4cbcd2e37c7415663f1b to your computer and use it in GitHub Desktop.
Save kleem/4cbcd2e37c7415663f1b to your computer and use it in GitHub Desktop.
Ruby annotations

A different take on the previous example: linguistic annotations are represented using Ruby annotations and their related CSS properties. See this article by Richard Ishida from W3C for more information.

This implementation should be better than the previous one from a semantic web perspective, since ruby tags more or less describe the semantics of an annotation. It also has the advantage of requiring no CSS voodoo (with the exception of some -webkit- prefixed properties). Unfortunately, browser support is still incomplete, so it may not work in your browser of choice (it works in Chrome 31 for sure).

[
[
{"token":"Halley's Comet" , "lemma":"Halley's Comet", "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Halley%27s_Comet"}},
{"token":" "},
{"token":"or" , "lemma":"or" , "pos":"conjunction"},
{"token":" "},
{"token":"Comet Halley" , "lemma":"Comet Halley" , "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Halley%27s_Comet"}},
{"token":" "},
{"token":"is" , "lemma":"be" , "pos":"verb"},
{"token":" "},
{"token":"the" , "lemma":"the" , "pos":"article"},
{"token":" "},
{"token":"best-known" , "lemma":"best-known" , "pos":"adjective"},
{"token":" "},
{"token":"of" , "lemma":"of" , "pos":"preposition"},
{"token":" "},
{"token":"the" , "lemma":"the" , "pos":"article"},
{"token":" "},
{"token":"short-period" , "lemma":"short-period" , "pos":"adjective"},
{"token":" "},
{"token":"comets" , "lemma":"comet" , "pos":"noun"},
{"token":" "},
{"token":"and" , "lemma":"and" , "pos":"conjunction"},
{"token":" "},
{"token":"is" , "lemma":"be" , "pos":"verb"},
{"token":" "},
{"token":"visible" , "lemma":"visible" , "pos":"adjective"},
{"token":" "},
{"token":"from" , "lemma":"from" , "pos":"preposition"},
{"token":" "},
{"token":"Earth" , "lemma":"Earth" , "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Earth"}},
{"token":" "},
{"token":"every" , "lemma":"every" , "pos":"adverb"},
{"token":" "},
{"token":"75" , "lemma":"75" , "pos":"adjective"},
{"token":"–"},
{"token":"76" , "lemma":"76" , "pos":"adjective"},
{"token":" "},
{"token":"years" , "lemma":"year" , "pos":"noun"},
{"token":".\n"}
],[
{"token":"Halley's Comet" , "lemma":"Halley's Comet", "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Halley%27s_Comet"}},
{"token":" "},
{"token":"returns" , "lemma":"return" , "pos":"noun"},
{"token":" "},
{"token":"to" , "lemma":"to" , "pos":"preposition"},
{"token":" "},
{"token":"the" , "lemma":"the" , "pos":"article"},
{"token":" "},
{"token":"inner" , "lemma":"inner" , "pos":"adjective"},
{"token":" "},
{"token":"Solar System" , "lemma":"Solar System" , "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Solar_System"}},
{"token":" "},
{"token":"have been observed", "lemma":"observe" , "pos":"verb"},
{"token":" "},
{"token":"and" , "lemma":"and" , "pos":"conjunction"},
{"token":" "},
{"token":"recorded" , "lemma":"record" , "pos":"verb"},
{"token":" "},
{"token":"by" , "lemma":"by" , "pos":"preposition"},
{"token":" "},
{"token":"astronomers" , "lemma":"astronomer" , "pos":"noun"},
{"token":" "},
{"token":"since" , "lemma":"since" , "pos":"preposition"},
{"token":" "},
{"token":"at least" , "lemma":"at least" , "pos":"adverb"},
{"token":" "},
{"token":"240 BCE" , "lemma":"240 BCE" , "pos":"noun" , "ne": {"class": "date", "id": "-239"}},
{"token":".\n"}
],[
{"token":"The" , "lemma":"the" , "pos":"article"},
{"token":" "},
{"token":"comet's" , "lemma":"comet" , "pos":"noun"},
{"token":" "},
{"token":"periodicity" , "lemma":"periodicity" , "pos":"noun"},
{"token":" "},
{"token":"was determined" , "lemma":"determine" , "pos":"verb"},
{"token":" "},
{"token":"in" , "lemma":"in" , "pos":"preposition"},
{"token":" "},
{"token":"1705" , "lemma":"1705" , "pos":"noun" , "ne": {"class": "date", "id": "1705"}},
{"token":" "},
{"token":"by" , "lemma":"by" , "pos":"preposition"},
{"token":" "},
{"token":"English" , "lemma":"English" , "pos":"adjective"},
{"token":"\n"},
{"token":"astronomer" , "lemma":"astronomer" , "pos":"noun"},
{"token":" "},
{"token":"Edmond Halley" , "lemma":"Edmond Halley" , "pos":"noun" , "ne": {"class": "person", "id": "http://en.wikipedia.org/wiki/Edmond_Halley"}},
{"token":", "},
{"token":"after" , "lemma":"after" , "pos":"preposition"},
{"token":" "},
{"token":"whom" , "lemma":"whom" , "pos":"pronoun"},
{"token":" "},
{"token":"it" , "lemma":"it" , "pos":"pronoun"},
{"token":" "},
{"token":"is named" , "lemma":"name" , "pos":"verb"},
{"token":". "}
],[
{"token":"Halley's Comet" , "lemma":"Halley's Comet", "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Halley%27s_Comet"}},
{"token":" "},
{"token":"last" , "lemma":"last" , "pos":"adverb"},
{"token":" "},
{"token":"appeared" , "lemma":"appear" , "pos":"verb"},
{"token":" "},
{"token":"in" , "lemma":"in" , "pos":"preposition"},
{"token":" "},
{"token":"the" , "lemma":"the" , "pos":"article"},
{"token":" "},
{"token":"inner" , "lemma":"inner" , "pos":"adjective"},
{"token":" "},
{"token":"Solar System" , "lemma":"Solar System" , "pos":"noun" , "ne": {"class": "astronomical_object", "id": "http://en.wikipedia.org/wiki/Solar_System"}},
{"token":" "},
{"token":"in" , "lemma":"in" , "pos":"preposition"},
{"token":" "},
{"token":"1986" , "lemma":"1986" , "pos":"noun" , "ne": {"class": "date", "id": "1986"}},
{"token":" "},
{"token":"and" , "lemma":"and" , "pos":"conjunction"},
{"token":" "},
{"token":"will appear" , "lemma":"appear" , "pos":"verb"},
{"token":" "},
{"token":"in" , "lemma":"in" , "pos":"preposition"},
{"token":" "},
{"token":"2061" , "lemma":"2061" , "pos":"noun" , "ne": {"class": "date", "id": "2061"}},
{"token":".\n"}
]
]
# Turn a token datum into the HTML markup used to display it.
#
# The token text is escaped first so that '&', '<' and '>' are shown as
# literal characters instead of being parsed as markup; only afterwards are
# newlines converted into explicit <br/> elements.
token_html = (d) ->
  escaped = d.token.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
  escaped.replace(/\n/g, '<br/>')

# Entry point, invoked from the page's onload handler.
#
# Loads 'halley.json' (an array of sentences, each an array of token objects)
# and renders every sentence as a span of tokens. Tokens that carry a lemma
# are wrapped in a <ruby> element whose <rt> shows the lemma, colored by the
# token's part of speech; tokens without a lemma are rendered as plain spans.
window.main = () ->
  d3.json 'halley.json', (error, corpus) ->
    # nothing can be drawn without the corpus: report the problem and stop
    return console.warn(error) if error

    # part-of-speech -> color (conjunctions and articles share the same gray)
    pos_color = d3.scale.ordinal()
      .domain(['noun','verb','adjective','adverb','pronoun','conjunction','preposition','article'])
      .range(['#335BE2','#EF1D84','#FFBA1F','#57BF00','#24A6DE','#CCC','#D197C4','#CCC'])

    vis = d3.select('body')

    # one span per sentence
    sentences = vis.selectAll('.sentence')
        .data(corpus)
      .enter().append('span')
        .attr('class', 'sentence')

    # one span per token; each sentence datum is itself the array of its tokens
    new_tokens = sentences.selectAll('.token')
        .data((d) -> d)
      .enter().append('span')
        .attr('class', 'token')

    # draw annotated tokens
    rubys = new_tokens.filter((d) -> d.lemma?)
      .append('ruby')

    rubys.append('rb')
      .html(token_html)

    rubys.append('rt')
      .text((d) -> d.lemma)
      .style('border-top', (d) -> "2px solid #{pos_color(d.pos)}")
      .style('color', (d) -> pos_color(d.pos))

    # draw non-annotated tokens (e.g. spaces)
    new_tokens.filter((d) -> not d.lemma?)
      .append('span')
        .html(token_html)
/* Base typography shared by every token (annotated or not). */
.token {
  font-family: sans-serif;
  font-size: 10pt;
}
.sentence {
  padding: 2px;
}
/* Each sentence is introduced by a gray square marker. */
.sentence::before {
  content: "■ ";
  color: #aaaaaa;
}
/* lemma: extra line height leaves room for the annotation drawn below the text */
.sentence {
  line-height: 2.5em;
}
/*
 * Place the annotation below the base text.
 * The legacy prefixed property uses the old before/after keywords, while the
 * standard property only accepts over/under, so both spellings are needed.
 * The prefixed declaration comes first so that the standard one wins wherever
 * both are understood.
 */
ruby {
  -webkit-ruby-position: after;
  ruby-position: under;
}
rb {
  padding-bottom: 4px;
}
rt {
  font-size: 8pt;
  text-align: center;
}
/* bl.ocks: page margin around the rendered text */
body {
  padding: 40px;
}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Ruby annotations</title>
<link type="text/css" href="index.css" rel="stylesheet"/>
<!-- D3 is loaded over HTTPS so the script is not blocked as mixed content when this page itself is served over HTTPS -->
<script src="https://d3js.org/d3.v3.min.js"></script>
<!-- index.js defines window.main, which the body's onload handler calls -->
<script src="index.js"></script>
</head>
<body onload="main()"></body>
</html>
(function() {
  /**
   * Turn a token datum into the HTML markup used to display it.
   *
   * The token text is escaped first so that '&', '<' and '>' are shown as
   * literal characters instead of being parsed as markup; only afterwards are
   * newlines converted into explicit <br/> elements.
   *
   * @param {{token: string}} d - token datum bound to the element.
   * @returns {string} HTML-safe markup for the token.
   */
  function tokenToHtml(d) {
    return d.token
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/\n/g, '<br/>');
  }

  /**
   * Entry point, invoked from the page's onload handler.
   *
   * Loads 'halley.json' (an array of sentences, each an array of token
   * objects) and renders every sentence as a span of tokens. Tokens that carry
   * a lemma are wrapped in a <ruby> element whose <rt> shows the lemma, colored
   * by the token's part of speech; tokens without a lemma are rendered as
   * plain spans.
   *
   * @returns {*} whatever d3.json returns for the issued request.
   */
  window.main = function() {
    return d3.json('halley.json', function(error, corpus) {
      // Nothing can be drawn without the corpus: report the problem and stop.
      if (error) return console.warn(error);

      // Part-of-speech -> color (conjunctions and articles share the same gray).
      var pos_color = d3.scale.ordinal()
        .domain(['noun', 'verb', 'adjective', 'adverb', 'pronoun', 'conjunction', 'preposition', 'article'])
        .range(['#335BE2', '#EF1D84', '#FFBA1F', '#57BF00', '#24A6DE', '#CCC', '#D197C4', '#CCC']);

      var vis = d3.select('body');

      // One span per sentence.
      var sentences = vis.selectAll('.sentence')
        .data(corpus)
        .enter().append('span')
        .attr('class', 'sentence');

      // One span per token; each sentence datum is itself the array of its tokens.
      var new_tokens = sentences.selectAll('.token')
        .data(function(d) {
          return d;
        })
        .enter().append('span')
        .attr('class', 'token');

      /* draw annotated tokens */
      var rubys = new_tokens.filter(function(d) {
        return d.lemma != null;
      }).append('ruby');

      rubys.append('rb').html(tokenToHtml);

      rubys.append('rt')
        .text(function(d) {
          return d.lemma;
        })
        .style('border-top', function(d) {
          return "2px solid " + (pos_color(d.pos));
        })
        .style('color', function(d) {
          return pos_color(d.pos);
        });

      /* draw non-annotated tokens (e.g. spaces) */
      return new_tokens.filter(function(d) {
        // `== null` deliberately matches both null and undefined
        return d.lemma == null;
      }).append('span').html(tokenToHtml);
    });
  };
}).call(this);
// base typography shared by every token (annotated or not)
.token
  font-family: sans-serif
  font-size: 10pt

.sentence
  padding: 2px

// each sentence is introduced by a gray square marker
.sentence::before
  content: '■ '
  color: #AAA

// lemma: extra line height leaves room for the annotation drawn below the text
.sentence
  line-height: 2.5em

// Place the annotation below the base text.
// The legacy prefixed property uses the old before/after keywords, while the
// standard property only accepts over/under, so both spellings are needed.
// The prefixed declaration comes first so that the standard one wins wherever
// both are understood.
ruby
  -webkit-ruby-position: after
  ruby-position: under

rb
  padding-bottom: 4px

rt
  font-size: 8pt
  text-align: center

// bl.ocks: page margin around the rendered text
body
  padding: 40px
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment