Create a gist now

Instantly share code, notes, and snippets.

Manuscript Transcription
`
// noprotect
`
### Manuscript EDITOR
    Layout constants, d3 helpers and the static parts of the diagram.
###

# Geometry of the diagram, in pixels.
# NOTE(review): `height` and `diameter` are not read by the visible code.
width = 960 / 2
height = 500
margin_up = 60
margin_left = 35
diameter = width * 1.4

# Each tree level is placed width / max_depth pixels to the right of its parent.
max_depth = 3
# Vertical step between two consecutive annotation nodes.
distance = 40
# Number of characters kept at each end of a shortened label.
char_window = 15

# The tree layout is only used to derive nodes and links; positions are
# overwritten by hand later, hence the [0, 0] size.
tree = d3.layout.tree().size([0, 0])

# Swaps x and y so that links are drawn left-to-right.
diagonal = d3.svg.diagonal().projection((d) -> [d.y, d.x])

# One colour per annotation type.
annotation_types = ['line', 'sentence', 'abbreviation', 'named_entity', 'page']
annotation_colors = ['#1f77b4', '#d62728', '#2ca02c', '#ff7f0e', '#9467bd']
color = d3.scale.ordinal().domain(annotation_types).range(annotation_colors)

# Everything is drawn inside a translated group so that the margins apply once.
svg = d3.select('svg')
  .append('g')
  .attr('transform', "translate(#{margin_left}, #{margin_up})")

# Column titles.
for [title_text, title_x] in [['Content', -23], ['Annotations', 125]]
  svg.append('text')
    .text(title_text)
    .attr
      class: 'title'
      x: title_x
      y: -35
# CodeMirror "simple mode" describing the transcription syntax ('mtss').
# A rule matching page breaks is currently disabled:
#   {regex: new RegExp('\\n---\\n'), token: 'page'}

# Rules applied outside of a {{ ... }} span.
mtss_start_rules = [
  # Sentence separator.
  {regex: new RegExp('\\|\\|'), token: 'sentence'}
  # [abbreviation](expansion)
  {
    regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))')
    token: ['choice_square','choice_abbr','choice_square','choice_round','choice_expan','choice_round']
  }
  # <entity text>(uri)
  {
    regex: new RegExp('(\\<)([^\\>]*)(\\>)(\\()([^\\)]*)(\\))')
    token: ['entity_angular','entity_text','entity_angular','entity_round','entity_uri','entity_round']
  }
  # Opening braces switch to the 'w' state.
  {regex: new RegExp('{{'), token: 'w', next: 'w'}
]

# Rules applied inside a {{ ... }} span: closing braces go back to 'start',
# every other character is tagged as content.
mtss_w_rules = [
  {regex: new RegExp('}}'), token: 'w', next: 'start'}
  {regex: new RegExp('.'), token: 'w_content'}
]

CodeMirror.defineSimpleMode 'mtss',
  start: mtss_start_rules
  w: mtss_w_rules

# Replace the <textarea id="editor"> with a CodeMirror instance using that mode.
editor = CodeMirror.fromTextArea document.getElementById('editor'),
  mode: 'mtss'
  lineNumbers: true
  lineWrapping: true
# Returns `text` with the annotation syntax removed.
#
# text  - the raw transcription.
# index - optional position (0-4) of one rule to leave untouched, in the order:
#         0 sentence separators, 1 opening braces, 2 closing braces,
#         3 abbreviations, 4 named entities. When omitted every rule is applied.
clean = (text, index) ->
  skipped = if index is undefined then -1 else index
  # [pattern, replacement] pairs, applied in order.
  rules = [
    [new RegExp('\\|\\|','g'), '']
    [new RegExp('{{','g'), '']
    [new RegExp('}}','g'), '']
    [new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)','g'), '$1']
    [new RegExp('\\<([^\\>]*)\\>\\(([^\\)]*)\\)','g'), '$1']
  ]
  step = (result, rule, position) ->
    if position is skipped then result else result.replace(rule[0], rule[1])
  rules.reduce step, text
# Builds the caption displayed next to an annotation node.
#
# node - annotation record with `name`, `start`, `end` and, for abbreviations
#        and named entities, `extra`.
# text - the raw transcription; it is cleaned before slicing.
#
# Returns a string:
#   line / sentence - the covered text; when it is longer than twice
#                     `char_window` only the first and last `char_window`
#                     characters are kept, joined by " ... ". Spans of exactly
#                     one character yield "".
#   abbreviation    - the abbreviated text followed by its expansion.
#   named_entity    - the `extra` value of the node.
#   anything else   - "" (so callers can always treat the result as a string).
get_label = (node, text) ->
  text = clean(text)
  switch node.name
    when 'line', 'sentence'
      length = node.end - node.start
      if length is 1
        ""
      else if length > 2 * char_window
        # Shorten only when head and tail cannot overlap; otherwise the label
        # would repeat characters and end up longer than the text itself.
        text.slice(node.start, node.start + char_window) + " ... " + text.slice(node.end - char_window, node.end)
      else
        text.slice(node.start, node.end)
    # Abbreviation starts are 1-based, hence the -1.
    when 'abbreviation' then "#{text.slice(node.start-1,node.end)}#{node.extra}"
    when 'named_entity' then node.extra
    else ""
### TEXT TRANSLATION
Every time the text changes, the annotations are searched again and the diagram is updated.
###
# Wrapped in a function because find_annotations is only assigned below.
editor.on 'change', () -> find_annotations()

# Re-reads the editor content, extracts lines, sentences, abbreviations and
# named entities, and redraws the annotation diagram inside `svg`.
# Takes no arguments; reads `editor` and draws with d3.
find_annotations = () ->
  # --- DATA CONSTRUCTION ---
  text = editor.getValue()

  # Escapes the characters that are significant in markup, because labels are
  # inserted with .html() and come straight from user-typed text.
  escape_html = (s) ->
    String(s)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;')

  # Turns consecutive pieces of text into {name, index, start, end} records.
  # Every piece but the last is `separator_length` characters longer than its
  # own text (the newline that split() removed, for lines). A running offset
  # replaces the repeated slice().join() of the previous implementation.
  consecutive_spans = (name, pieces, separator_length) ->
    offset = 0
    for piece, i in pieces
      length = piece.length + (if i isnt pieces.length - 1 then separator_length else 0)
      span = {"name": name, "index": i, "start": offset, "end": offset + length}
      offset += length
      span

  # Collects the `[text](extra)`-like annotations matched by `pattern`.
  # `rule_index` is the clean() rule left untouched so the markup is still
  # present. After each annotation is recorded its markup is replaced by its
  # inner text, so later positions refer to the fully cleaned text.
  # `start` is 1-based, `end` is the 0-based exclusive end.
  collect_marked = (name, rule_index, pattern) ->
    remaining = clean(text, rule_index)
    first_match = new RegExp(pattern)
    found = remaining.match(new RegExp(pattern, 'g')) ? []
    for markup, i in found
      parts = first_match.exec(markup)
      position = remaining.indexOf(markup)
      remaining = remaining.replace(first_match, '$1')
      {"name": name, "index": i, "extra": parts[2], "start": position + 1, "end": position + parts[1].length}

  lines_data = consecutive_spans 'line', clean(text).split('\n'), 1
  sentences_data = consecutive_spans 'sentence', clean(text, 0).split('||'), 0
  abbreviations_data = collect_marked 'abbreviation', 3, '\\[([^\\]]*)\\]\\(([^\\)]*)\\)'
  named_entities_data = collect_marked 'named_entity', 4, '\\<([^\\>]*)\\>\\(([^\\)]*)\\)'

  # --- VISUALIZATION ---
  data =
    "name": "content"
    "children": lines_data.concat(sentences_data).concat(abbreviations_data).concat(named_entities_data)
    "start": -1
    "end": -1

  # Order by position in the text; ties are broken by type name, descending.
  data.children.sort (a, b) ->
    if a.start isnt b.start then d3.ascending(a.start, b.start) else d3.descending(a.name, b.name)

  nodes = tree.nodes data
  links = tree.links nodes

  # Manual layout: one row per node, one column per depth level.
  h = 0
  nodes.forEach (n) ->
    if n.parent? and n.parent.children[0] isnt n
      h += distance
    n.x = h
    n.y = n.depth * (width / max_depth)

  # Links. The key must read the fields of the link's target; a link object
  # itself has no `extra`, so the previous key always ended in "undefined".
  link = svg.selectAll('.link')
    .data(links, (d) -> "#{d.target.name}_#{d.target.start}_#{d.target.end}_#{d.target.extra}")
  link.enter().append('path')
    .attr
      class: 'link'
  link
    .attr
      d: diagonal
  link.exit().remove()

  # Nodes (the root is drawn separately below).
  node = svg.selectAll('.node')
    .data((nodes.filter (d) -> d.depth > 0), ((d, i) -> "#{d.name}_#{d.start}_#{d.end}_#{d.extra}"))
  enter_node = node.enter().append('g')
    .attr
      class: 'node'
  enter_node.append('text')
    .attr
      class: 'label_details'
      x: 20
      y: 11
  enter_node.append('text')
    .attr
      class: 'label_class'
      x: 20
      y: -2
  enter_node.append 'title'
  enter_node.append('circle')
    .attr
      r: 15
      fill: (d) -> color d.name
      stroke: 'transparent'

  node.select('.label_details')
    .html (d) ->
      # Escape before injecting; values containing a URL become links.
      t = escape_html(get_label(d, text) ? '')
      if /https?:\/\//.test(t) then "<a class='ne_link' target='_blank' xlink:href='#{t}'>#{t}</a>" else t
  node.select('.label_class')
    .text((d) -> "#{d.name.toUpperCase().replace(/_/, ' ')} #{d.index+1}")
    .attr
      fill: (d) -> color d.name
  node.select('title')
    .text (d) -> "#{d.name}\nstart:#{d.start}\nend:#{d.end}"+(if d.extra? then "\n#{d.extra}" else "")
  node
    .attr
      transform: (d) -> "translate(#{d.y},#{d.x})"
  node.exit().remove()

  # Root Node is separately treated to keep it over the edges
  d3.select('.rootnode').remove()
  svg.append('circle')
    .attr
      class: 'rootnode'
      transform: (d) -> "translate(0,0)"
      'stroke-width': 1.5
      r: 20
      fill: '#FFF'
      stroke: 'gray'
# Initial rendering of the diagram.
find_annotations()

### Sentence highlighting
###
# Text marker of the sentence that currently contains the cursor, if any.
current_sentence = null

# On every cursor move, mark the text between the '||' separators surrounding
# the cursor with the 'sentence_highlight' class.
editor.on 'cursorActivity', () ->
  finder = editor.getSearchCursor('||', editor.getCursor())
  # NOTE(review): relies on the search cursor updating `pos` even when no
  # separator is found - confirm against the CodeMirror searchcursor addon.
  finder.findPrevious()
  sentence_start = finder.pos.to
  finder.findNext()
  sentence_end = finder.pos.from
  # Drop the previous marker before creating the new one.
  current_sentence?.clear()
  current_sentence = editor.markText(sentence_start, sentence_end, {className: 'sentence_highlight'})
/* ---------- Editor: token styles of the 'mtss' mode ---------- */

svg {
  background: white;
}

.CodeMirror-gutter-elt {
  color: #1f77b4;
}

/* Sentence separator */
.cm-sentence {
  font-weight: bold;
  color: #d62728;
}

/* Page break */
.cm-page {
  font-weight: bold;
  color: #9467bd;
}

/* Abbreviation delimiters */
.cm-choice_square,
.cm-choice_round {
  font-weight: bold;
  color: #2ca02c;
}

/* Named-entity delimiters */
.cm-entity_angular,
.cm-entity_round {
  font-weight: bold;
  color: #ff7f0e;
}

/* Double-brace span: delimiters and content */
.cm-w {
  font-weight: bold;
  color: #092;
}

.cm-w_content {
  color: #092;
}

/* Abbreviation expansion */
.cm-choice_expan {
  font-style: italic;
  color: #2ca02c;
  opacity: 0.6;
}

/* Named-entity URI */
.cm-entity_uri {
  font-style: italic;
  color: #ff7f0e;
  opacity: 0.6;
}

.cm-sentence-2 {
  background: yellow;
}

/* ---------- Page layout: editor and diagram side by side ---------- */

#editor {
  flex: 1;
}

.CodeMirror {
  flex: 1;
  height: 500px;
  line-height: normal;
}

/* Declared after the first svg rule, so this background is the one applied. */
svg {
  margin: 0;
  border-left: 2px solid gray;
  background: #EEE;
  white-space: pre-wrap;
  overflow-y: scroll;
  height: 500px;
  flex: 1;
}

body {
  display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */
  display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */
  display: -ms-flexbox; /* TWEENER - IE 10 */
  display: -webkit-flex; /* NEW - Chrome */
  display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */
  -ms-flex-flow: row;
  -webkit-flex-flow: row;
  flex-flow: row;
}

/* Highlight of the sentence under the cursor (currently disabled) */
.sentence_highlight {
  /*background: rgba(255,255,0,0.15);*/
}

/* ---------- Diagram ---------- */

.title {
  font-family: sans-serif;
  font-size: 14px;
  font-weight: bold;
}

.node circle {
  stroke-width: 1.5;
}

.node circle:hover {
  opacity: 0.8;
}

.link {
  fill: none;
  stroke: #ccc;
  stroke-width: 1.5px;
}

.label_details {
  font-family: sans-serif;
  font-size: 12px;
}

.label_class {
  font-family: sans-serif;
  font-size: 10px;
}

.ne_link {
  cursor: pointer;
  text-decoration: underline;
}
<!DOCTYPE html>
<html>
  <head>
    <meta charset="utf-8">
    <meta name="description" content="Manuscript Transcription Simple Syntax" />
    <title>Manuscript Transcription</title>
    <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.css" rel="stylesheet"/>
    <link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css" rel="stylesheet"/>
    <link type="text/css" href="index.css" rel="stylesheet"/>
    <!-- d3 is loaded over HTTPS: a hard-coded http:// URL is blocked as mixed
         content when this page itself is served over HTTPS. -->
    <script src="https://d3js.org/d3.v3.min.js"></script>
    <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.js"></script>
    <script src="//wafi.iit.cnr.it/webvis/tmp/codemirror_mode_simple.js"></script>
    <script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/addon/search/searchcursor.min.js"></script>
  </head>
  <body>
    <!-- Initial transcription; the content is whitespace-sensitive and is
         therefore kept unindented. -->
    <textarea id="editor">
<Christopher Clavius>(http://dbpedia.org/resource/Christopher_Clavius) was a
German Jesuit mathematician and astronomer who
modified the proposal of the modern Gregorian calendar
after the death of its primary author, <Aloysius Lilius>(http://dbpedia.org/resource/Aloysius_Lilius).|| Clavius would later write defences and an
explanation of the reformed calendar, including an
emphatic [ack.](acknowledgement) of Lilio's work.</textarea>
    <!-- The annotation diagram is drawn here by index.js. -->
    <svg></svg>
    <script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
    <script src="index.js"></script>
  </body>
</html>
// Generated by CoffeeScript 1.10.0
(function() {
// noprotect
;
/* Manuscript EDITOR
*/
var char_window, clean, color, current_sentence, diagonal, diameter, distance, editor, find_annotations, get_label, height, margin_left, margin_up, max_depth, svg, tree, width;
// Geometry of the diagram, in pixels.
// NOTE(review): `height` and `diameter` are not read by the visible code.
width = 960 / 2;
height = 500;
margin_up = 60;
margin_left = 35;
diameter = width * 1.4;

// Each tree level is placed width / max_depth pixels to the right of its parent.
max_depth = 3;
// Vertical step between two consecutive annotation nodes.
distance = 40;
// Number of characters kept at each end of a shortened label.
char_window = 15;

// The tree layout is only used to derive nodes and links; positions are
// overwritten by hand later, hence the [0, 0] size.
tree = d3.layout.tree().size([0, 0]);

// Swaps x and y so that links are drawn left-to-right.
diagonal = d3.svg.diagonal().projection(function(point) {
  return [point.y, point.x];
});

// One colour per annotation type.
var annotation_types = ['line', 'sentence', 'abbreviation', 'named_entity', 'page'];
var annotation_colors = ['#1f77b4', '#d62728', '#2ca02c', '#ff7f0e', '#9467bd'];
color = d3.scale.ordinal().domain(annotation_types).range(annotation_colors);

// Everything is drawn inside a translated group so that the margins apply once.
svg = d3.select('svg')
  .append('g')
  .attr('transform', "translate(" + margin_left + ", " + margin_up + ")");

// Column titles.
[['Content', -23], ['Annotations', 125]].forEach(function(title) {
  svg.append('text').text(title[0]).attr({
    "class": 'title',
    x: title[1],
    y: -35
  });
});
// CodeMirror "simple mode" describing the transcription syntax ('mtss').

// Rules applied outside of a {{ ... }} span.
var mtss_start_rules = [
  // Sentence separator.
  { regex: new RegExp('\\|\\|'), token: 'sentence' },
  // [abbreviation](expansion)
  {
    regex: new RegExp('(\\[)([^\\]]*)(\\])(\\()([^\\)]*)(\\))'),
    token: ['choice_square', 'choice_abbr', 'choice_square', 'choice_round', 'choice_expan', 'choice_round']
  },
  // <entity text>(uri)
  {
    regex: new RegExp('(\\<)([^\\>]*)(\\>)(\\()([^\\)]*)(\\))'),
    token: ['entity_angular', 'entity_text', 'entity_angular', 'entity_round', 'entity_uri', 'entity_round']
  },
  // Opening braces switch to the 'w' state.
  { regex: new RegExp('{{'), token: 'w', next: 'w' }
];

// Rules applied inside a {{ ... }} span: closing braces go back to 'start',
// every other character is tagged as content.
var mtss_w_rules = [
  { regex: new RegExp('}}'), token: 'w', next: 'start' },
  { regex: new RegExp('.'), token: 'w_content' }
];

CodeMirror.defineSimpleMode('mtss', {
  start: mtss_start_rules,
  w: mtss_w_rules
});

// Replace the <textarea id="editor"> with a CodeMirror instance using that mode.
var editor_options = {
  mode: 'mtss',
  lineNumbers: true,
  lineWrapping: true
};
editor = CodeMirror.fromTextArea(document.getElementById('editor'), editor_options);
clean = function(text, index) {
var i, j, len, r, replaces;
if (index === void 0) {
index = -1;
}
replaces = [
{
regex: new RegExp('\\|\\|', 'g'),
replace_text: ''
}, {
regex: new RegExp('{{', 'g'),
replace_text: ''
}, {
regex: new RegExp('}}', 'g'),
replace_text: ''
}, {
regex: new RegExp('\\[([^\\]]*)\\]\\(([^\\)]*)\\)', 'g'),
replace_text: '$1'
}, {
regex: new RegExp('\\<([^\\>]*)\\>\\(([^\\)]*)\\)', 'g'),
replace_text: '$1'
}
];
for (i = j = 0, len = replaces.length; j < len; i = ++j) {
r = replaces[i];
if (i !== index) {
text = text.replace(r.regex, r.replace_text);
}
}
return text;
};
get_label = function(node, text) {
text = clean(text);
switch (node.name) {
case 'line':
if (node.start !== node.end - 1) {
return text.slice(node.start, (node.end - node.start > char_window ? node.start + char_window : node.end)) + (node.end - node.start > char_window ? " ... " : "") + text.slice(node.end - (node.end - node.start > char_window ? char_window : 0), node.end);
} else {
return "";
}
break;
case 'sentence':
if (node.start !== node.end - 1) {
return text.slice(node.start, (node.end - node.start > char_window ? char_window + node.start : node.end)) + (node.end - node.start > char_window ? " ... " : "") + text.slice(node.end - (node.end - node.start > char_window ? char_window : 0), node.end);
} else {
return "";
}
break;
case 'abbreviation':
return (text.slice(node.start - 1, node.end)) + "" + node.extra;
case 'named_entity':
return node.extra;
}
};
/* TEXT TRANSLATION
Every time the text changes, the annotations are searched again and the diagram is updated.
 */
// Wrapped in a function because find_annotations is only assigned below.
editor.on('change', function() {
  return find_annotations();
});

/**
 * Re-reads the editor content, extracts lines, sentences, abbreviations and
 * named entities, and redraws the annotation diagram inside `svg`.
 * Takes no arguments; reads `editor` and draws with d3.
 */
find_annotations = function() {
  var abbreviations_data, collect_marked, consecutive_spans, data, enter_node, escape_html, h, lines_data, link, links, named_entities_data, node, nodes, sentences_data, text;

  /* DATA CONSTRUCTION */
  text = editor.getValue();

  // Escapes the characters that are significant in markup, because labels are
  // inserted with .html() and come straight from user-typed text.
  escape_html = function(s) {
    return String(s)
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;')
      .replace(/'/g, '&#39;');
  };

  // Turns consecutive pieces of text into {name, index, start, end} records.
  // Every piece but the last is `separator_length` characters longer than its
  // own text (the newline that split() removed, for lines). A running offset
  // replaces the repeated slice().join() of the previous implementation.
  consecutive_spans = function(name, pieces, separator_length) {
    var offset = 0;
    return pieces.map(function(piece, i) {
      var length = piece.length + (i !== pieces.length - 1 ? separator_length : 0);
      var span = {
        "name": name,
        "index": i,
        "start": offset,
        "end": offset + length
      };
      offset += length;
      return span;
    });
  };

  // Collects the `[text](extra)`-like annotations matched by `pattern`.
  // `rule_index` is the clean() rule left untouched so the markup is still
  // present. After each annotation is recorded its markup is replaced by its
  // inner text, so later positions refer to the fully cleaned text.
  // `start` is 1-based, `end` is the 0-based exclusive end.
  collect_marked = function(name, rule_index, pattern) {
    var remaining = clean(text, rule_index);
    var first_match = new RegExp(pattern);
    var found = remaining.match(new RegExp(pattern, 'g')) || [];
    return found.map(function(markup, i) {
      var parts = first_match.exec(markup);
      var position = remaining.indexOf(markup);
      remaining = remaining.replace(first_match, '$1');
      return {
        "name": name,
        "index": i,
        "extra": parts[2],
        "start": position + 1,
        "end": position + parts[1].length
      };
    });
  };

  lines_data = consecutive_spans('line', clean(text).split('\n'), 1);
  sentences_data = consecutive_spans('sentence', clean(text, 0).split('||'), 0);
  abbreviations_data = collect_marked('abbreviation', 3, '\\[([^\\]]*)\\]\\(([^\\)]*)\\)');
  named_entities_data = collect_marked('named_entity', 4, '\\<([^\\>]*)\\>\\(([^\\)]*)\\)');

  /* VISUALIZATION */
  data = {
    "name": "content",
    "children": lines_data.concat(sentences_data).concat(abbreviations_data).concat(named_entities_data),
    "start": -1,
    "end": -1
  };

  // Order by position in the text; ties are broken by type name, descending.
  data.children.sort(function(a, b) {
    if (a.start !== b.start) {
      return d3.ascending(a.start, b.start);
    } else {
      return d3.descending(a.name, b.name);
    }
  });

  nodes = tree.nodes(data);
  links = tree.links(nodes);

  // Manual layout: one row per node, one column per depth level.
  h = 0;
  nodes.forEach(function(n) {
    if ((n.parent != null) && n.parent.children[0] !== n) {
      h += distance;
    }
    n.x = h;
    return n.y = n.depth * (width / max_depth);
  });

  // Links. The key must read the fields of the link's target; a link object
  // itself has no `extra`, so the previous key always ended in "undefined".
  link = svg.selectAll('.link').data(links, function(d) {
    return d.target.name + "_" + d.target.start + "_" + d.target.end + "_" + d.target.extra;
  });
  link.enter().append('path').attr({
    "class": 'link'
  });
  link.attr({
    d: diagonal
  });
  link.exit().remove();

  // Nodes (the root is drawn separately below).
  node = svg.selectAll('.node').data(nodes.filter(function(d) {
    return d.depth > 0;
  }), function(d, i) {
    return d.name + "_" + d.start + "_" + d.end + "_" + d.extra;
  });
  enter_node = node.enter().append('g').attr({
    "class": 'node'
  });
  enter_node.append('text').attr({
    "class": 'label_details',
    x: 20,
    y: 11
  });
  enter_node.append('text').attr({
    "class": 'label_class',
    x: 20,
    y: -2
  });
  enter_node.append('title');
  enter_node.append('circle').attr({
    r: 15,
    fill: function(d) {
      return color(d.name);
    },
    stroke: 'transparent'
  });

  node.select('.label_details').html(function(d) {
    // Escape before injecting; values containing a URL become links.
    var label = get_label(d, text);
    var t = escape_html(label != null ? label : '');
    if (/https?:\/\//.test(t)) {
      return "<a class='ne_link' target='_blank' xlink:href='" + t + "'>" + t + "</a>";
    } else {
      return t;
    }
  });
  node.select('.label_class').text(function(d) {
    return (d.name.toUpperCase().replace(/_/, ' ')) + " " + (d.index + 1);
  }).attr({
    fill: function(d) {
      return color(d.name);
    }
  });
  node.select('title').text(function(d) {
    return (d.name + "\nstart:" + d.start + "\nend:" + d.end) + (d.extra != null ? "\n" + d.extra : "");
  });
  node.attr({
    transform: function(d) {
      return "translate(" + d.y + "," + d.x + ")";
    }
  });
  node.exit().remove();

  // Root Node is separately treated to keep it over the edges
  d3.select('.rootnode').remove();
  return svg.append('circle').attr({
    "class": 'rootnode',
    transform: function(d) {
      return "translate(0,0)";
    },
    'stroke-width': 1.5,
    r: 20,
    fill: '#FFF',
    stroke: 'gray'
  });
};
// Initial rendering of the diagram.
find_annotations();

/* Sentence highlighting
 */

// Text marker of the sentence that currently contains the cursor, if any.
current_sentence = null;

// On every cursor move, mark the text between the '||' separators surrounding
// the cursor with the 'sentence_highlight' class.
editor.on('cursorActivity', function() {
  var finder, sentence_end, sentence_start;
  finder = editor.getSearchCursor('||', editor.getCursor());
  // NOTE(review): relies on the search cursor updating `pos` even when no
  // separator is found - confirm against the CodeMirror searchcursor addon.
  finder.findPrevious();
  sentence_start = finder.pos.to;
  finder.findNext();
  sentence_end = finder.pos.from;
  // Drop the previous marker before creating the new one.
  if (current_sentence != null) {
    current_sentence.clear();
  }
  current_sentence = editor.markText(sentence_start, sentence_end, {
    className: 'sentence_highlight'
  });
  return current_sentence;
});
}).call(this);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment