Skip to content

Instantly share code, notes, and snippets.

@nitaku
Last active February 1, 2016 16:54
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nitaku/bc6fe6041d201b32a5ae to your computer and use it in GitHub Desktop.
Save nitaku/bc6fe6041d201b32a5ae to your computer and use it in GitHub Desktop.
Text Annotation (Jison)
`
// noprotect
`
prefixes =
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
owl: 'http://www.w3.org/2002/07/owl#'
rdfs: 'http://www.w3.org/2000/01/rdf-schema#'
foaf: 'http://xmlns.com/foaf/0.1/'
dbo: 'http://dbpedia.org/ontology/'
dbr: 'http://dbpedia.org/resource/'
wiki: 'http://en.wikipedia.org/wiki/'
### Manuscript EDITOR
###
width = 960/2
height = 500
diameter = width*1.4
max_depth = 3
distance = 40
char_window = 15
svg = d3.select 'svg'
vis = svg.append 'g'
defs = svg.append('defs')
R = 18
### define arrow markers for graph links ###
defs.append('marker')
.attr
id: 'end-arrow'
viewBox: '0 0 10 10'
refX: 5+R
refY: 5
orient: 'auto'
.append('path')
.attr
d: 'M0,0 L0,10 L10,5 z'
CodeMirror.defineSimpleMode('mtss', {
start: [
{regex: new RegExp('(\\<)([^\\>]*)(\\>)(\\()([^\\)]*)(\\))'), token: ['span_symbol','span_text','span_symbol','entity_symbol','entity_id','entity_symbol']},
{regex: new RegExp('^___$'), token: 'start_directives', next: 'directives'}
],
directives: [
{regex: new RegExp('(\\()(.*)(\\))'), token: ['entity_symbol','entity_id','entity_symbol']},
{regex: new RegExp('.'), token: 'directive'}
]
})
editor = CodeMirror.fromTextArea document.getElementById('editor'), {
mode: 'mtss',
lineNumbers: false,
lineWrapping: true,
gutters: ['error_gutter']
}
### TEXT TRANSLATION
Every time the text is changed new annotations are searched, the diagram is updated
###
editor.on 'change', () -> find_annotations()
find_annotations = () ->
### DATA CONSTRUCTION
###
text = editor.getValue()
editor.clearGutter('error_gutter')
parser.parse(text)
### GRAPH
###
content = {type: 'content'}
nodes = [content]
links = []
graph = {nodes: nodes, links: links}
entity_index = {}
parser.directives.forEach (d) ->
if d.id not of entity_index
n = {type: 'entity', id: d.id}
nodes.push n
entity_index[d.id] = n
else
n = entity_index[d.id]
d.popairs.forEach (p) ->
ext_n = {type: 'external', id: p.object}
nodes.push ext_n
links.push {source: n, target: ext_n, type: 'predicate', predicate: p.predicate}
parser.annotations.forEach (a, i) ->
n = {type: 'span', id: i}
nodes.push n
links.push {source: content, target: n, start: a.start, end: a.end, type: 'locus', inverted: true}
links.push {source: n, target: entity_index[a.id], type: 'about'}
### VISUALIZATION
###
### store the node index into the node itself ###
for n, i in graph.nodes
n.i = i
### store neighbor nodes into each node ###
for n, i in graph.nodes
n.in_neighbors = []
n.out_neighbors = []
for l in graph.links
l.source.out_neighbors.push l.target
l.target.in_neighbors.push l.source
### compute longest distances ###
topological_order = tsort(graph.links.map (l) -> [l.source.i, l.target.i])
# console.debug 'Topological order: ' + topological_order
for n in graph.nodes
n.longest_dist = -Infinity
# Root nodes have no incoming links
graph.nodes.forEach (n) ->
if n.in_neighbors.length is 0
n.longest_dist = switch n.type
when 'content' then 0
when 'entity' then 2
for i in topological_order # control direction
n = graph.nodes[i]
for nn in n.out_neighbors # control direction
if nn.longest_dist < n.longest_dist+1
nn.longest_dist = n.longest_dist+1
graph.constraints = []
### create the alignment contraints ###
levels = _.uniq graph.nodes.map (n) -> n.longest_dist
levels.sort() # this seems to be necessary
# console.log levels
levels.forEach (depth) ->
graph.constraints.push {
type: 'alignment',
axis: 'x',
offsets: graph.nodes.filter((n) -> n.longest_dist is depth).map (n) -> {
node: n.i,
offset: 0
}
}
PAD_MULTIPLIER = 3.5
### create the position contraints ###
levels.forEach (depth, i) ->
if i < levels.length-1
n1 = _.find graph.nodes, (n) -> n.longest_dist is depth
n2 = _.find graph.nodes, (n) -> n.longest_dist is depth+1
graph.constraints.push {
axis: 'x',
left: n1.i,
right: n2.i,
gap: if depth < 2 then 5*R else 8*R
}
### create nodes and links ###
vis.selectAll '.link'
.remove()
vis.selectAll '.node'
.remove()
vis.selectAll '.link_label'
.remove()
links = vis.selectAll '.link'
.data graph.links
enter_links = links
.enter().append 'line'
.attr
class: (d) -> "link #{d.type}"
enter_links.append 'title'
.text (d) -> 'link'
link_labels = vis.selectAll '.link_label'
.data graph.links
link_labels.enter().append 'text'
.text (d) ->
if d.type is 'predicate'
return d.predicate
else
return d.type
.attr
class: 'link_label'
nodes = vis.selectAll '.node'
.data graph.nodes
enter_nodes = nodes.enter().append 'g'
.attr
class: (d) -> "node #{d.type}"
enter_nodes.append 'circle'
.attr
r: R
enter_as = enter_nodes.append 'a'
.attr
target: '_blank'
enter_as.filter (d) -> d.type is 'external'
.attr
class: 'valid'
'xlink:href': (d) ->
if d.type isnt 'external'
return ''
splitted = (''+d.id).split ':'
if splitted[0] is 'http'
return d.id
else
return prefixes[splitted[0]] + splitted[1]
enter_as.append 'text'
.text (d) ->
switch d.type
when 'external' then d.id
when 'content' then 'CONTENT'
when 'span' then "< >"
when 'entity' then "(#{d.id})"
.attr
class: 'label'
dy: '0.35em'
### cola layout ###
graph.nodes.forEach (v, i) ->
v.width = PAD_MULTIPLIER*R
v.height = PAD_MULTIPLIER*R
# useful to untangle the graph
v.x = i
v.y = i
content.x = R + 20
content.y = height/2
content.fixed = true
d3cola = cola.d3adaptor()
.size([width, height])
.linkDistance(60)
.constraints(graph.constraints)
.avoidOverlaps(true)
.nodes(graph.nodes)
.links(graph.links)
.on 'tick', () ->
### update nodes and links ###
nodes
.attr('transform', (d) -> "translate(#{d.x},#{d.y})")
links.filter (d) -> not d.inverted
.attr('x1', (d) -> d.source.x)
.attr('y1', (d) -> d.source.y)
.attr('x2', (d) -> d.target.x)
.attr('y2', (d) -> d.target.y)
links.filter (d) -> d.inverted
.attr('x1', (d) -> d.target.x)
.attr('y1', (d) -> d.target.y)
.attr('x2', (d) -> d.source.x)
.attr('y2', (d) -> d.source.y)
link_labels
.attr
transform: (d) -> "translate(#{(d.source.x+d.target.x)/2} #{(d.source.y+d.target.y)/2}) rotate(#{ Math.atan2((d.target.y-d.source.y),(d.target.x-d.source.x))/Math.PI/2*360 }) translate(0,-5)"
enter_nodes
.call(d3cola.drag)
d3cola.start(100,30,30)
find_annotations()
### Sentence highlighting
###
current_sentence = null
editor.on 'cursorActivity', () ->
cursor = editor.getCursor()
search_cursor = editor.getSearchCursor('||', cursor)
search_cursor.findPrevious()
from = search_cursor.pos.to
search_cursor.findNext()
to = search_cursor.pos.from
if current_sentence?
current_sentence.clear()
current_sentence = editor.markText(from, to, {className: 'sentence_highlight'})
### Error reporting ###
window.on_code_error = (message, details) ->
error_marker = d3.select document.createElement('a')
.text 'X'
.style
'text-align': 'center'
background: 'red'
color: 'white'
display: 'inline-block'
width: '100%'
.attr
title: "Unexpected #{details.token}: #{details.text}"
editor.setGutterMarker(details.line, 'error_gutter', error_marker.node())
svg {
background: white;
}
.cm-span_symbol {
font-weight: bold;
color: rgba( 31,118,180,1);
background: rgba( 31,118,180,0.1);
}
.cm-span_text {
background: rgba( 31,118,180,0.1);
}
.cm-entity_id {
color: #ff7f0e;
font-weight: bold;
}
.cm-sentence-2 {
background: yellow;
}
.cm-start_directives {
color: brown;
font-weight: bold;
}
.cm-directive {
font-style: italic;
color: #666;
}
.cm-entity_symbol {
color: #ff7f0e;
}
.error_gutter {
width: 12px;
}
#editor {
flex: 1;
}
.CodeMirror {
flex: 1;
height: 500px;
line-height: normal;
}
svg {
margin: 0;
border-left: 2px solid gray;
background: #EEE;
white-space: pre-wrap;
overflow-y: scroll;
height: 500px;
flex: 1;
}
body {
display: -webkit-box; /* OLD - iOS 6-, Safari 3.1-6 */
display: -moz-box; /* OLD - Firefox 19- (buggy but mostly works) */
display: -ms-flexbox; /* TWEENER - IE 10 */
display: -webkit-flex; /* NEW - Chrome */
display: flex; /* NEW, Spec - Opera 12.1, Firefox 20+ */
-ms-flex-flow: row;
-webkit-flex-flow: row;
flex-flow: row;
}
.sentence_highlight {
/*background: rgba(255,255,0,0.15);*/
}
.title {
font-family: sans-serif;
font-size: 14px;
font-weight: bold;
}
.node circle {
stroke-width: 2;
}
.link {
fill: none;
stroke: #CCC;
stroke-width: 4px;
marker-end: url(#end-arrow);
}
#end-arrow {
fill: #CCC;
}
.label {
font-family: monospace;
font-size: 12px;
text-anchor: middle;
fill: #333;
stroke: none;
font-weight: bold;
text-shadow: -1px -1px #EEE, -1px 1px #EEE, 1px 1px #EEE, 1px -1px #EEE, -1px 0 #EEE, 0 1px #EEE, 1px 0 #EEE, 0 -1px #EEE;
pointer-events: none;
}
a.valid .label {
pointer-events: all;
}
a.valid:hover .label {
text-decoration: underline;
}
.link_label {
font-family: sans-serif;
font-size: 9px;
text-anchor: middle;
fill: #AAA;
stroke: none;
text-shadow: -1px -1px #EEE, -1px 1px #EEE, 1px 1px #EEE, 1px -1px #EEE, -1px 0 #EEE, 0 1px #EEE, 1px 0 #EEE, 0 -1px #EEE;
pointer-events: none;
}
.ne_link {
cursor: pointer;
text-decoration: underline;
}
.content.node {
fill: white;
stroke: #777;
}
.span.node {
fill: #B9DBF3;
stroke: #1f77b4;
}
.span.node .label {
fill: #1f77b4;
}
.entity.node {
fill: #FFD7B3;
stroke: #ff7f0e;
}
.entity.node .label {
fill: #ff7f0e;
}
.external.node {
fill: #EEE;
stroke: #999;
}
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<title>Text Annotation (Jison)</title>
<link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.css" rel="stylesheet"/>
<link type="text/css" href="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/styles/github.min.css" rel="stylesheet"/>
<link type="text/css" href="index.css" rel="stylesheet"/>
<script src="http://d3js.org/d3.v3.min.js"></script>
<script src="http://marvl.infotech.monash.edu/webcola/cola.v3.min.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/codemirror.min.js"></script>
<script src="//wafi.iit.cnr.it/webvis/tmp/codemirror_mode_simple.js"></script>
<script src="//cdnjs.cloudflare.com/ajax/libs/codemirror/4.6.0/addon/search/searchcursor.min.js"></script>
<script src="http://cdnjs.cloudflare.com/ajax/libs/lodash.js/3.10.0/lodash.min.js"></script>
<script src="tsort.js"></script>
<script src="/webvis/tmp/jison.js"></script>
</head>
<body>
<textarea id="editor"><Christopher Clavius>(1) was a
German Jesuit mathematician and astronomer who
modified the proposal of the modern Gregorian calendar
after the death of its primary author, <Aloysius Lilius>(2). <Clavius>(1) would later write defences and an
explanation of the reformed calendar, including an
emphatic acknowledgement of <Lilio>(2)'s work.
In his last years he was probably the most respected
astronomer in Europe and his textbooks were used for
astronomical education for over fifty years in and even out
of <Europe>(3).
___
(1) owl:sameAs dbr:Christopher_Clavius
(1) foaf:isPrimaryTopicOf wiki:Christopher_Clavius
(2) owl:sameAs dbr:Aloysius_Lilius
(3) foaf:isPrimaryTopicOf wiki:Europe</textarea>
<svg></svg>
<script src="//cdnjs.cloudflare.com/ajax/libs/highlight.js/8.3/highlight.min.js"></script>
<script src="parser.js"></script>
<script src="index.js"></script>
</body>
</html>
// Generated by CoffeeScript 1.10.0
(function() {
// noprotect
;
var R, char_window, current_sentence, defs, diameter, distance, editor, find_annotations, height, max_depth, prefixes, svg, vis, width;
prefixes = {
rdf: 'http://www.w3.org/1999/02/22-rdf-syntax-ns#',
owl: 'http://www.w3.org/2002/07/owl#',
rdfs: 'http://www.w3.org/2000/01/rdf-schema#',
foaf: 'http://xmlns.com/foaf/0.1/',
dbo: 'http://dbpedia.org/ontology/',
dbr: 'http://dbpedia.org/resource/',
wiki: 'http://en.wikipedia.org/wiki/'
};
/* Manuscript EDITOR
*/
width = 960 / 2;
height = 500;
diameter = width * 1.4;
max_depth = 3;
distance = 40;
char_window = 15;
svg = d3.select('svg');
vis = svg.append('g');
defs = svg.append('defs');
R = 18;
/* define arrow markers for graph links */
defs.append('marker').attr({
id: 'end-arrow',
viewBox: '0 0 10 10',
refX: 5 + R,
refY: 5,
orient: 'auto'
}).append('path').attr({
d: 'M0,0 L0,10 L10,5 z'
});
CodeMirror.defineSimpleMode('mtss', {
start: [
{
regex: new RegExp('(\\<)([^\\>]*)(\\>)(\\()([^\\)]*)(\\))'),
token: ['span_symbol', 'span_text', 'span_symbol', 'entity_symbol', 'entity_id', 'entity_symbol']
}, {
regex: new RegExp('^___$'),
token: 'start_directives',
next: 'directives'
}
],
directives: [
{
regex: new RegExp('(\\()(.*)(\\))'),
token: ['entity_symbol', 'entity_id', 'entity_symbol']
}, {
regex: new RegExp('.'),
token: 'directive'
}
]
});
editor = CodeMirror.fromTextArea(document.getElementById('editor'), {
mode: 'mtss',
lineNumbers: false,
lineWrapping: true,
gutters: ['error_gutter']
});
/* TEXT TRANSLATION
Every time the text is changed new annotations are searched, the diagram is updated
*/
editor.on('change', function() {
return find_annotations();
});
find_annotations = function() {
/* DATA CONSTRUCTION
*/
var PAD_MULTIPLIER, content, d3cola, enter_as, enter_links, enter_nodes, entity_index, graph, i, j, k, l, len, len1, len2, len3, len4, len5, levels, link_labels, links, m, n, nn, nodes, o, q, r, ref, ref1, ref2, ref3, ref4, text, topological_order;
text = editor.getValue();
editor.clearGutter('error_gutter');
parser.parse(text);
/* GRAPH
*/
content = {
type: 'content'
};
nodes = [content];
links = [];
graph = {
nodes: nodes,
links: links
};
entity_index = {};
parser.directives.forEach(function(d) {
var n;
if (!(d.id in entity_index)) {
n = {
type: 'entity',
id: d.id
};
nodes.push(n);
entity_index[d.id] = n;
} else {
n = entity_index[d.id];
}
return d.popairs.forEach(function(p) {
var ext_n;
ext_n = {
type: 'external',
id: p.object
};
nodes.push(ext_n);
return links.push({
source: n,
target: ext_n,
type: 'predicate',
predicate: p.predicate
});
});
});
parser.annotations.forEach(function(a, i) {
var n;
n = {
type: 'span',
id: i
};
nodes.push(n);
links.push({
source: content,
target: n,
start: a.start,
end: a.end,
type: 'locus',
inverted: true
});
return links.push({
source: n,
target: entity_index[a.id],
type: 'about'
});
});
/* VISUALIZATION
*/
/* store the node index into the node itself */
ref = graph.nodes;
for (i = j = 0, len = ref.length; j < len; i = ++j) {
n = ref[i];
n.i = i;
}
/* store neighbor nodes into each node */
ref1 = graph.nodes;
for (i = k = 0, len1 = ref1.length; k < len1; i = ++k) {
n = ref1[i];
n.in_neighbors = [];
n.out_neighbors = [];
}
ref2 = graph.links;
for (m = 0, len2 = ref2.length; m < len2; m++) {
l = ref2[m];
l.source.out_neighbors.push(l.target);
l.target.in_neighbors.push(l.source);
}
/* compute longest distances */
topological_order = tsort(graph.links.map(function(l) {
return [l.source.i, l.target.i];
}));
ref3 = graph.nodes;
for (o = 0, len3 = ref3.length; o < len3; o++) {
n = ref3[o];
n.longest_dist = -Infinity;
}
graph.nodes.forEach(function(n) {
if (n.in_neighbors.length === 0) {
return n.longest_dist = (function() {
switch (n.type) {
case 'content':
return 0;
case 'entity':
return 2;
}
})();
}
});
for (q = 0, len4 = topological_order.length; q < len4; q++) {
i = topological_order[q];
n = graph.nodes[i];
ref4 = n.out_neighbors;
for (r = 0, len5 = ref4.length; r < len5; r++) {
nn = ref4[r];
if (nn.longest_dist < n.longest_dist + 1) {
nn.longest_dist = n.longest_dist + 1;
}
}
}
graph.constraints = [];
/* create the alignment contraints */
levels = _.uniq(graph.nodes.map(function(n) {
return n.longest_dist;
}));
levels.sort();
levels.forEach(function(depth) {
return graph.constraints.push({
type: 'alignment',
axis: 'x',
offsets: graph.nodes.filter(function(n) {
return n.longest_dist === depth;
}).map(function(n) {
return {
node: n.i,
offset: 0
};
})
});
});
PAD_MULTIPLIER = 3.5;
/* create the position contraints */
levels.forEach(function(depth, i) {
var n1, n2;
if (i < levels.length - 1) {
n1 = _.find(graph.nodes, function(n) {
return n.longest_dist === depth;
});
n2 = _.find(graph.nodes, function(n) {
return n.longest_dist === depth + 1;
});
return graph.constraints.push({
axis: 'x',
left: n1.i,
right: n2.i,
gap: depth < 2 ? 5 * R : 8 * R
});
}
});
/* create nodes and links */
vis.selectAll('.link').remove();
vis.selectAll('.node').remove();
vis.selectAll('.link_label').remove();
links = vis.selectAll('.link').data(graph.links);
enter_links = links.enter().append('line').attr({
"class": function(d) {
return "link " + d.type;
}
});
enter_links.append('title').text(function(d) {
return 'link';
});
link_labels = vis.selectAll('.link_label').data(graph.links);
link_labels.enter().append('text').text(function(d) {
if (d.type === 'predicate') {
return d.predicate;
} else {
return d.type;
}
}).attr({
"class": 'link_label'
});
nodes = vis.selectAll('.node').data(graph.nodes);
enter_nodes = nodes.enter().append('g').attr({
"class": function(d) {
return "node " + d.type;
}
});
enter_nodes.append('circle').attr({
r: R
});
enter_as = enter_nodes.append('a').attr({
target: '_blank'
});
enter_as.filter(function(d) {
return d.type === 'external';
}).attr({
"class": 'valid',
'xlink:href': function(d) {
var splitted;
if (d.type !== 'external') {
return '';
}
splitted = ('' + d.id).split(':');
if (splitted[0] === 'http') {
return d.id;
} else {
return prefixes[splitted[0]] + splitted[1];
}
}
});
enter_as.append('text').text(function(d) {
switch (d.type) {
case 'external':
return d.id;
case 'content':
return 'CONTENT';
case 'span':
return "< >";
case 'entity':
return "(" + d.id + ")";
}
}).attr({
"class": 'label',
dy: '0.35em'
});
/* cola layout */
graph.nodes.forEach(function(v, i) {
v.width = PAD_MULTIPLIER * R;
v.height = PAD_MULTIPLIER * R;
v.x = i;
return v.y = i;
});
content.x = R + 20;
content.y = height / 2;
content.fixed = true;
d3cola = cola.d3adaptor().size([width, height]).linkDistance(60).constraints(graph.constraints).avoidOverlaps(true).nodes(graph.nodes).links(graph.links).on('tick', function() {
/* update nodes and links */
nodes.attr('transform', function(d) {
return "translate(" + d.x + "," + d.y + ")";
});
links.filter(function(d) {
return !d.inverted;
}).attr('x1', function(d) {
return d.source.x;
}).attr('y1', function(d) {
return d.source.y;
}).attr('x2', function(d) {
return d.target.x;
}).attr('y2', function(d) {
return d.target.y;
});
links.filter(function(d) {
return d.inverted;
}).attr('x1', function(d) {
return d.target.x;
}).attr('y1', function(d) {
return d.target.y;
}).attr('x2', function(d) {
return d.source.x;
}).attr('y2', function(d) {
return d.source.y;
});
return link_labels.attr({
transform: function(d) {
return "translate(" + ((d.source.x + d.target.x) / 2) + " " + ((d.source.y + d.target.y) / 2) + ") rotate(" + (Math.atan2(d.target.y - d.source.y, d.target.x - d.source.x) / Math.PI / 2 * 360) + ") translate(0,-5)";
}
});
});
enter_nodes.call(d3cola.drag);
return d3cola.start(100, 30, 30);
};
find_annotations();
/* Sentence highlighting
*/
current_sentence = null;
editor.on('cursorActivity', function() {
var cursor, from, search_cursor, to;
cursor = editor.getCursor();
search_cursor = editor.getSearchCursor('||', cursor);
search_cursor.findPrevious();
from = search_cursor.pos.to;
search_cursor.findNext();
to = search_cursor.pos.from;
if (current_sentence != null) {
current_sentence.clear();
}
return current_sentence = editor.markText(from, to, {
className: 'sentence_highlight'
});
});
/* Error reporting */
window.on_code_error = function(message, details) {
var error_marker;
error_marker = d3.select(document.createElement('a')).text('X').style({
'text-align': 'center',
background: 'red',
color: 'white',
display: 'inline-block',
width: '100%'
}).attr({
title: "Unexpected " + details.token + ": " + details.text
});
return editor.setGutterMarker(details.line, 'error_gutter', error_marker.node());
};
}).call(this);
language = '''
%lex
%%
\\n___\\n return '___'
"<" return '<'
">" return '>'
"(" return '('
")" return ')'
"[" return '['
"]" return ']'
[_] return 'UNDERSCORE'
(" "|\\t)+ return 'SPACES'
";" return ';'
[0-9] return 'DIGIT'
[a-zA-Z] return 'ALPHABETIC_ASCII'
. return 'OTHER_CHAR'
\\n return 'NEWLINE'
<<EOF>> return 'EOF'
/lex
%start Document
%%
Document
: EOF
| Code EOF
| Code '___' Directives EOF
;
Code
: TChars
| Span
| %empty
| Code Code
;
TChar
: OTHER_CHAR
| '('
| ')'
| '['
| ']'
| ';'
| UNDERSCORE
| DIGIT
| ALPHABETIC_ASCII
| SPACES
| NEWLINE
;
TChars
: TChar
{ parser.on_text($1); }
| TChars TChar
{ $$ = $1+$2; parser.on_text($2);}
;
Span
: '<' TChars '>' '(' Id ')'
{ parser.on_annotation($2, $5); }
;
TextWithoutNewlineNorSpaceChar
: NotNewlineNorSpaceChar
| TextWithoutNewlineNorSpaceChar NotNewlineNorSpaceChar
{ $$ = $1+$2; }
;
TextWithoutNewline
: NotNewlineChar
| TextWithoutNewline NotNewlineChar
{ $$ = $1+$2; }
;
NotNewlineNorSpaceChar
: OTHER_CHAR
| '<'
| '>'
| '('
| ')'
| '['
| ']'
| ';'
| UNDERSCORE
| DIGIT
| ALPHABETIC_ASCII
;
NotNewlineChar
: NotNewlineNorSpaceChar
| SPACES
;
SpacesOrNothing
: SPACES
| %empty
;
SpacesNewlinesOrNothing
: SPACES
| %empty
| SpacesNewlinesOrNothing NEWLINE SpacesNewlinesOrNothing
;
Id
: IdChar
| Id IdChar
{ $$ = $1+$2; }
;
IdChar
: UNDERSCORE
| DIGIT
| ALPHABETIC_ASCII
;
Directives
: Directive
| Directive NEWLINE Directives
;
Directive
: SpacesOrNothing '(' Id ')' SpacesOrNothing POSequence SpacesOrNothing
{ parser.on_directive($6, $3); }
| SpacesOrNothing '[' Id ']' SpacesOrNothing POSequence SpacesOrNothing
{ parser.on_directive($6, $3); }
| SpacesOrNothing
;
/* RDF */
POSequence
: POPair
{ $$ = [$1]; }
| POSequence SpacesOrNothing ';' SpacesOrNothing POPair
{ $$ = $1.concat([$5]); }
;
POPair
: Predicate SPACES Object
{ $$ = {predicate: $1, object: $3}; }
;
POChar
: OTHER_CHAR
| '<'
| '>'
| '('
| ')'
| '['
| ']'
| UNDERSCORE
| DIGIT
| ALPHABETIC_ASCII
;
POChars
: POChar
| POChar POChars
{ $$ = $1+$2; }
;
Predicate
: POChars
;
Object
: POChars
;
'''
Jison.print = () ->
jison_parser = Jison.Generator(bnf.parse(language), {type: 'lalr'}).createParser()
jison_parser.yy.parseError = (message, details) ->
on_code_error(message, details)
window.parser =
offset: 0
plain_text: ''
annotations: []
directives: {}
parse: (code) ->
this.offset = 0
this.annotations = []
this.directives = []
this.plain_text = ''
jison_parser.parse(code)
# resolve annotations-directive reference
this.annotations.forEach (a) =>
a.directives = []
this.directives.forEach (d) =>
if a.id is d.id
a.directives.push d
on_text: (content) ->
this.offset += content.length
this.plain_text += content
on_annotation: (content, id) ->
this.annotations.push {
id: id,
start: this.offset - content.length,
end: this.offset,
content: content
}
on_directive: (popairs, id) ->
this.directives.push {
id: id,
popairs: popairs
}
// Generated by CoffeeScript 1.10.0
(function() {
var jison_parser, language;
language = '%lex\n%%\n\n\\n___\\n return \'___\'\n"<" return \'<\'\n">" return \'>\'\n"(" return \'(\'\n")" return \')\'\n"[" return \'[\'\n"]" return \']\'\n[_] return \'UNDERSCORE\'\n(" "|\\t)+ return \'SPACES\'\n";" return \';\'\n[0-9] return \'DIGIT\'\n[a-zA-Z] return \'ALPHABETIC_ASCII\'\n. return \'OTHER_CHAR\'\n\\n return \'NEWLINE\'\n<<EOF>> return \'EOF\'\n\n/lex\n\n%start Document\n%%\n\nDocument\n : EOF\n | Code EOF\n | Code \'___\' Directives EOF\n ;\n\nCode\n : TChars\n | Span\n | %empty\n | Code Code\n ;\n\nTChar\n : OTHER_CHAR\n | \'(\'\n | \')\'\n | \'[\'\n | \']\'\n | \';\'\n | UNDERSCORE\n | DIGIT\n | ALPHABETIC_ASCII\n | SPACES\n | NEWLINE\n ;\n\nTChars\n : TChar\n { parser.on_text($1); }\n | TChars TChar\n { $$ = $1+$2; parser.on_text($2);}\n ;\n\nSpan\n : \'<\' TChars \'>\' \'(\' Id \')\'\n { parser.on_annotation($2, $5); }\n ;\n\nTextWithoutNewlineNorSpaceChar\n : NotNewlineNorSpaceChar\n | TextWithoutNewlineNorSpaceChar NotNewlineNorSpaceChar\n { $$ = $1+$2; }\n ;\n \nTextWithoutNewline\n : NotNewlineChar\n | TextWithoutNewline NotNewlineChar\n { $$ = $1+$2; }\n ;\n\nNotNewlineNorSpaceChar\n : OTHER_CHAR\n | \'<\'\n | \'>\'\n | \'(\'\n | \')\'\n | \'[\'\n | \']\'\n | \';\'\n | UNDERSCORE\n | DIGIT\n | ALPHABETIC_ASCII\n ;\n\nNotNewlineChar\n : NotNewlineNorSpaceChar\n | SPACES\n ;\n\nSpacesOrNothing\n : SPACES\n | %empty\n ;\n\nSpacesNewlinesOrNothing\n : SPACES\n | %empty\n | SpacesNewlinesOrNothing NEWLINE SpacesNewlinesOrNothing\n ;\n\nId\n : IdChar\n | Id IdChar\n { $$ = $1+$2; }\n ;\n\nIdChar\n : UNDERSCORE\n | DIGIT\n | ALPHABETIC_ASCII\n ;\n\nDirectives\n : Directive\n | Directive NEWLINE Directives\n ;\n\nDirective\n : SpacesOrNothing \'(\' Id \')\' SpacesOrNothing POSequence SpacesOrNothing\n { parser.on_directive($6, $3); }\n | SpacesOrNothing \'[\' Id \']\' SpacesOrNothing POSequence SpacesOrNothing\n { parser.on_directive($6, $3); }\n | SpacesOrNothing\n ;\n\n/* RDF */\nPOSequence\n : POPair\n { $$ = [$1]; }\n | POSequence SpacesOrNothing \';\' SpacesOrNothing POPair\n { $$ = $1.concat([$5]); }\n ;\n\nPOPair\n : Predicate SPACES Object\n { $$ = {predicate: $1, object: $3}; }\n ;\n\nPOChar\n : OTHER_CHAR\n | \'<\'\n | \'>\'\n | \'(\'\n | \')\'\n | \'[\'\n | \']\'\n | UNDERSCORE\n | DIGIT\n | ALPHABETIC_ASCII\n ;\n\nPOChars\n : POChar\n | POChar POChars\n { $$ = $1+$2; }\n ;\n\nPredicate\n : POChars\n ;\n\nObject\n : POChars\n ;\n';
Jison.print = function() {};
jison_parser = Jison.Generator(bnf.parse(language), {
type: 'lalr'
}).createParser();
jison_parser.yy.parseError = function(message, details) {
return on_code_error(message, details);
};
window.parser = {
offset: 0,
plain_text: '',
annotations: [],
directives: {},
parse: function(code) {
this.offset = 0;
this.annotations = [];
this.directives = [];
this.plain_text = '';
jison_parser.parse(code);
return this.annotations.forEach((function(_this) {
return function(a) {
a.directives = [];
return _this.directives.forEach(function(d) {
if (a.id === d.id) {
return a.directives.push(d);
}
});
};
})(this));
},
on_text: function(content) {
this.offset += content.length;
return this.plain_text += content;
},
on_annotation: function(content, id) {
return this.annotations.push({
id: id,
start: this.offset - content.length,
end: this.offset,
content: content
});
},
on_directive: function(popairs, id) {
return this.directives.push({
id: id,
popairs: popairs
});
}
};
}).call(this);
/**
* general topological sort
* @author SHIN Suzuki (shinout310@gmail.com)
* @param Array<Array> edges : list of edges. each edge forms Array<ID,ID> e.g. [12 , 3]
*
* @returns Array : topological sorted list of IDs
**/
function tsort(edges) {
var nodes = {}, // hash: stringified id of the node => { id: id, afters: lisf of ids }
sorted = [], // sorted list of IDs ( returned value )
visited = {}; // hash: id of already visited node => true
var Node = function(id) {
this.id = id;
this.afters = [];
};
// 1. build data structures
edges.forEach(function(v) {
var from = v[0], to = v[1];
if (!nodes[from]) nodes[from] = new Node(from);
if (!nodes[to]) nodes[to] = new Node(to);
nodes[from].afters.push(to);
});
// 2. topological sort
Object.keys(nodes).forEach(function visit(idstr, ancestors) {
var node = nodes[idstr],
id = node.id;
// if already exists, do nothing
if (visited[idstr]) return;
if (!Array.isArray(ancestors)) ancestors = [];
ancestors.push(id);
visited[idstr] = true;
node.afters.forEach(function(afterID) {
if (ancestors.indexOf(afterID) >= 0) // if already in ancestors, a closed chain exists.
throw new Error('closed chain : ' + afterID + ' is in ' + id);
visit(afterID.toString(), ancestors.map(function(v) { return v })); // recursive call
});
sorted.unshift(id);
});
return sorted;
}
/**
* TEST
**/
function tsortTest() {
// example 1: success
var edges = [
[1, 2],
[1, 3],
[2, 4],
[3, 4]
];
var sorted = tsort(edges);
console.log(sorted);
// example 2: failure ( A > B > C > A )
edges = [
['A', 'B'],
['B', 'C'],
['C', 'A']
];
try {
sorted = tsort(edges);
}
catch (e) {
console.log(e.message);
}
// example 3: generate random edges
var max = 100, iteration = 30;
function randomInt(max) {
return Math.floor(Math.random() * max) + 1;
}
edges = (function() {
var ret = [], i = 0;
while (i++ < iteration) ret.push( [randomInt(max), randomInt(max)] );
return ret;
})();
try {
sorted = tsort(edges);
console.log("succeeded", sorted);
}
catch (e) {
console.log("failed", e.message);
}
}
// for node.js
if (typeof exports == 'object' && exports === this) {
module.exports = tsort;
if (process.argv[1] === __filename) tsortTest();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment