Skip to content

Instantly share code, notes, and snippets.

@tanraya
Created April 30, 2016 20:07
Show Gist options
  • Save tanraya/c0bbc9a8f183e5a10e7f27232362f1be to your computer and use it in GitHub Desktop.
Save tanraya/c0bbc9a8f183e5a10e7f27232362f1be to your computer and use it in GitHub Desktop.
/**
 * Normalises a list of markup segments over a text so that no segment covers
 * a run of whitespace that lies entirely inside it.
 *
 * A segment is a three-element array `[name, start, end]`. Whitespace segments
 * produced here use an inclusive `start` and an exclusive `end` (numeric
 * offsets into `text`); markup segments are expected to follow the same
 * convention.
 */
class Tagger {
  /**
   * @param {string} text - Text the segment offsets refer to.
   * @param {Array<Array>} [markup=[]] - Markup segments as `[name, start, end]`.
   *   The list is copied, so the caller's arrays are never modified.
   */
  constructor(text, markup = []) {
    this.text = text;
    this.markup = this.cloneMarkup(markup);
  }

  /**
   * Copies the first three items of every segment into a fresh array.
   *
   * @param {Array<Array>} markup - Segments to copy.
   * @returns {Array<Array>} New list of new `[name, start, end]` arrays.
   */
  cloneMarkup(markup) {
    return markup.map((segment) => [segment[0], segment[1], segment[2]]);
  }

  /**
   * Calculates the segments occupied by whitespace characters.
   *
   * Every maximal run of `\s` characters yields one segment
   * `[spaces, start, end]`, where `spaces` is a string of plain spaces as long
   * as the run (the original characters are not preserved). Runs at the very
   * beginning of the text (offset 0) are reported like any other run.
   *
   * @returns {Array<Array>} Whitespace segments in ascending order; each one
   *   carries a non-enumerable `whitespace === true` marker.
   */
  calcWhitespaceSegments() {
    const segments = [];
    // Created per call: a /g regex carries `lastIndex` between exec() calls.
    const whitespaceRun = /\s+/g;
    let match = whitespaceRun.exec(this.text);
    while (match !== null) {
      const start = match.index;
      const end = start + match[0].length;
      const segment = [' '.repeat(end - start), start, end];
      // `whitespace: true` marks the segment as a whitespace one. Defined via
      // defineProperty so it stays non-enumerable and does not show up when
      // the segment is compared or serialised as a plain array.
      Object.defineProperty(segment, 'whitespace', { value: true });
      segments.push(segment);
      match = whitespaceRun.exec(this.text);
    }
    return segments;
  }

  // TODO: 2. Determine how the segments nest and record it.
  // TODO: 3. Build a tree structure based on step 2.
  /**
   * Cuts whitespace out of the markup segments.
   *
   * Each markup segment that fully contains one or more whitespace runs is
   * replaced by the pieces left between those runs; empty pieces are dropped.
   * Whitespace runs that only partially overlap a segment do not affect it.
   * Segments equal by value to an already collected one are not added twice.
   * The whitespace segments themselves are part of the result.
   *
   * The call does not modify `this.markup`, so repeated calls return the same
   * data.
   *
   * @returns {Array<Array>} Resulting segments (markup pieces plus whitespace
   *   segments).
   */
  calculate() {
    // Keep the combined list local: writing it back to `this.markup` would
    // feed the whitespace segments in again on the next call.
    const segments = this.markup.concat(this.calcWhitespaceSegments());
    const isSame = (x, y) => x[0] === y[0] && x[1] === y[1] && x[2] === y[2];
    const collected = [];
    const replaced = [];

    segments.forEach((segment) => {
      const [name, start, end] = segment;
      // Whitespace runs lying entirely inside this segment. They keep the
      // ascending order in which calcWhitespaceSegments() produced them.
      const gaps = segments.filter(
        (other) =>
          other !== segment &&
          other.whitespace === true &&
          other[1] >= start &&
          other[2] <= end
      );

      if (gaps.length === 0) {
        if (!collected.some((x) => isSame(x, segment))) {
          collected.push(segment);
        }
        return;
      }

      // Walk the gaps left to right and emit the non-empty stretches between
      // them, so several runs inside one segment never produce overlaps.
      let cursor = start;
      gaps.forEach((gap) => {
        if (gap[1] > cursor) {
          collected.push([name, cursor, gap[1]]);
        }
        cursor = gap[2];
      });
      if (cursor < end) {
        collected.push([name, cursor, end]);
      }
      replaced.push(segment);
    });

    // Anything equal by value to a segment that has been split is dropped.
    const result = collected.filter(
      (x) => !replaced.some((r) => isSame(r, x))
    );

    // NOTE(review): this comparator is kept as it was, but it is not a
    // consistent ordering (it returns 0 for many unequal pairs and is not
    // transitive), so the resulting order is implementation-defined and may
    // differ between JavaScript engines. Confirm the intended order before
    // relying on it.
    result.sort((a, b) => {
      if (a[1] > b[1] && a[0] > b[0]) return 1;
      if (a[1] < b[1] && a[0] < b[0]) return -1;
      return 0;
    });
    return result;
  }
}
// Spec for Tagger. Relies on the test runner's global describe/it/beforeEach
// and on an `expect` with chai-style chains being in scope.
describe('Tagger dev', () => {
  let tagger;

  describe('#calculate', () => {
    beforeEach(() => {
      tagger = new Tagger('Hello world', [
        ['strong', 0, 5],
        ['span', 0, 5],
        ['em', 5, 11],
        ['s', 2, 8]
      ]);
    });

    // <strong><span>He<s>llo</s></span></strong> <em><s>wo</s>rld</em>
    it('calculates proper data', () => {
      // Compared as an unordered set: the comparator used by calculate() is
      // not a consistent ordering, so the exact order of the result depends
      // on the engine's sort implementation.
      expect(tagger.calculate()).to.have.deep.members([
        [ 'strong', 0, 5 ],
        [ 'span', 0, 5 ],
        [ 'em', 6, 11 ],
        [ 's', 2, 5 ],
        [ ' ', 5, 6 ],
        [ 's', 6, 8 ]
      ]);
    });
  });

  describe('#calcWhitespaceSegments', () => {
    it('calc right 1', () => {
      tagger = new Tagger('Hello world');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal([[' ', 5, 6]]);
    });

    // Three spaces between the words, matching the expected 5..8 range.
    it('calc right 2', () => {
      tagger = new Tagger('Hello   world');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal([['   ', 5, 8]]);
    });

    // Double spaces after "Once" and "time", matching the expected offsets.
    it('calc right 3', () => {
      tagger = new Tagger('Once  upon a time  in America');
      expect(tagger.calcWhitespaceSegments()).to.deep.equal(
        [['  ', 4, 6], [' ', 10, 11], [' ', 12, 13], ['  ', 17, 19], [' ', 21, 22]]
      );
    });
  });
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment