Skip to content

Instantly share code, notes, and snippets.

@glebm

glebm/generate.js

Last active Oct 24, 2017
Embed
What would you like to do?
A quick hack to generate a codepoint set presence condition (JS)
const regenerate = require('regenerate');
/*
 * SETS is the list of code point collections that main() unions together
 * before printing a membership condition.
 *
 * Exactly one `const SETS` definition may be active at a time; the other
 * variants are kept commented out so they can be swapped in by hand.
 */
/* char-graphic? */
// Active variant: one `code-points.js` module per listed General_Category.
const SETS = ['Letter', 'Mark', 'Number', 'Punctuation', 'Symbol'].map(category =>
require(`unicode-10.0.0/General_Category/${category}/code-points.js`));
/* char-blank? */
// Alternative variant: a literal tab plus the Space_Separator category.
// const SETS = [['\t']].concat(['Space_Separator'].map(category =>
// require(`unicode-10.0.0/General_Category/${category}/code-points.js`)));
/* char-whitespace? */
// Alternative variant: the White_Space binary property.
// const SETS = ['White_Space'].map(property =>
// require(`unicode-10.0.0/Binary_Property/${property}/code-points.js`))
/**
 * Script entry point: merges every collection in SETS into a single
 * regenerate set and prints the resulting membership condition to stdout.
 */
function main() {
  const union = regenerate();
  SETS.forEach((codePoints) => {
    union.add(codePoints);
  });
  const allCodePoints = union.valueOf();
  console.log(toIf(allCodePoints));
  //console.log(toRacketRanges(set.valueOf()));
}
// Name of the variable that the generated condition tests (e.g. `c === 32`).
const VAR_NAME = 'c';
/**
 * Renders an integer as whichever literal is shorter: plain decimal or
 * `0x`-prefixed upper-case hexadecimal. On a tie the hexadecimal form wins.
 *
 * @param {number} a - The integer to render.
 * @returns {string} The chosen literal.
 */
function shortestInt(a) {
  const decimal = a.toString(10);
  const hex = `0x${a.toString(16).toUpperCase()}`;
  if (decimal.length < hex.length) {
    return decimal;
  }
  return hex;
}
/**
 * Builds the condition that matches one inclusive range [a, b].
 *
 * A single-value range becomes an equality test; a wider range becomes a
 * pair of strict comparisons against the values just outside the range.
 *
 * @param {number} a - First code point of the range (inclusive).
 * @param {number} b - Last code point of the range (inclusive).
 * @returns {string} Condition source text using VAR_NAME.
 */
function printOneRange(a, b) {
  if (a === b) {
    return `${VAR_NAME} === ${shortestInt(a)}`;
  }
  const below = shortestInt(a - 1);
  const above = shortestInt(b + 1);
  return `${VAR_NAME} > ${below} && ${VAR_NAME} < ${above}`;
}
/**
 * Builds the condition that rejects every code point in `cps`.
 *
 * When all excluded code points are isolated, the result is a chain of
 * `!==` tests. When some of them form consecutive runs, the result is the
 * negation of the range condition produced by printRanges.
 *
 * @param {number[]} cps - Code points to exclude; not modified.
 * @returns {string} Condition source text using VAR_NAME.
 */
function printExcepts(cps) {
  // Array#sort without a comparator orders numbers as strings (10 before 9),
  // which would hide consecutive runs from genRanges. Sort numerically, and
  // on a copy so the caller's array is left untouched.
  const sorted = [...cps].sort((x, y) => x - y);
  const ranges = genRanges(sorted);
  if (ranges.length === sorted.length) {
    return sorted.map(cp => `${VAR_NAME} !== ${shortestInt(cp)}`).join(' && ');
  }
  return `!(${printRanges(ranges)})`;
}
/**
 * Collapses an ascending list of code points into inclusive ranges.
 *
 * Consecutive values (each exactly one greater than the previous) are
 * merged into a single `[first, last]` pair; an isolated value `v` becomes
 * `[v, v]`.
 *
 * @param {number[]} codepoints - Code points in ascending order.
 * @returns {Array<[number, number]>} Inclusive ranges in input order;
 *   an empty array when `codepoints` is empty.
 */
function genRanges(codepoints) {
  const result = [];
  // Without this guard an empty input would yield [[undefined, undefined]].
  if (codepoints.length === 0) {
    return result;
  }
  let start = codepoints[0];
  let prev = start;
  for (let i = 1; i < codepoints.length; i++) {
    const v = codepoints[i];
    if (v !== prev + 1) {
      // The run ended at `prev`; close it and open a new one at `v`.
      result.push([start, prev]);
      start = v;
    }
    prev = v;
  }
  // Close the final run.
  result.push([start, prev]);
  return result;
}
/**
 * Builds an `||`-joined condition that matches any of the given ranges.
 *
 * Neighbouring ranges separated by a small gap are fused into one wider
 * range with the gap values excluded via printExcepts. A gap of one value
 * is always fused; a gap of two values is fused only when both neighbouring
 * ranges span more than one code point.
 *
 * @param {Array<[number, number]>} ranges - Inclusive ranges, ascending.
 * @returns {string} Condition source text; `'false'` when `ranges` is empty,
 *   since nothing can match an empty set.
 */
function printRanges(ranges) {
  // Previously an empty list crashed on `ranges[ranges.length - 1][0]`.
  if (ranges.length === 0) {
    return 'false';
  }
  const result = [];
  let excepts = [];
  let start = null;
  // Emits the clause for the group that ends with `last`, then resets state.
  const flush = (last) => {
    if (excepts.length === 0) {
      result.push(printOneRange(last[0], last[1]));
    } else {
      result.push(`(${printOneRange(start, last[1])} && ${printExcepts(excepts)})`);
    }
    excepts = [];
    start = null;
  };
  let prevRange = ranges[0];
  for (let i = 1; i < ranges.length; i++) {
    const r = ranges[i];
    // Number of code points strictly between the two ranges.
    const numExcepts = r[0] - prevRange[1] - 1;
    const bothSpanSeveral = r[1] !== r[0] && prevRange[0] !== prevRange[1];
    if (numExcepts === 1 || (numExcepts === 2 && bothSpanSeveral)) {
      // Fuse `r` into the current group and remember the skipped values.
      if (start === null) {
        start = prevRange[0];
      }
      for (let j = 1; j <= numExcepts; j++) {
        excepts.push(prevRange[1] + j);
      }
    } else {
      flush(prevRange);
    }
    prevRange = r;
  }
  // After the loop `prevRange` is the last range; emit the pending group.
  flush(prevRange);
  return result.join(' || ');
}
/**
 * Converts a list of code points into the source text of a membership
 * condition by grouping them into ranges and printing those ranges.
 *
 * @param {number[]} codepoints - Code points, passed straight to genRanges.
 * @returns {string} Whatever printRanges produces for the grouped ranges.
 */
function toIf(codepoints) {
  const ranges = genRanges(codepoints);
  return printRanges(ranges);
}
/**
 * Renders code points as a space-separated list in which an isolated value
 * is printed as a bare number and a run is printed as `(range FROM TO)`,
 * with FROM and TO being the first and last code point of the run.
 *
 * NOTE(review): Racket's built-in `range` excludes its end argument; confirm
 * that whatever consumes this output treats TO as inclusive.
 *
 * @param {number[]} codepoints - Code points, passed straight to genRanges.
 * @returns {string} The space-separated list.
 */
function toRacketRanges(codepoints) {
  const parts = [];
  for (const [from, to] of genRanges(codepoints)) {
    if (from === to) {
      parts.push(from);
    } else {
      parts.push(`(range ${from} ${to})`);
    }
  }
  return parts.join(' ');
}
main();
// Output of the script above for char-whitespace?
c > 8 && c < 14 || c === 32 || c === 133 || c === 160 || c === 5760 ||
c > 8191 && c < 8203 || c > 8231 && c < 8234 || c === 8239 ||
c === 8287 || c === 12288
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.