// A quick hack to generate a code point set presence condition (JS)
const regenerate = require('regenerate');

// SETS holds the code point lists that main() merges into one set.
// Only one of the definitions below can be active at a time; the label above
// each one names the predicate it is meant for.

/* char-graphic? */
const SETS = ['Letter', 'Mark', 'Number', 'Punctuation', 'Symbol'].map(category =>
  require(`unicode-10.0.0/General_Category/${category}/code-points.js`));

/* char-blank? */
// const SETS = [['\t']].concat(['Space_Separator'].map(category =>
//   require(`unicode-10.0.0/General_Category/${category}/code-points.js`)));

/* char-whitespace? */
// const SETS = ['White_Space'].map(property =>
//   require(`unicode-10.0.0/Binary_Property/${property}/code-points.js`))
/**
 * Merges every code point list in SETS into a single regenerate set and
 * prints the JavaScript condition generated for the merged set.
 */
function main() {
  const set = regenerate();
  for (const s of SETS) {
    set.add(s);
  }
  console.log(toIf(set.valueOf()));
  // Alternative output format (Racket-style range list):
  // console.log(toRacketRanges(set.valueOf()));
}
// Name of the variable that the generated condition tests.
const VAR_NAME = 'c';
/**
 * Formats an integer as the shorter of its decimal and `0x`-prefixed
 * upper-case hexadecimal literals; ties go to hexadecimal.
 *
 * @param {number} a - Integer to format.
 * @returns {string} Source text of a numeric literal for `a`.
 */
function shortestInt(a) {
  // Format the magnitude and re-attach the sign, so a negative value can
  // never come out as the invalid literal `0x-...`.
  const sign = a < 0 ? '-' : '';
  const magnitude = Math.abs(a);
  const decimal = magnitude.toString();
  const hex = `0x${magnitude.toString(16).toUpperCase()}`;
  return `${sign}${decimal.length < hex.length ? decimal : hex}`;
}
/**
 * Renders a test of VAR_NAME against one inclusive range.
 *
 * @param {number} a - First code point of the range.
 * @param {number} b - Last code point of the range.
 * @returns {string} An equality test when `a === b`, otherwise a pair of
 *   strict comparisons against `a - 1` and `b + 1`.
 */
function printOneRange(a, b) {
  if (a === b) {
    return `${VAR_NAME} === ${shortestInt(a)}`;
  }
  return `${VAR_NAME} > ${shortestInt(a - 1)} && ${VAR_NAME} < ${shortestInt(b + 1)}`;
}
/**
 * Builds a condition that holds when VAR_NAME is none of the given code points.
 *
 * @param {number[]} cps - Code points to exclude; the array is not modified.
 * @returns {string} A chain of `!==` tests when no two code points are
 *   adjacent, otherwise a negated range condition.
 */
function printExcepts(cps) {
  // Sort a copy with a numeric comparator. The default sort compares the
  // values as strings (10 would sort before 9), which hides adjacent code
  // points from genRanges; sorting in place would also reorder the caller's array.
  const sorted = [...cps].sort((a, b) => a - b);
  const ranges = genRanges(sorted);
  if (ranges.length === sorted.length) {
    // Every range is a single code point, so individual tests are the shortest form.
    return sorted.map(cp => `${VAR_NAME} !== ${shortestInt(cp)}`).join(' && ');
  }
  return `!(${printRanges(ranges)})`;
}
/**
 * Collapses a list of code points into inclusive [first, last] ranges,
 * merging each run of consecutive values into one range.
 *
 * @param {number[]} codepoints - Code points; runs are only detected between
 *   neighbouring elements, so the list is expected in ascending order.
 * @returns {Array<[number, number]>} One pair per run; empty for empty input.
 */
function genRanges(codepoints) {
  // Without this guard an empty list would yield [[undefined, undefined]].
  if (codepoints.length === 0) {
    return [];
  }
  const result = [];
  let start = codepoints[0];
  let prev = start;
  for (let i = 1; i < codepoints.length; i++) {
    const v = codepoints[i];
    if (v !== prev + 1) {
      // The run ended at `prev`; emit it and open a new run at `v`.
      result.push([start, prev]);
      start = v;
    }
    prev = v;
  }
  result.push([start, prev]);
  return result;
}
/**
 * Renders a list of code point ranges as a JavaScript boolean expression
 * over VAR_NAME.
 *
 * Neighbouring ranges separated by exactly one missing code point (or by two,
 * when both neighbours span more than one code point) are folded into a
 * single wider range test combined with an exclusion test for the missing
 * code points.
 *
 * @param {Array<[number, number]>} ranges - Inclusive [first, last] pairs;
 *   expected in ascending order, as genRanges produces them from sorted input.
 * @returns {string} The alternatives joined with ` || `, or `false` when
 *   there are no ranges.
 */
function printRanges(ranges) {
  // An empty set matches nothing; without this guard ranges[0] is undefined.
  if (ranges.length === 0) {
    return 'false';
  }

  const result = [];
  let excepts = [];
  let prevRange = ranges[0];
  let groupStart = prevRange[0];

  // Emits the group that starts at groupStart and ends at `groupEnd`.
  const flush = (groupEnd) => {
    const span = printOneRange(groupStart, groupEnd);
    result.push(excepts.length === 0 ? span : `(${span} && ${printExcepts(excepts)})`);
  };

  for (let i = 1; i < ranges.length; i++) {
    const r = ranges[i];
    const gap = r[0] - prevRange[1] - 1;
    const bothMulti = r[0] !== r[1] && prevRange[0] !== prevRange[1];
    if (gap === 1 || (gap === 2 && bothMulti)) {
      // Fold `r` into the current group and remember the skipped code points.
      for (let cp = prevRange[1] + 1; cp < r[0]; cp++) {
        excepts.push(cp);
      }
    } else {
      flush(prevRange[1]);
      excepts = [];
      groupStart = r[0];
    }
    prevRange = r;
  }
  flush(prevRange[1]);

  return result.join(' || ');
}
/**
 * Converts a list of code points into a JavaScript condition string.
 *
 * @param {number[]} codepoints - Code points, passed straight to genRanges.
 * @returns {string} The expression produced by printRanges for those ranges.
 */
function toIf(codepoints) {
  return printRanges(genRanges(codepoints));
}
/**
 * Renders code points as a space-separated list in which a lone code point is
 * written as a number and a run as `(range FROM TO)`.
 *
 * NOTE(review): TO is the last code point of the run (inclusive), whereas
 * Racket's built-in `range` excludes its end argument — confirm how the
 * consumer of this output interprets `range`.
 *
 * @param {number[]} codepoints - Code points, passed straight to genRanges.
 * @returns {string} The rendered list.
 */
function toRacketRanges(codepoints) {
  return genRanges(codepoints)
    .map(([from, to]) => (from === to ? from : `(range ${from} ${to})`))
    .join(' ');
}
// Script entry point: generate and print the condition.
main();
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment