'use strict';
/******************************************************************************/
// Command-line script: reads the text file given as the first argument line by
// line, counts the headword lines, and reports how many of them are repeated.
// A line counts as a headword when its first character is neither whitespace
// nor '#'. Repeated headwords are written next to the input file, with
// '.doubles' inserted before the file extension.
/******************************************************************************/
const path = process.argv[2];
if (!path) {
  // Usage problem: report on stderr and signal failure to the calling shell.
  console.error('Add the file path, please.');
  process.exit(1);
}
/******************************************************************************/
const fs = require('fs');
const input = fs.createReadStream(path, { encoding: 'utf8' }); // ascii utf8 utf16le
const rl = require('readline').createInterface({
  input,
  terminal: false,
  historySize: 0,
});
// Matches every position that is followed by a whole number of 3-digit groups
// up to the end of the string, i.e. the places where a thousands separator goes.
const formatNumberRE = /\B(?=(?:\d{3})+$)/g;
// A headword line starts with any character except whitespace and '#'.
const headwordsRE = /^[^\s#]/;
// A Set (not a plain object) so that headwords such as "constructor",
// "toString" or "__proto__" cannot collide with Object.prototype members.
const uniqueHeadwords = new Set();
// Every repeated occurrence of a headword, in file order.
const doubles = [];
let allHeadwords = 0;
let lineNum = 0;
/******************************************************************************/
/**
 * Formats a non-negative integer with a space between groups of three digits.
 *
 * @param {number} value - The integer to format.
 * @returns {string} The grouped decimal representation, e.g. "1 234 567".
 */
function formatNumber(value) {
  return String(value).replace(formatNumberRE, ' ');
}

/**
 * Reports a failure to read the input file and terminates with a non-zero
 * exit status.
 *
 * @param {Error} err - The error emitted while reading.
 */
function reportReadError(err) {
  console.error(`Cannot read "${path}": ${err.message}`);
  process.exit(1);
}
/******************************************************************************/
// Depending on the Node.js version, a stream error surfaces on the stream
// itself, on the readline interface, or on both; the first handler to run exits.
input.on('error', reportReadError);
rl.on('error', reportReadError);

rl.on('line', (rawLine) => {
  lineNum++;
  // Strip a leading byte-order mark so it does not become part of the first
  // headword.
  const line = lineNum === 1 ? rawLine.replace(/^\uFEFF/, '') : rawLine;
  if (!headwordsRE.test(line)) return;

  allHeadwords++;
  if (uniqueHeadwords.has(line)) {
    doubles.push(line);
  } else {
    uniqueHeadwords.add(line);
  }
}).on('close', () => {
  console.log(
`
All headwords: ${formatNumber(allHeadwords)}
Unique headwords: ${formatNumber(uniqueHeadwords.size)}
Doubles: ${formatNumber(doubles.length)}
`
  );
  if (doubles.length > 0) {
    // Insert '.doubles' before the extension of the final path segment only;
    // excluding '/' and '\' keeps a dot inside a directory name from being
    // mistaken for the extension.
    const doublesPath = path.replace(/(\.[^./\\]+)?$/, '.doubles$1');
    // The output starts with a byte-order mark and ends with a newline.
    fs.writeFileSync(doublesPath, `\uFEFF${doubles.join('\n')}\n`, { encoding: 'utf8' });
  }
});
/******************************************************************************/
// Sign up for free to join this conversation on GitHub.
// Already have an account? Sign in to comment