Skip to content

Instantly share code, notes, and snippets.

@vsemozhetbyt
Created January 17, 2016 10:07
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vsemozhetbyt/06f67e6647310339500d to your computer and use it in GitHub Desktop.
Save vsemozhetbyt/06f67e6647310339500d to your computer and use it in GitHub Desktop.
'use strict';
/******************************************************************************/
// Input file path, taken from the first command-line argument.
const path = process.argv[2];
if (!path) {
  // A missing argument is a usage error: report it on stderr and exit with a
  // non-zero status so shells and calling scripts can detect the failure.
  console.error('Add the file path, please.');
  process.exit(1);
}
/******************************************************************************/
const fs = require('fs');

// The file is read as a stream and consumed line by line through readline.
const input = fs.createReadStream(path, {encoding: 'utf8'}); //ascii utf8 utf16le
// Without an 'error' listener a missing or unreadable file would surface as an
// uncaught exception; report it briefly and exit with a failure status instead.
input.on('error', err => {
  console.error(`Cannot read ${path}: ${err.message}`);
  process.exit(1);
});
const rl = require('readline').createInterface({
  input,
  terminal: false,
  historySize: 0
});

// Matches the positions between digit groups of three, counted from the end.
const formatNumberRE = /\B(?=(?:\d{3})+$)/g;
// A line counts as a headword when it is non-empty and its first character is
// neither whitespace nor '#'.
const headwordsRE = /^[^\s#]/;
// A Set (rather than a plain object) so that headwords such as "constructor",
// "toString" or "__proto__" are not confused with inherited object properties.
const uniqueHeadwords = new Set();
// Second and later occurrences of a headword, in file order.
const doubles = [];
let allHeadwords = 0;
let lineNum = 0;

// Renders a non-negative integer with a space between groups of three digits.
const formatNumber = n => String(n).replace(formatNumberRE, ' ');
/******************************************************************************/
rl.on('line', line => {
  lineNum++;
  // Only the very first line of the file can carry a byte order mark.
  const text = lineNum === 1 ? line.replace(/^\uFEFF/, '') : line;
  if (!headwordsRE.test(text)) return;

  allHeadwords++;
  if (uniqueHeadwords.has(text)) doubles.push(text);
  else uniqueHeadwords.add(text);
}).on('close', () => {
  // The summary is framed by one empty line above and below.
  console.log([
    '',
    `All headwords: ${formatNumber(allHeadwords)}`,
    `Unique headwords: ${formatNumber(uniqueHeadwords.size)}`,
    `Doubles: ${formatNumber(doubles.length)}`,
    ''
  ].join('\n'));

  if (doubles.length) {
    // Insert ".doubles" before the file extension (or append it when there is
    // none). Path separators are excluded from the extension so that a dot in
    // a directory name is never mistaken for the start of the extension.
    const doublesPath = path.replace(/(\.[^.\\\/]+)?$/, '.doubles$1');
    // The report starts with a BOM and lists one repeated headword per line.
    fs.writeFileSync(doublesPath, `\uFEFF${doubles.join('\n')}\n`, {encoding: 'utf8'});
  }
});
/******************************************************************************/
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment