Skip to content

Instantly share code, notes, and snippets.

@ebraminio
Created October 10, 2019 14:51
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ebraminio/c089ec698ef222b42509f478af1f8bb5 to your computer and use it in GitHub Desktop.
Save ebraminio/c089ec698ef222b42509f478af1f8bb5 to your computer and use it in GitHub Desktop.
var fs = require('fs');
var lineReader = require('readline').createInterface({
input: require('fs').createReadStream('fawiki-20190920-pages-articles.xml')
});
var m = new Map();
var i = 0;
lineReader.on('line', function (line) {
line.split(/[\t\[ \]\|\.\&;:«»"'<>()=/\-\\/\{\}،٬,#_%0-9۰-var fs = require('fs');
// Whole contents of 'Collection UNI.txt' as a single string; it is split into
// lines further down by the code that consumes it.
var bijankhan = fs.readFileSync('Collection UNI.txt').toString();

// First tab-separated column of every line of 'ye-freq.txt', with each 'ئ'
// replaced by 'ا'.
var words = [];
for (const row of fs.readFileSync('ye-freq.txt').toString().split('\n')) {
  const [firstColumn] = row.split('\t');
  words.push(firstColumn.replace(/ئ/g, 'ا'));
}
// Streams 'fawiki-20190920-pages-articles.xml' line by line, collects every
// token that contains the letter 'ئ', and on completion writes two files:
//   ye.txt      - every matching token in encounter order, duplicates included
//   ye-freq.txt - distinct tokens as `token<TAB>count`, most frequent first
// The top-level bindings stay `var`, matching the other declarations in this file.
var fs = require('fs');
var lineReader = require('readline').createInterface({
  input: fs.createReadStream('fawiki-20190920-pages-articles.xml'),
});
var result = [];

lineReader.on('line', (line) => {
  // A line without the letter cannot contribute a token, so skip splitting it.
  if (!line.includes('ئ')) {
    return;
  }

  // Split on whitespace, punctuation, brackets and ASCII/Persian digits.
  const tokens = line.split(/[\t\[ \]\|\.\&;:«»"'<>()=/\-\\/\{\}،,#_%0-9۰-۹\*؟\?!]/);
  for (const token of tokens) {
    if (token.includes('ئ')) {
      result.push(token);
    }
  }
});

lineReader.on('close', () => {
  fs.writeFileSync('ye.txt', result.join('\n'));

  // Tally occurrences; Map keeps first-seen order, which the stable sort
  // below preserves among tokens with equal counts.
  const tally = new Map();
  for (const token of result) {
    const seenSoFar = tally.has(token) ? tally.get(token) : 0;
    tally.set(token, seenSoFar + 1);
  }

  const ranked = Array.from(tally).sort((a, b) => b[1] - a[1]);
  const rows = ranked.map(([token, count]) => token + '\t' + count);
  fs.writeFileSync('ye-freq.txt', rows.join('\n'));
});
// Collects the distinct first space-separated fields of the `bijankhan` text
// whose normalized form is present in `words`.
var result = new Set();

// Built once up front: Array#includes would rescan all of `words` for every
// corpus line, whereas Set#has is a constant-time lookup with the same
// equality semantics (SameValueZero).
const knownWords = new Set(words);

for (const corpusLine of bijankhan.split('\n')) {
  const entry = corpusLine.split(' ')[0];
  // Compare with 'ي' and 'ك' rewritten to 'ی' and 'ک'; the entry itself is
  // stored in its original spelling.
  const normalized = entry.replace(/ي/g, 'ی').replace(/ك/g, 'ک');
  if (knownWords.has(normalized)) {
    result.add(entry);
  }
}
console.log([...result].join('\n'));۹٫\*؟\?!]/).forEach(x => {
if (x)
m.set(x, m.has(x) ? m.get(x) + 1 : 1);
});
// if (++i === 100000) lineReader.close();
});
lineReader.on('close', () => {
fs.writeFileSync('result.txt',
[...m].sort((x, y) => y[1] - x[1]).map(x => x[1] + '\t' + x[0]).join('\n'));
// console.log(i);
});
/**
 * Counts how often each token occurs in a text file and writes the tally,
 * most frequent first, as `count<TAB>token` lines joined by '\n'.
 *
 * The input is streamed line by line, so only the token -> count map is held
 * in memory, never the whole file. The output file is written once, when the
 * reader emits 'close'.
 *
 * @param {string} inputPath - Path of the file to tokenize.
 * @param {string} outputPath - Path the frequency list is written to.
 * @returns {void}
 */
function writeTokenFrequencies(inputPath, outputPath) {
  const fs = require('fs');
  const readline = require('readline');

  // Token boundaries: tab, space, ASCII and Arabic-script punctuation,
  // brackets/braces, and ASCII (0-9) and Persian (۰-۹) digits. Defined once
  // here rather than inside the per-line handler.
  const separators = /[\t\[ \]\|\.\&;:«»"'<>()=/\-\\/\{\}،٬,#_%0-9۰-۹٫\*؟\?!]/;

  const counts = new Map();
  const lineReader = readline.createInterface({
    input: fs.createReadStream(inputPath),
  });

  lineReader.on('line', (line) => {
    for (const token of line.split(separators)) {
      // Adjacent separators yield empty strings; those are not tokens.
      if (token) {
        counts.set(token, (counts.get(token) || 0) + 1);
      }
    }
  });

  lineReader.on('close', () => {
    const rows = [...counts]
      .sort((a, b) => b[1] - a[1])
      .map(([token, count]) => `${count}\t${token}`);
    fs.writeFileSync(outputPath, rows.join('\n'));
  });
}

writeTokenFrequencies('fawiki-20190920-pages-articles.xml', 'result.txt');
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment