|
var fs = require('fs'), gem = require('gematriya'), heb = require('./hebletters'), iconv = new require('iconv').Iconv('CP1255', 'UTF8'); |
|
|
|
// Shared state: filled in by the file-reading loop below and consumed by callback().
var gems = [],       // [word, gematria] pairs for all unique words; built in callback()
    words = [],      // words[i] = unique words of source file i (slot 0 is never set)
    names = [],      // names[i] = heading line of source file i with its <H1> tags stripped
    done = 0,        // number of source files processed so far
    finalPost = '';  // text of the post, accumulated by callback()

// Number of source files (x/x01.htm .. x/x35.htm); also the completion threshold.
var FILE_COUNT = 35;

for (var i = 1; i <= FILE_COUNT; i++) {
  // The IIFE pins the current value of i for the asynchronous readFile callback.
  (function (i) {
    fs.readFile('x/x' + pad(i) + '.htm', function (err, data) {
      // Check the error before touching data: on a failed read data is
      // undefined and converting it would throw before the error is reported.
      if (err) {
        console.error('Error occurred fetching file ' + i);
        console.error(err);
        process.exit(1); // non-zero status so callers can see the failure
      }

      console.log('evaling ' + i);
      var lines = iconv.convert(data).toString().split('\n');

      // File 1 uses different offsets from the others: its heading sits on
      // line index 24 (11 elsewhere) and its body starts at index 26 (13 elsewhere).
      var isFirst = i === 1;
      names[i] = lines[isFirst ? 24 : 11].replace(/<\/?H1+>/g, '');

      var text = lines.slice(isFirst ? 26 : 13).join('')
        // Drop <B>...</B> spans together with their text, any remaining
        // upper-case tags, and single-character {x} markers.
        .replace(/(<B>[^<]+<\/B>|<\/?[A-Z]+>|{.})/g, '')
        // Punctuation becomes a word separator.
        .replace(/[,.;:-]/g, ' ');

      // NOTE(review): heb.toRegular is passed unbound and receives
      // (word, index, array) from map — confirm it ignores the extra arguments.
      words[i] = text.split(/\s+/).map(heb.toRegular).filter(function (word, idx, all) {
        // Keep non-empty words that do not start with '(' and only their first occurrence.
        return word && word[0] !== '(' && all.indexOf(word) === idx;
      });

      // Reads finish in arbitrary order; the last one to finish kicks off the report.
      if (++done === FILE_COUNT) {
        fs.writeFileSync('tmp/names.json', JSON.stringify(names));
        callback();
      }
    });
  })(i);
}
|
|
|
//callback(); |
|
|
|
/**
 * Returns the distinct entries of a word list, keeping the first occurrence
 * of each. Uses a lookup table instead of indexOf so it stays linear on the
 * combined word list.
 * Assumes the entries are strings — TODO confirm against heb.toRegular.
 *
 * @param {Array} list - words, possibly with repeats
 * @returns {Array} new array without repeats, original order preserved
 */
function uniqueWords(list) {
  var seen = Object.create(null); // no prototype, so any word is a safe key
  return list.filter(function (word) {
    if (seen[word]) {
      return false;
    }
    seen[word] = true;
    return true;
  });
}

/**
 * Pairs a word with its gematria value.
 *
 * @param {string} word
 * @returns {Array} [heb.toFinal(word), gem(word)]
 */
function toGemPair(word) {
  return [heb.toFinal(word), gem(word)];
}

/**
 * Builds the report once every source file has been parsed into `words` and
 * `names`: top 10 for files 1-5 ("Torah"), top 10 for file 25 ("Tehillim"),
 * top 5 per file, and top 20 overall. Appends everything to `finalPost`,
 * writes it to tmp/post.txt, and dumps the two ranked lists to tmp/torah and
 * tmp/tehillim. Replaces the global `words` with the flat unique word list
 * and fills the global `gems`.
 */
function callback() {
  var torahPairs = uniqueWords([].concat.apply([], words.slice(1, 6))).map(toGemPair);
  // Rank the full list explicitly so the dump below is written in descending order.
  var torahTop = getTopX(torahPairs, torahPairs.length);
  finalPost += 'Top 10 in Torah:\n' + table(getTopTen(torahTop));
  // writeFileSync needs a string or buffer, so serialize the pairs explicitly.
  fs.writeFileSync('tmp/torah', JSON.stringify(torahTop));

  var tehillimPairs = uniqueWords(words[25]).map(toGemPair);
  var tehillimTop = getTopX(tehillimPairs, tehillimPairs.length);
  finalPost += '\n\nTop 10 in Tehillim:\n' + table(getTopTen(tehillimTop));
  fs.writeFileSync('tmp/tehillim', JSON.stringify(tehillimTop));

  // forEach skips the unused slot 0, so only real files are reported.
  words.forEach(function (sefer, seferIndex) {
    var topFive = getTopFive(uniqueWords(sefer).map(toGemPair));
    finalPost += '\n\nTop 5 in ' + names[seferIndex] + ':\n\n ' + table(topFive);
  });

  // Save the per-file results right away; the overall ranking is appended below.
  fs.writeFileSync('tmp/post.txt', finalPost);

  console.log('concating');
  // reduce() skips the unused slot 0; concat.apply would pass that hole along
  // as an `undefined` argument and inject an `undefined` word into the list.
  words = words.reduce(function (all, sefer) {
    return all.concat(sefer);
  }, []);

  console.log('filtering');
  words = uniqueWords(words);

  console.log('geming');
  gems = words.map(function (word) {
    try {
      return toGemPair(word);
    } catch (e) {
      // Best effort: a word that cannot be scored ranks last instead of
      // aborting the run, but the failure is reported rather than swallowed.
      console.error('could not compute gematria for ' + word + ': ' + e.message);
      return [word, 0];
    }
  });

  finalPost += '\n\nTop 20 overall:\n' + table(getTopTwenty(gems));
  fs.writeFileSync('tmp/post.txt', finalPost);
}
|
|
|
/**
 * Formats a number with at least two digits, adding a leading zero when needed.
 * Always returns a string, and leaves values that already have two or more
 * digits untouched (the previous check for "exactly two" prefixed a zero to
 * three-digit numbers and returned two-digit input as a number).
 *
 * @param {number|string} num - value to format
 * @returns {string} the value as text, left-padded with '0' to width 2
 */
function pad(num) {
  var digits = String(num);
  return digits.length < 2 ? '0' + digits : digits;
}
|
|
|
/**
 * Returns the x entries with the highest score, best first.
 * Sorts a copy, so the caller's array keeps its original order.
 *
 * @param {Array} arr - entries whose element [1] is a numeric score
 * @param {number} x - maximum number of entries to return
 * @returns {Array} up to x entries in descending order of entry[1]
 */
function getTopX(arr, x) {
  var ranked = arr.slice(); // sort() works in place; keep the input untouched
  ranked.sort(function (a, b) {
    return b[1] - a[1];
  });
  return ranked.slice(0, x);
}
|
|
|
/**
 * Returns the ten highest-scoring entries of arr, best first.
 *
 * @param {Array} arr - entries whose element [1] is a numeric score
 * @returns {Array} at most ten entries, as produced by getTopX
 */
function getTopTen(arr) {
  return getTopX(arr, 10);
}
|
|
|
/**
 * Returns the five highest-scoring entries of arr, best first.
 *
 * @param {Array} arr - entries whose element [1] is a numeric score
 * @returns {Array} at most five entries, as produced by getTopX
 */
function getTopFive(arr) {
  return getTopX(arr, 5);
}
|
|
|
/**
 * Returns the twenty highest-scoring entries of arr, best first.
 *
 * @param {Array} arr - entries whose element [1] is a numeric score
 * @returns {Array} at most twenty entries, as produced by getTopX
 */
function getTopTwenty(arr) {
  return getTopX(arr, 20);
}
|
|
|
/**
 * Renders rows of cells as plain text: every cell is padded on the right to
 * the width of the longest cell, cells are joined with ' | ' and rows with a
 * newline followed by a space.
 *
 * Cells are converted to text once, before measuring and padding, so numeric
 * cells get the same width as string cells (handing a raw number to padStr
 * left it unpadded).
 *
 * @param {Array<Array>} arr - rows; each row is an array of cells
 * @returns {string} the formatted table ('' for no rows)
 */
function table(arr) {
  var rows = arr.map(function (row) {
    return row.map(String);
  });

  // Width of the widest cell anywhere in the table.
  var longest = rows.reduce(function (widest, row) {
    return row.reduce(function (rowWidest, cell) {
      return cell.length > rowWidest ? cell.length : rowWidest;
    }, widest);
  }, 0);

  return rows.map(function (row) {
    return row.map(function (cell) {
      return padStr(cell, longest);
    }).join( ' | ');
  }).join('\n ');
}
|
|
|
/**
 * Pads a value on the right with spaces until it is at least `length`
 * characters long. Values that are already long enough come back unchanged
 * (as text).
 *
 * Iterative rather than recursive, so large widths cannot exhaust the call
 * stack, and non-string values are converted to text first so they are padded
 * too instead of being returned as-is.
 *
 * @param {*} str - value to pad; converted with String()
 * @param {number} length - minimum width of the result
 * @returns {string} the padded text
 */
function padStr(str, length) {
  var text = String(str);
  while (text.length < length) {
    text += ' ';
  }
  return text;
}