Skip to content

Instantly share code, notes, and snippets.

@Scimonster
Last active August 29, 2015 14:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Scimonster/4e47d57dd8d4b9875bae to your computer and use it in GitHub Desktop.
Save Scimonster/4e47d57dd8d4b9875bae to your computer and use it in GitHub Desktop.
Gematriya of Tanach

To run this script, follow these steps:

  1. Download the Tanach and unzip it into a new folder. You will get two items: the file readme.txt and the folder x/.
  2. Create a new folder named tmp inside that same base folder (next to x/).
  3. Import this gist into the base folder.
  4. Open a terminal.
  5. cd to the base folder.
  6. Run npm install to install the necessary modules.
  7. Run node index to generate the data.
  8. The data will be placed in tmp/post.txt - copy and edit this at your leisure.
// Lookup table pairing each Hebrew letter that has a distinct word-final
// shape with that final shape: key = regular form, value = final form.
var map = {
    'כ': 'ך', // kaf   -> final kaf
    'מ': 'ם', // mem   -> final mem
    'נ': 'ן', // nun   -> final nun
    'פ': 'ף', // pe    -> final pe
    'צ': 'ץ'  // tsadi -> final tsadi
};
exports.toFinal = function(text) {
for (var i in map) {
text = text.replace(new RegExp(i+'$','g'), map[i]);
}
return text;
}
exports.toRegular = function(text) {
for (var i in map) {
text = text.replace(new RegExp(map[i],'g'), i);
}
return text;
}
// Modules: Node's fs, the gematriya package, and the local Hebrew-letter helpers.
var fs = require('fs');
var gem = require('gematriya');
var heb = require('./hebletters');
// Converter from CP1255 to UTF8. The constructor is parenthesised so that
// `new` applies to Iconv itself: `new require('iconv').Iconv(...)` parses as
// `(new require('iconv')).Iconv(...)`, which calls Iconv WITHOUT `new` and
// only works if the library tolerates being invoked as a plain function.
var iconv = new (require('iconv').Iconv)('CP1255', 'UTF8');

// Shared state filled in by the file-reading loop and by callback().
var gems = [];      // [word, value] pairs for the combined word list
var words = [];     // words[i]: de-duplicated words of source file i (1-based)
var names = [];     // names[i]: title line of source file i (1-based)
var done = 0;       // number of source files processed so far
var finalPost = ''; // report text accumulated for tmp/post.txt
// Read the 35 source files x/x01.htm .. x/x35.htm asynchronously. For each
// file, store its title line in names[i] and its de-duplicated word list in
// words[i]; once the last file has been processed, write tmp/names.json and
// invoke callback().
for (var i = 1; i <= 35; i++) {
    // The IIFE gives every readFile callback its own copy of `i`.
    (function(i) {
        fs.readFile('x/x' + pad(i) + '.htm', function(err, data) {
            // Handle a failed read BEFORE touching `data`: on error there is
            // nothing to decode, and handing it to iconv would throw before
            // the message below could be printed.
            if (err) {
                console.error('Error occurred fetching file ' + i);
                console.error(err);
                process.exit(1); // non-zero status so the failure is visible to the caller
            }
            console.log('evaling ' + i);
            // Decode the raw bytes with the module-level converter (CP1255 -> UTF8).
            data = iconv.convert(data).toString();
            data = data.split('\n');
            // The title sits on a fixed line; file 1 uses different offsets from the others.
            names[i] = data[i === 1 ? 24 : 11].replace(/<\/?H1+>/g, '');
            // NOTE(review): lines are joined with '' — this relies on each line
            // ending in whitespace (e.g. '\r'), otherwise words at line
            // boundaries would be fused together. Confirm against the input files.
            data = data.slice(i === 1 ? 26 : 13).join('').replace(/(<B>[^<]+<\/B>|<\/?[A-Z]+>|{.})/g, ''); // clean up
            data = data.replace(/[,.;:-]/g, ' ');
            // Keep the first occurrence of each word; drop empty tokens and
            // tokens starting with '('. The lookup table replaces a quadratic
            // indexOf scan and keeps the same order.
            var seen = Object.create(null);
            data = data.split(/\s+/).map(heb.toRegular).filter(function(word) {
                if (!word || word[0] === '(' || seen[word]) {
                    return false;
                }
                seen[word] = true;
                return true;
            });
            words[i] = data;
            /* data = data.map(function(word){return [word,gem(word)]});
            gems[i] = data.sort(function(a,b){return b[1] - a[1]});
            fs.writeFileSync('tmp/'+i, data);
            console.log('writing '+i);*/
            if (++done === 35) { // done
                fs.writeFileSync('tmp/names.json', JSON.stringify(names));
                callback();
            }
        });
    })(i);
}
//callback();
/**
 * Build the report once every source file has been parsed.
 *
 * Reads the module-level `words` and `names`, appends to `finalPost`, and
 * writes tmp/torah, tmp/tehillim and tmp/post.txt. On return, `words` has
 * been replaced by the flat, de-duplicated list of all words and `gems`
 * holds the [word, value] pairs for that list.
 *
 * Sections produced, in order: top 10 of files 1-5 ("Torah"), top 10 of
 * file 25 ("Tehillim"), top 5 of every file, top 20 overall.
 */
function callback() {
    // Earlier caching workflow, kept for reference (pairs with the
    // commented-out top-level `callback()` call):
    //
    //   fs.writeFileSync('tmp/separate-words.json',JSON.stringify(words));
    //   console.log('concating');
    //   words = [].concat.apply([],words);
    //   console.log('filtering');
    //   words = words.filter(function(word,i,arr){
    //       return arr.indexOf(word)==i;
    //   });
    //   fs.writeFileSync('tmp/allwords.json',JSON.stringify(words));
    //   console.log('geming');
    //   gems = words.map(function(word){
    //       try {return [word,gem(word)];}
    //       catch (e) {console.log(word)}
    //       return [word,0];
    //   });
    //   fs.writeFileSync('tmp/gems.json',JSON.stringify(gems));
    //   return;
    //
    //   words = JSON.parse(fs.readFileSync('tmp/separate-words.json'));
    //   gems = JSON.parse(fs.readFileSync('tmp/gems.json'));
    //   names = JSON.parse(fs.readFileSync('tmp/names.json'));

    var torahTop = scoreWords(uniqueWords(flattenBooks(words.slice(1, 6))));
    finalPost += 'Top 10 in Torah:\n' + table(getTopTen(torahTop));
    // writeFileSync must be given a string or buffer; current Node versions
    // throw on an array. String() yields the same comma-joined text that
    // older versions produced by implicit coercion.
    fs.writeFileSync('tmp/torah', String(torahTop));

    var tehillimTop = scoreWords(uniqueWords(words[25]));
    finalPost += '\n\nTop 10 in Tehillim:\n' + table(getTopTen(tehillimTop));
    fs.writeFileSync('tmp/tehillim', String(tehillimTop));

    // forEach skips the never-assigned slot 0 of `words`.
    words.forEach(function(sefer, i) {
        var topFive = getTopFive(scoreWords(uniqueWords(sefer)));
        finalPost += '\n\nTop 5 in ' + names[i] + ':\n\n ' + table(topFive);
    });
    fs.writeFileSync('tmp/post.txt', finalPost); // if you're impatient

    console.log('concating');
    // flattenBooks ignores the empty slot 0; `[].concat.apply([], words)`
    // would have turned it into an `undefined` entry in the word list.
    words = flattenBooks(words);
    console.log('filtering');
    words = uniqueWords(words);
    console.log('geming');
    gems = scoreWords(words);
    var topTwenty = getTopTwenty(gems);
    finalPost += '\n\nTop 20 overall:\n' + table(topTwenty);
    fs.writeFileSync('tmp/post.txt', finalPost);
}

// Concatenate the per-file word lists into one array, skipping unassigned slots.
function flattenBooks(books) {
    return books.reduce(function(all, book) {
        return all.concat(book);
    }, []);
}

// Return `list` without repeats, keeping the first occurrence of each word.
function uniqueWords(list) {
    var seen = Object.create(null);
    return list.filter(function(word) {
        if (seen[word]) {
            return false;
        }
        seen[word] = true;
        return true;
    });
}

// Map each word to a [display form, gematriya value] pair.
function scoreWords(list) {
    return list.map(toGemPair);
}

// Pair one word (shown with its final-letter form) with its gematriya value;
// a word that cannot be evaluated is reported and scored 0 instead of
// aborting the whole run.
function toGemPair(word) {
    try {
        return [heb.toFinal(word), gem(word)];
    } catch (e) {
        console.error('Could not compute gematriya for: ' + word);
        return [word, 0];
    }
}
/**
 * Format a number as a string of at least two characters, prefixing a
 * single '0' when it is only one character long.
 *
 * Always returns a string (previously two-digit input came back as a
 * number), and input that is already longer than two characters is
 * returned as-is instead of gaining a stray leading zero.
 *
 * @param {number} num - The number to format.
 * @returns {string} The zero-padded text, e.g. 7 -> '07', 35 -> '35'.
 */
function pad(num) {
    var digits = String(num);
    return digits.length < 2 ? '0' + digits : digits;
}
/**
 * Sort `arr` IN PLACE by the value at index 1 of each row, largest first,
 * and return a new array holding the first `x` rows.
 *
 * @param {Array<Array>} arr - Rows whose second element is numeric; reordered by this call.
 * @param {number} x - How many leading rows to return.
 * @returns {Array<Array>} The top `x` rows.
 */
function getTopX(arr, x) {
    function byValueDescending(left, right) {
        return right[1] - left[1];
    }
    arr.sort(byValueDescending);
    return arr.slice(0, x);
}

/** Top 10 rows of `arr` (see getTopX). */
function getTopTen(arr) {
    return getTopX(arr, 10);
}

/** Top 5 rows of `arr` (see getTopX). */
function getTopFive(arr) {
    return getTopX(arr, 5);
}

/** Top 20 rows of `arr` (see getTopX). */
function getTopTwenty(arr) {
    return getTopX(arr, 20);
}
/**
 * Render rows of cells as plain text. Cells in a row are joined with ' | '
 * and rows are joined with '\n ' (newline followed by one space).
 *
 * Every cell except the last one in its row is padded with spaces to the
 * width of the widest cell in the whole table. Cells may be strings or
 * numbers; numbers are converted to text before padding (previously they
 * were measured but never padded). The last cell of a row is left unpadded
 * so lines carry no trailing blanks.
 *
 * @param {Array<Array<string|number>>} arr - The rows to render.
 * @returns {string} The rendered table; '' for an empty array.
 */
function table(arr) {
    // Width of the widest cell, measured on the text form of every cell.
    var longest = [].concat.apply([], arr).reduce(function(widest, cell) {
        return Math.max(widest, String(cell).length);
    }, 0);
    return arr.map(function(row) {
        return row.map(function(cell, column) {
            var isLast = column === row.length - 1;
            return isLast ? String(cell) : padStr(cell, longest);
        }).join(' | ');
    }).join('\n ');
}

/**
 * Right-pad a value with spaces until its text form is `length` characters.
 *
 * @param {string|number} str - The value to pad; converted to a string first.
 * @param {number} length - The minimum length of the result.
 * @returns {string} The padded text (unchanged if already long enough).
 */
function padStr(str, length) {
    var text = String(str);
    while (text.length < length) {
        text += ' ';
    }
    return text;
}
{
"name": "tanach-gematriya",
"version": "0.0.3",
"private": true,
"dependencies": {
"gematriya": "",
"iconv": "",
"request": ""
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment