Skip to content

Instantly share code, notes, and snippets.

@HyphnKnight
Last active March 1, 2019 19:01
Show Gist options
  • Star 1 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save HyphnKnight/c363242ae99effdb90c7995804ece5ef to your computer and use it in GitHub Desktop.
Save HyphnKnight/c363242ae99effdb90c7995804ece5ef to your computer and use it in GitHub Desktop.
const {readFileSync} = require('fs');
// Token/n-gram frequency analysis of a text file.
//
// The input is split into two kinds of tokens:
//   * commands - text enclosed in square brackets, e.g. `[look]`
//   * words    - any other run of characters up to the next space or '['
// For every token the script counts how often it occurs, and it also counts
// every pair (bigram) and triple (trigram) of consecutive tokens. The result
// is printed to stdout as a single JSON document.

const INPUT_PATH = './rawdata';
const GRAM_SEPARATOR = '_';
const HISTORY_SIZE = 3;

/**
 * Creates a dictionary without a prototype.
 *
 * Keys come straight from the input file, so a token such as `constructor`
 * or `toString` must not collide with members inherited from
 * `Object.prototype` (on a plain `{}` such a token is never stored and a
 * `frequency` property is written onto the inherited built-in instead).
 *
 * @returns {Object} an empty object whose prototype is `null`
 */
function createTable() {
  return Object.create(null);
}

/**
 * Splits `text` into command and word tokens.
 *
 * Rules:
 *  - `[` opens a command that runs to the next `]` or to the end of the
 *    text; any further `[` inside it is dropped.
 *  - A space separates tokens and is otherwise ignored.
 *  - Anything else starts a word that runs to the next space, the next `[`
 *    or the end of the text. Only the space character separates words, so
 *    other whitespace (e.g. a newline) ends up inside a word.
 *
 * @param {string} text - raw input
 * @yields {{value: string, type: string}} tokens in input order; `type` is
 *   `'command'` or `'word'`
 */
function* tokenize(text) {
  let index = 0;
  while (index < text.length) {
    const char = text[index];
    if (char === '[') {
      let command = '';
      index += 1;
      while (index < text.length && text[index] !== ']') {
        if (text[index] !== '[') command += text[index];
        index += 1;
      }
      index += 1; // step over the closing ']'
      yield { value: command, type: 'command' };
    } else if (char === ' ') {
      index += 1;
    } else {
      let end = index + 1;
      while (end < text.length && text[end] !== '[' && text[end] !== ' ') {
        end += 1;
      }
      yield { value: text.slice(index, end), type: 'word' };
      index = end; // a '[' at `end` is handled by the next iteration
    }
  }
}

/**
 * Counts one occurrence of the n-gram `tokens` in `table`.
 *
 * @param {Object} table - n-gram table keyed by the tokens joined with '_'
 * @param {string[]} tokens - the consecutive tokens forming the n-gram
 */
function countGram(table, tokens) {
  const key = tokens.join(GRAM_SEPARATOR);
  if (table[key]) {
    table[key].frequency += 1;
  } else {
    table[key] = { value: tokens, frequency: 1 };
  }
}

/**
 * Returns a copy of `table` containing only the entries seen more than once.
 *
 * @param {Object} table - frequency table
 * @returns {Object} filtered table (prototype-less)
 */
function keepRepeated(table) {
  return Object.keys(table).reduce((kept, key) => {
    if (table[key].frequency > 1) kept[key] = table[key];
    return kept;
  }, createTable());
}

/**
 * Lists the entries of `table` ordered from least to most frequent.
 *
 * @param {Object} table - frequency table
 * @param {string} [type] - when given, only entries of this `type` are kept
 * @returns {Object[]} the (shared, not copied) entries in ascending order
 */
function byFrequency(table, type) {
  return Object.keys(table)
    .map((key) => table[key])
    .filter((entry) => type === undefined || entry.type === type)
    .sort((a, b) => a.frequency - b.frequency);
}

/**
 * Builds the full frequency report for `text`.
 *
 * A bigram is the two most recent tokens and a trigram the three most
 * recent ones; neither is recorded until enough tokens have been seen.
 * Only n-grams that occur more than once are reported.
 *
 * Note: a command and a word with identical text share one `inputs` entry;
 * the entry keeps the type of whichever was seen first.
 *
 * @param {string} text - raw input
 * @returns {{inputs: Object, commandsByFrequency: Object[],
 *   wordsByFrequency: Object[], grams: {bi: Object, tri: Object},
 *   biByFrequency: Object[], triByFrequency: Object[]}}
 */
function analyze(text) {
  const inputs = createTable();
  const allBi = createTable();
  const allTri = createTable();
  const history = []; // sliding window over the last HISTORY_SIZE tokens

  for (const { value, type } of tokenize(text)) {
    if (inputs[value]) {
      inputs[value].frequency += 1;
    } else {
      inputs[value] = { value, type, frequency: 1 };
    }

    history.push(value);
    if (history.length > HISTORY_SIZE) history.shift();
    // Use the newest tokens of the window and skip incomplete n-grams.
    if (history.length >= 2) countGram(allBi, history.slice(-2));
    if (history.length >= 3) countGram(allTri, history.slice(-3));
  }

  const grams = {
    bi: keepRepeated(allBi),
    tri: keepRepeated(allTri),
  };

  return {
    inputs,
    commandsByFrequency: byFrequency(inputs, 'command'),
    wordsByFrequency: byFrequency(inputs, 'word'),
    grams,
    biByFrequency: byFrequency(grams.bi),
    triByFrequency: byFrequency(grams.tri),
  };
}

/**
 * Reads `./rawdata` (UTF-8) and prints the report as JSON on stdout.
 */
function main() {
  const text = readFileSync(INPUT_PATH, 'utf8');
  console.log(JSON.stringify(analyze(text)));
}

// Run only when executed directly (`node <file>`), so loading this file from
// another module does not read the data file or print anything.
if (typeof require !== 'undefined' && require.main === module) {
  main();
}
@HyphnKnight
Copy link
Author

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment