Skip to content

Instantly share code, notes, and snippets.

@Mike111177
Created August 13, 2020 20:20
Show Gist options
  • Save Mike111177/52e8207a612ef3b3ad278bc5d23ccf2a to your computer and use it in GitHub Desktop.
Save Mike111177/52e8207a612ef3b3ad278bc5d23ccf2a to your computer and use it in GitHub Desktop.
Quick testing tool for various tokenizers (Windows with WSL)
//To test with contents of command.txt: node tokentest.js
//When invoked with arguments (as a shell-generated script does), echo them back as JSON exactly as the shell passed them, then exit
// Echo mode: when run directly with extra arguments, print them as a JSON
// array and stop before any of the tokenizer machinery below is loaded.
if (require.main === module && process.argv.length > 2) {
    const forwardedArgs = process.argv.slice(2);
    console.log(JSON.stringify(forwardedArgs));
    process.exit();
}
const fs = require('fs');
const {spawnSync, execSync} = require('child_process')
const parse = require('yargs-parser')
const path = require('path');
const util = require('util')
// Pretty-print any value through util.inspect with colors switched on.
const inspect = (obj) => {
    const options = { colors: true };
    return util.inspect(obj, options);
};
//Base name of this script; extTokenizer embeds it in the scripts it generates so they call back into this file
const thisFile = path.basename(__filename)
//Tokenizers
//Naive baseline: cut the string at every single space character (no quote handling)
const basicTokenizer = (str) => {
    return str.split(" ");
};
/**
 * Tokenizer copied from the built tokenize-arg-string.js in yargs-parser 19.0.1.
 *
 * An array input is returned as a new array with every non-string entry
 * coerced to a string. A string input is trimmed and split on spaces that
 * are not inside a matching pair of single or double quotes; the quote
 * characters themselves are kept in the resulting tokens.
 *
 * @param {string|Array} argString - raw command line, or an already-split argument list
 * @returns {string[]} the tokens
 */
function yargsTokenizer(argString) {
    if (Array.isArray(argString)) {
        return argString.map((entry) => (typeof entry === 'string' ? entry : entry + ''));
    }

    const tokens = [];
    let slot = 0;           // index of the token currently being built
    let activeQuote = null; // the quote character we are inside of, if any
    let lastChar = null;

    for (const ch of argString.trim()) {
        const before = lastChar;
        lastChar = ch;

        // An unquoted space ends the current token; a run of spaces counts once.
        if (ch === ' ' && !activeQuote) {
            if (before !== ' ') {
                slot += 1;
            }
            continue;
        }

        // Track entering/leaving a quoted region; only the same quote character closes it.
        if (ch === activeQuote) {
            activeQuote = null;
        } else if (!activeQuote && (ch === "'" || ch === '"')) {
            activeQuote = ch;
        }

        tokens[slot] = (tokens[slot] || '') + ch;
    }
    return tokens;
}
/**
 * Builds a tokenizer that lets an external shell do the argument splitting.
 *
 * The returned function writes a one-line script `tokenize.<ext>` into the
 * current directory containing `<prefix>node <thisFile> <command>`, runs it
 * with `exe`, and parses the child's stdout as JSON. The script file is
 * removed again even when running the child fails.
 *
 * @param {string} exe - executable (or shell command when `shell` is true) used to run the script
 * @param {string[]} args - arguments placed before the script name; never modified
 * @param {string} ext - file extension of the generated script
 * @param {string} [prefix=""] - text written in front of `node` in the script
 * @param {boolean} [shell=false] - run through `execSync` (a shell) instead of `spawnSync`
 * @returns {function(string): *} tokenizer taking a command line and returning the parsed JSON output
 * @throws whatever `execSync` throws, the launch error reported by `spawnSync`,
 *         or a SyntaxError when the child's output is not valid JSON
 */
function extTokenizer(exe, args, ext, prefix = "", shell = false) {
    return (command) => {
        const scriptName = `tokenize.${ext}`;
        const scriptPath = `./${scriptName}`;
        // Build a fresh argument list per call: pushing onto `args` itself would
        // append the script name again on every invocation of the tokenizer.
        const launchArgs = [...args, scriptName];

        fs.writeFileSync(scriptPath, `${prefix}node ${thisFile} ${command}`);

        let output;
        try {
            if (shell) {
                output = execSync(`${exe} ${launchArgs.join(" ")}`);
            } else {
                const child = spawnSync(exe, launchArgs);
                // A failed launch leaves stdout null, which JSON.parse would
                // silently turn into `null`; surface the real error instead.
                if (child.error) {
                    throw child.error;
                }
                output = child.stdout;
            }
        } finally {
            // Always clean up the temporary script, even if the child failed.
            fs.unlinkSync(scriptPath);
        }
        return JSON.parse(output);
    };
}
//Shell-backed tokenizers: each writes the command into a script and runs it with the given shell
const cmdTokenizer = extTokenizer('cmd', ["/C"], 'bat', '@echo off\n')
const powershellTokenizer = extTokenizer('powershell',['-ExecutionPolicy', 'Bypass', '-File'], 'ps1')
//Tested with WSL Ubuntu with nvm; the prefix is written directly in front of `node` in the generated script
//exe is passed as a plain string like the others (an array only worked via implicit string coercion)
const bashTokenizer = extTokenizer('bash', [], 'sh', "~/.nvm/versions/node/v14.8.0/bin/", true)
/**
 * Prints the given command line followed by the tokens each tokenizer
 * produces for it, one tokenizer per line, in a fixed order.
 *
 * @param {string} commandline - the raw argument string to tokenize
 */
function compare(commandline) {
    console.log("Arg String:", inspect(commandline));
    console.log();
    console.log("Tokenizers:");

    // Each tokenizer is invoked right before its line is printed, in this order.
    const lineup = [
        ["Tokens (Basic):", basicTokenizer],
        ["Tokens (Yargs):", yargsTokenizer],
        ["Tokens (cmd):", cmdTokenizer],
        ["Tokens (Powershell):", powershellTokenizer],
        ["Tokens (Bash):", bashTokenizer],
    ];
    for (const [label, tokenize] of lineup) {
        console.log(label, tokenize(commandline));
    }
}
//Entry point when executed directly: compare the tokenizers on the contents of command.txt
if (require.main === module) {
    const fileText = fs.readFileSync("command.txt", "utf8");
    compare(fileText);
}
//Exports for use from other scripts: the individual tokenizers under short names, plus compare
module.exports = {
basic: basicTokenizer, //splits on single spaces
yargs: yargsTokenizer, //copy of the yargs-parser tokenizer
cmd: cmdTokenizer, //runs the command through cmd /C
psl: powershellTokenizer, //runs the command through powershell -File
bash: bashTokenizer, //runs the command through bash
compare
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment