keithhamilton/speechToScalar.js

## speechToScalar.js
/*
 * Fun Fact: when working in the realm of real, integer numbers, the English
 * language follows a curious pattern:
 *
 * For each word in a scalar as spoken, such as 'three hundred thousand
 * two hundred forty five', a real value can be derived by considering each
 * number successively and applying the following formula:
 *
 * phrase.split(' ') => array with length l
 * for i in phrase:
 * if the integer value of phrase[i] < integer value of phrase[i+1] =>
 *   reduce((a, b) => a * b)
 *
 * if the integer value of phrase[i] > integer value of phrase[i+1] =>
 *   reduce((a, b) => a + b)
 *
 * Considering this, it's trivial to convert something like
 * 'two hundred thousand three hundred forty five' into an integer
 * (2 * 100 * 1000) + (3 * 100) + 40 + 5 == 200345
 *
 * An edge case is found when irregular forms of speech occur, such as
 * 'one million million'. This can be accounted for by also reducing
 * as a product when a term is equal to the term that follows it
 * (1 * 1000000 * 1000000)
 *
 * To deal with hyphenations, it is important to first split on a space
 * then reduce each term to an integer before reducing the terms as a whole
 *
 * "twenty-six hundred" =>
 * ["twenty-six", "hundred"] =>
 * [["twenty", "six"], ["hundred"]] =>
 * [26, 100] =>
 * 2600
 *
 * As it turns out, there are only so many discrete words that are used
 * when discussing real integers, and for the purposes of my implementation,
 * I've chosen to stop counting at quadrillion, since that is what I'm
 * considering to be near or at the upper-limit of common speech in English.
 * Obviously more terms could be added if this were to be applied in
 * case-specific situations.
 *
 * Below is an ES 6 implementation I'm using for a project.
 */

const _ = require('lodash')
/**
 * Convert a spoken-English integer phrase into its numeric value.
 *
 * The text is lower-cased and split on whitespace. Each word is further split
 * on '_', '.' and '-' so that hyphenated forms such as "twenty-six" resolve to
 * a single value (26) before the phrase as a whole is combined. Tokens that
 * are not known number words are ignored, so surrounding filler words do not
 * prevent a match.
 *
 * @param {string} rawText - Phrase such as 'two hundred forty five thousand'.
 * @returns {number|null} The integer value, or null when rawText is not a
 *   string or contains no recognised number words. Results above
 *   Number.MAX_SAFE_INTEGER lose precision because plain Numbers are used.
 */
function resolveScalar(rawText){
  if(typeof rawText !== 'string') return null

  const numberWords = {
    zero: 0, one: 1, two: 2, three: 3, four: 4, five: 5, six: 6, seven: 7, eight: 8,
    nine: 9, ten: 10, eleven: 11, twelve: 12, thirteen: 13, fourteen: 14,
    fifteen: 15, sixteen: 16, seventeen: 17, eighteen: 18, nineteen: 19,
    twenty: 20, thirty: 30, forty: 40, fifty: 50, sixty: 60, seventy: 70,
    eighty: 80, ninety: 90, hundred: 100, thousand: 1000, million: 1000000,
    billion: 1000000000, trillion: 1000000000000, quadrillion: 1000000000000000
  }

  // Own-property check so inherited keys ('constructor', 'toString', ...)
  // are never mistaken for number words.
  const isNumberWord = token =>
    Object.prototype.hasOwnProperty.call(numberWords, token)

  const sum = values => values.reduce((a, b) => a + b, 0)

  // Collapse a sequence of values into additive terms. A value that is greater
  // than or equal to the term before it acts as a multiplier: every trailing
  // term that is <= the value is summed and the sum is multiplied, so
  // [200, 40, 5, 1000] becomes [245000] rather than [200, 40, 5000].
  // Smaller values are simply appended as new terms.
  const reduceScalars = values => {
    const terms = []
    for(const value of values){
      let group = 0
      let absorbed = false
      while(terms.length > 0 && terms[terms.length - 1] <= value){
        group += terms.pop()
        absorbed = true
      }
      terms.push(absorbed ? group * value : value)
    }
    return terms
  }

  // First pass: resolve every whitespace-separated word to one number
  // (handles hyphenations); words with no number tokens contribute nothing.
  const wordValues = []
  for(const word of rawText.toLowerCase().split(/\s+/)){
    const parts = word.split(/[_.-]/)
      .filter(isNumberWord)
      .map(token => numberWords[token])
    if(parts.length > 0)
      wordValues.push(sum(reduceScalars(parts)))
  }

  if(wordValues.length === 0) return null

  // Second pass: combine the per-word values into the final integer.
  return sum(reduceScalars(wordValues))
}
	/*
	* Fun Fact: when working in the realm of real, integer numbers, the English
	* language follows a curious pattern:
	*
	* For each word in a scalar as spoken, such as 'three hundred thousand
	* two hundred forty five', a real value can be derived by considering each
	* number successively and applying the following formula:
	*
	* phrase.split(' ') => array with length l
	* for i in phrase:
	* if the integer value of phrase[i] < integer value of phrase[i+1] =>
	* reduce((a, b) => a * b)
	*
	* if the integer value of phrase[i] > integer value of phrase[i+1] =>
	* reduce((a, b) => a + b)
	*
	* Considering this, it's trivial to convert something like
	* 'two hundred thousand three hundred forty five' into an integer
	* (2 * 100 * 1000) + (3 * 100) + 40 + 5 == 200345
	*
	* An edge case is found when irregular forms of speech occur, such as
	* 'one million million'. This can be accounted for by also reducing
	* as a product when a term is equal to the term that follows it
	* (1 * 1000000 * 1000000)
	*
	* To deal with hyphenations, it is important to first split on a space
	* then reduce each term to an integer before reducing the terms as a whole
	*
	* "twenty-six hundred" =>
	* ["twenty-six", "hundred"] =>
	* [["twenty", "six"], ["hundred"]] =>
	* [26, 100] =>
	* 2600
	*
	* As it turns out, there are only so many discrete words that are used
	* when discussing real integers, and for the purposes of my implementation,
	* I've chosen to stop counting at quadrillion, since that is what I'm
	* considering to be near or at the upper-limit of common speech in English.
	* Obviously more terms could be added if this were to be applied in
	* case-specific situations.
	*
	* Below is an ES 6 implementation I'm using for a project.
	*/

	const _ = require('lodash')
	/**
	 * Convert a spoken-English integer phrase into its numeric value.
	 *
	 * The text is lower-cased and split on whitespace. Each word is further split
	 * on '_', '.' and '-' so that hyphenated forms such as "twenty-six" resolve to
	 * a single value (26) before the phrase as a whole is combined. Tokens that
	 * are not known number words are ignored, so surrounding filler words do not
	 * prevent a match.
	 *
	 * @param {string} rawText - Phrase such as 'two hundred forty five thousand'.
	 * @returns {number|null} The integer value, or null when rawText is not a
	 *   string or contains no recognised number words. Results above
	 *   Number.MAX_SAFE_INTEGER lose precision because plain Numbers are used.
	 */
	function resolveScalar(rawText){
		if(typeof rawText !== 'string') return null

		const numberWords = {
			zero: 0, one: 1, two: 2, three: 3, four: 4, five: 5, six: 6, seven: 7, eight: 8,
			nine: 9, ten: 10, eleven: 11, twelve: 12, thirteen: 13, fourteen: 14,
			fifteen: 15, sixteen: 16, seventeen: 17, eighteen: 18, nineteen: 19,
			twenty: 20, thirty: 30, forty: 40, fifty: 50, sixty: 60, seventy: 70,
			eighty: 80, ninety: 90, hundred: 100, thousand: 1000, million: 1000000,
			billion: 1000000000, trillion: 1000000000000, quadrillion: 1000000000000000
		}

		// Own-property check so inherited keys ('constructor', 'toString', ...)
		// are never mistaken for number words.
		const isNumberWord = token =>
			Object.prototype.hasOwnProperty.call(numberWords, token)

		const sum = values => values.reduce((a, b) => a + b, 0)

		// Collapse a sequence of values into additive terms. A value that is greater
		// than or equal to the term before it acts as a multiplier: every trailing
		// term that is <= the value is summed and the sum is multiplied, so
		// [200, 40, 5, 1000] becomes [245000] rather than [200, 40, 5000].
		// Smaller values are simply appended as new terms.
		const reduceScalars = values => {
			const terms = []
			for(const value of values){
				let group = 0
				let absorbed = false
				while(terms.length > 0 && terms[terms.length - 1] <= value){
					group += terms.pop()
					absorbed = true
				}
				terms.push(absorbed ? group * value : value)
			}
			return terms
		}

		// First pass: resolve every whitespace-separated word to one number
		// (handles hyphenations); words with no number tokens contribute nothing.
		const wordValues = []
		for(const word of rawText.toLowerCase().split(/\s+/)){
			const parts = word.split(/[_.-]/)
				.filter(isNumberWord)
				.map(token => numberWords[token])
			if(parts.length > 0)
				wordValues.push(sum(reduceScalars(parts)))
		}

		if(wordValues.length === 0) return null

		// Second pass: combine the per-word values into the final integer.
		return sum(reduceScalars(wordValues))
	}