Skip to content

Instantly share code, notes, and snippets.

@kizzx2
Created August 9, 2011 14:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save kizzx2/1134262 to your computer and use it in GitHub Desktop.
Save kizzx2/1134262 to your computer and use it in GitHub Desktop.
Word Numbers brute force (Haskell profiling exercise)
-- Problem: Find the 51000000000-th character of the string (wordNumber Infinity)
-- where a wordNumber is defined as
--
-- wordNumber 1 = "one"
-- wordNumber 2 = "onetwo"
-- wordNumber 3 = "onetwothree"
-- wordNumber 15 = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteenfifteen"
-- ...
--
-- The answer should be presented as ( sum of all numbers up to that point
-- , the 51000000000-th character
-- )
-- This is a Haskell performance tuning exercise, trying to achieve C like performance.
-- Full guided story can be found at
-- http://cfc.kizzx2.com/index.php/in-search-of-performance-in-haskell/
--
-- (This Word Number problem is actually a misunderstood version of ITA's version)
{-# LANGUAGE BangPatterns, MagicHash #-}
import qualified Data.Vector.Unboxed as VU
import Data.Vector.Unboxed ((!))
import qualified Data.Vector.Generic as VG
import GHC.Base (Int(..), quotInt#, remInt#)
ones, tens, teens :: [String]
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
tens = ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
wordify :: Int -> String
wordify n
| n < 10 = ones !! n
| n < 20 = teens !! (n-10)
| n < 100 = splitterTen
| n < 1000 = splitter 100 "hundred"
| n < 1000000 = splitter 1000 "thousand"
| otherwise = splitter 1000000 "million"
where
splitterTen = let (t, x) = n `quotRem` 10
in (tens !! t) ++ wordify x
splitter d suffix = let (t, x) = n `quotRem` d
in (wordify t) ++ suffix ++ wordify x
-- Tail recursive version
wordLength :: Int -> Int
wordLength i = go 0 i
where
go !pad !n
| n < 10 = lenOnes `VG.unsafeIndex` n + pad
| n < 20 = lenTeens `VG.unsafeIndex` (n-10) + pad
| n < 100 = go (lenTens `VG.unsafeIndex` (n//10) + pad) (n%10)
| n < 1000 = go (go (7+pad) (n//100)) (n%100)
| n < 1000000 = go (go (8+pad) (n//1000)) (n%1000)
| otherwise = go (go (7+pad) (n//1000000)) (n%1000000)
(I# a) // (I# b) = I# (a `quotInt#` b)
(I# a) % (I# b) = I# (a `remInt#` b)
!lenOnes = VU.fromList [0,3,3,5,4,4,3,5,5,4] -- "", "one","two", ...
!lenTens = VU.fromList [0,3,6,6,5,5,5,7,6,6]
!lenTeens = VU.fromList [3,6,6,8,8,7,7,9,8,8] -- first element is "ten" 3
solve :: Int -> (Int, Int, Int)
solve n = go 0 0 1
where
go !sumNum sumLen i
| sumLen' >= n = (sumNum', sumLen, i)
| otherwise = go sumNum' sumLen' (i+1)
where
sumNum' = sumNum + i
sumLen' = sumLen + wordLength i
solution :: Int -> (Int, Char)
solution x =
let (sumNum, sumLen, n) = solve x
in (sumNum, (wordify n) !! (x - sumLen - 1))
main :: IO ()
main = do
print $ solution 510000000
// Brute force solver for the Word Number problem
#include <iostream>
#include <string>
int length_ones[10] = {0,3,3,5,4,4,3,5,5,4}; // "", one, two, three, ...
int length_tens[10] = {0,3,6,6,5,5,5,7,6,6}; // "", ten, twenty, ...
int length_teens[10] = {3,6,6,8,8,7,7,9,8,8}; // ten, eleven, twelve, ...
const char * ones[] = {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"};
const char * tens[] = {"", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"};
const char * teens[] = {"ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteeen", "nineteen"};
std::string wordify(long n)
{
if(n < 10) return ones[n];
else if(n < 20) return teens[n-10];
else if(n < 100) return std::string(tens[n/10]) + ones[n%10];
else if(n < 1000) return std::string(ones[n/100]) + "hundred" + wordify(n%100);
else if(n < 1000000) return wordify(n/1000) + "thousand" + wordify(n%1000);
else return wordify(n/1000000) + "million" + wordify(n%1000000);
}
int word_length(long n)
{
if(n < 10) return length_ones[n];
else if(n < 20) return length_teens[n-10];
else if(n < 100) return length_tens[n/10] + length_ones[n%10];
else if(n < 1000) return length_ones[n/100] + 7 + word_length(n%100); // 7 for "hundred"
else if(n < 1000000) return word_length(n/1000) + 8 + word_length(n%1000);
else return word_length(n/1000000) + 7 + word_length(n%1000000);
}
int main()
{
long sumNumbers = 0;
long sumLength = 0;
const long target = 51000000000;
for(long i; i < 999999999; i++)
{
sumNumbers += i;
long newSumLength = word_length(i) + sumLength;
if(newSumLength >= target)
break;
sumLength = newSumLength;
}
std::cout << "Sum: " << sumNumbers << std::endl;
std::cout << "The letter is " << wordify(i)[target - sumLength - 1] << std::endl;
return 0;
}
-- Original version
-- Can you spot how to improve this performance by 3 fold right away?
{-# LANGUAGE BangPatterns #-}
import Debug.Trace
import Data.Int
import Control.Monad
import Data.Array.Unboxed
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"]
tens = ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"]
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"]
lenOnes, lenTens, lenTeens :: UArray Int64 Int64
lenOnes = listArray (0,9) $ [0,3,3,5,4,5,3,5,5,4] -- "", "one","two", ...
lenTens = listArray (0,9) $ [0,3,6,6,5,5,5,7,6,6]
lenTeens = listArray (0,9) $ [3,6,6,8,8,7,7,9,8,8] -- first element is "ten" 3
-- potentially cleaner version
-- but I feared Haskell might do surprising things behind my
-- back so I stuck with the above
-- lenBelowHundred = listArray (0,99) $ map (fromIntegral . length . wordify) [1..99]
-- wordify 123 = "onehundredtwentythree"
-- This is only used once in presenting the final result character
wordify :: Int64 -> String
wordify n
| n < 10 = ones !! fromIntegral n
| n < 20 = teens !! (fromIntegral n-10)
| n < 100 = splitterTen
| n < 1000 = splitter 100 "hundred"
| n < 1000000 = splitter 1000 "thousand"
| n < 1000000000 = splitter 1000000 "million"
where
splitterTen = let (t, x) = n `divMod` 10
in (tens !! fromIntegral t) ++ wordify x
splitter div suffix = let (t, x) = n `divMod` div
in (wordify t) ++ suffix ++ wordify x
-- Optimized version of length (wordify n)
-- Used in number crunching
wordLength n = wordLength' 0 n
-- Tail recursive version
wordLength' :: Int64 -> Int64 -> Int64
wordLength' !pad !n
| n < 10 = lenOnes ! n + pad
| n < 20 = lenTeens ! (n-10) + pad
| n < 100 = splitterTen
| n < 1000 = splitter 100 7
| n < 1000000 = splitter 1000 8
| otherwise = splitter 1000000 7
where
splitterTen = let !(!t, !x) = n `divMod` 10
in wordLength' (lenTens ! t + pad) x
splitter !d !suffix = let !(!t, !x) = n `divMod` d
in wordLength' (wordLength' (suffix+pad) t) x
-- Tail recursive
solve :: Int64 -> (Int64, Int64, Int64) -> [Int64] -> (Int64, Int64, Int64)
solve !n !acc@(!sumNum, !sumLen, !curr) (!num:nums)
| sumLen' >= n = (sumNum', sumLen, num)
| otherwise = solve n (sumNum', sumLen', num) nums
where
sumNum' = sumNum + num
sumLen' = sumLen + wordLength num
solution :: Int64 -> (Int64, Char)
solution !x =
let (sumNum, sumLen, n) = solve x (0,0,1) [1..]
in (sumNum, (wordify n) !! (fromIntegral $ x - sumLen - 1))
main = do
print $ solution 1234 -- Make sure we are sane
print $ solution 51000000000
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment