Created
August 9, 2011 14:55
-
-
Save kizzx2/1134262 to your computer and use it in GitHub Desktop.
Word Numbers brute force (Haskell profiling exercise)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Problem: Find the 51000000000-th character of the string (wordNumber Infinity) | |
-- where a wordNumber is defined as | |
-- | |
-- wordNumber 1 = "one" | |
-- wordNumber 2 = "onetwo" | |
-- wordNumber 3 = "onetwothree" | |
-- wordNumber 15 = "onetwothreefourfivesixseveneightnineteneleventwelvethirteenfourteenfifteen" | |
-- ... | |
-- | |
-- The answer should be presented as ( sum of all numbers up to that point | |
-- , the 51000000000-th character | |
-- ) | |
-- This is a Haskell performance tuning exercise, trying to achieve C like performance. | |
-- Full guided story can be found at | |
-- http://cfc.kizzx2.com/index.php/in-search-of-performance-in-haskell/ | |
-- | |
-- (This Word Number problem is actually a misunderstood version of ITA's version) | |
{-# LANGUAGE BangPatterns, MagicHash #-} | |
import qualified Data.Vector.Unboxed as VU | |
import Data.Vector.Unboxed ((!)) | |
import qualified Data.Vector.Generic as VG | |
import GHC.Base (Int(..), quotInt#, remInt#) | |
ones, tens, teens :: [String] | |
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] | |
tens = ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"] | |
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"] | |
wordify :: Int -> String | |
wordify n | |
| n < 10 = ones !! n | |
| n < 20 = teens !! (n-10) | |
| n < 100 = splitterTen | |
| n < 1000 = splitter 100 "hundred" | |
| n < 1000000 = splitter 1000 "thousand" | |
| otherwise = splitter 1000000 "million" | |
where | |
splitterTen = let (t, x) = n `quotRem` 10 | |
in (tens !! t) ++ wordify x | |
splitter d suffix = let (t, x) = n `quotRem` d | |
in (wordify t) ++ suffix ++ wordify x | |
-- Tail recursive version | |
wordLength :: Int -> Int | |
wordLength i = go 0 i | |
where | |
go !pad !n | |
| n < 10 = lenOnes `VG.unsafeIndex` n + pad | |
| n < 20 = lenTeens `VG.unsafeIndex` (n-10) + pad | |
| n < 100 = go (lenTens `VG.unsafeIndex` (n//10) + pad) (n%10) | |
| n < 1000 = go (go (7+pad) (n//100)) (n%100) | |
| n < 1000000 = go (go (8+pad) (n//1000)) (n%1000) | |
| otherwise = go (go (7+pad) (n//1000000)) (n%1000000) | |
(I# a) // (I# b) = I# (a `quotInt#` b) | |
(I# a) % (I# b) = I# (a `remInt#` b) | |
!lenOnes = VU.fromList [0,3,3,5,4,4,3,5,5,4] -- "", "one","two", ... | |
!lenTens = VU.fromList [0,3,6,6,5,5,5,7,6,6] | |
!lenTeens = VU.fromList [3,6,6,8,8,7,7,9,8,8] -- first element is "ten" 3 | |
solve :: Int -> (Int, Int, Int) | |
solve n = go 0 0 1 | |
where | |
go !sumNum sumLen i | |
| sumLen' >= n = (sumNum', sumLen, i) | |
| otherwise = go sumNum' sumLen' (i+1) | |
where | |
sumNum' = sumNum + i | |
sumLen' = sumLen + wordLength i | |
solution :: Int -> (Int, Char) | |
solution x = | |
let (sumNum, sumLen, n) = solve x | |
in (sumNum, (wordify n) !! (x - sumLen - 1)) | |
main :: IO () | |
main = do | |
print $ solution 510000000 | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Brute force solver for the Word Number problem | |
#include <iostream> | |
#include <string> | |
int length_ones[10] = {0,3,3,5,4,4,3,5,5,4}; // "", one, two, three, ... | |
int length_tens[10] = {0,3,6,6,5,5,5,7,6,6}; // "", ten, twenty, ... | |
int length_teens[10] = {3,6,6,8,8,7,7,9,8,8}; // ten, eleven, twelve, ... | |
const char * ones[] = {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}; | |
const char * tens[] = {"", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"}; | |
const char * teens[] = {"ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteeen", "nineteen"}; | |
std::string wordify(long n) | |
{ | |
if(n < 10) return ones[n]; | |
else if(n < 20) return teens[n-10]; | |
else if(n < 100) return std::string(tens[n/10]) + ones[n%10]; | |
else if(n < 1000) return std::string(ones[n/100]) + "hundred" + wordify(n%100); | |
else if(n < 1000000) return wordify(n/1000) + "thousand" + wordify(n%1000); | |
else return wordify(n/1000000) + "million" + wordify(n%1000000); | |
} | |
int word_length(long n) | |
{ | |
if(n < 10) return length_ones[n]; | |
else if(n < 20) return length_teens[n-10]; | |
else if(n < 100) return length_tens[n/10] + length_ones[n%10]; | |
else if(n < 1000) return length_ones[n/100] + 7 + word_length(n%100); // 7 for "hundred" | |
else if(n < 1000000) return word_length(n/1000) + 8 + word_length(n%1000); | |
else return word_length(n/1000000) + 7 + word_length(n%1000000); | |
} | |
int main() | |
{ | |
long sumNumbers = 0; | |
long sumLength = 0; | |
const long target = 51000000000; | |
for(long i; i < 999999999; i++) | |
{ | |
sumNumbers += i; | |
long newSumLength = word_length(i) + sumLength; | |
if(newSumLength >= target) | |
break; | |
sumLength = newSumLength; | |
} | |
std::cout << "Sum: " << sumNumbers << std::endl; | |
std::cout << "The letter is " << wordify(i)[target - sumLength - 1] << std::endl; | |
return 0; | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- Original version | |
-- Can you spot how to improve this performance by 3 fold right away? | |
{-# LANGUAGE BangPatterns #-} | |
import Debug.Trace | |
import Data.Int | |
import Control.Monad | |
import Data.Array.Unboxed | |
ones = ["", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"] | |
tens = ["", "ten", "twenty", "thirty", "forty", "fifty", "sixty", "seventy", "eighty", "ninety"] | |
teens = ["ten", "eleven", "twelve", "thirteen", "fourteen", "fifteen", "sixteen", "seventeen", "eighteen", "nineteen"] | |
lenOnes, lenTens, lenTeens :: UArray Int64 Int64 | |
lenOnes = listArray (0,9) $ [0,3,3,5,4,5,3,5,5,4] -- "", "one","two", ... | |
lenTens = listArray (0,9) $ [0,3,6,6,5,5,5,7,6,6] | |
lenTeens = listArray (0,9) $ [3,6,6,8,8,7,7,9,8,8] -- first element is "ten" 3 | |
-- potentially cleaner version | |
-- but I feared Haskell might do surprising things behind my | |
-- back so I stuck with the above | |
-- lenBelowHundred = listArray (0,99) $ map (fromIntegral . length . wordify) [1..99] | |
-- wordify 123 = "onehundredtwentythree" | |
-- This is only used once in presenting the final result character | |
wordify :: Int64 -> String | |
wordify n | |
| n < 10 = ones !! fromIntegral n | |
| n < 20 = teens !! (fromIntegral n-10) | |
| n < 100 = splitterTen | |
| n < 1000 = splitter 100 "hundred" | |
| n < 1000000 = splitter 1000 "thousand" | |
| n < 1000000000 = splitter 1000000 "million" | |
where | |
splitterTen = let (t, x) = n `divMod` 10 | |
in (tens !! fromIntegral t) ++ wordify x | |
splitter div suffix = let (t, x) = n `divMod` div | |
in (wordify t) ++ suffix ++ wordify x | |
-- Optimized version of length (wordify n) | |
-- Used in number crunching | |
wordLength n = wordLength' 0 n | |
-- Tail recursive version | |
wordLength' :: Int64 -> Int64 -> Int64 | |
wordLength' !pad !n | |
| n < 10 = lenOnes ! n + pad | |
| n < 20 = lenTeens ! (n-10) + pad | |
| n < 100 = splitterTen | |
| n < 1000 = splitter 100 7 | |
| n < 1000000 = splitter 1000 8 | |
| otherwise = splitter 1000000 7 | |
where | |
splitterTen = let !(!t, !x) = n `divMod` 10 | |
in wordLength' (lenTens ! t + pad) x | |
splitter !d !suffix = let !(!t, !x) = n `divMod` d | |
in wordLength' (wordLength' (suffix+pad) t) x | |
-- Tail recursive | |
solve :: Int64 -> (Int64, Int64, Int64) -> [Int64] -> (Int64, Int64, Int64) | |
solve !n !acc@(!sumNum, !sumLen, !curr) (!num:nums) | |
| sumLen' >= n = (sumNum', sumLen, num) | |
| otherwise = solve n (sumNum', sumLen', num) nums | |
where | |
sumNum' = sumNum + num | |
sumLen' = sumLen + wordLength num | |
solution :: Int64 -> (Int64, Char) | |
solution !x = | |
let (sumNum, sumLen, n) = solve x (0,0,1) [1..] | |
in (sumNum, (wordify n) !! (fromIntegral $ x - sumLen - 1)) | |
main = do | |
print $ solution 1234 -- Make sure we are sane | |
print $ solution 51000000000 |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment