Source

phone2word / phone2word.hs

{- 
Attempt to convert numbers like 01523 568304 into
words/numbers, using the map used on most mobile
phone keypads.

-}

import Char (isDigit, toUpper)
import Control.Monad (guard)
import Data.List (partition, nub, sort)
import Maybe (fromJust)
import System.Environment (getArgs)

import qualified Data.ByteString.Char8 as B
import qualified Data.Map as Map

--
-- Definitions
--

letters = [ ('0', "")
          , ('1', "")
          , ('2', "ABC")
          , ('3', "DEF")
          , ('4', "GHI")
          , ('5', "JKL")
          , ('6', "MNO")
          , ('7', "PQRS")
          , ('8', "TUV")
          , ('9', "WXYZ")
          ]

--
-- General utilities
--

lettersMap :: Map.Map Char [Char]
lettersMap = Map.fromList letters

numbersMap :: Map.Map Char Char
numbersMap = Map.fromList [ (l,c)  | (c,word) <- letters, l <- word]

getLetters :: Char -> [Char]
getLetters digit = fromJust $ Map.lookup digit lettersMap

getNumber :: Char -> Maybe Char
getNumber char = Map.lookup (toUpper char) numbersMap

data WordTree = Node { nodeLabel :: Char           -- digit
                     , nodeSubForest :: [WordTree] -- children
                     , nodeWords :: [B.ByteString] -- words that correspond to this node
                     } deriving (Eq, Read, Show)


 -- Look up a char in the sub forests, and return ([matching WordTree], [non-matching WordTree])
subForestLookup :: Char -> WordTree -> ([WordTree],[WordTree])
subForestLookup char tree = partition (\n -> nodeLabel n == char) $ nodeSubForest tree

--
-- Building the tree
--

startNode = Node { nodeLabel = 'X' -- never used
                 , nodeSubForest = []
                 , nodeWords = []
                 }

newNode :: Char -> WordTree
newNode digit = Node { nodeLabel = digit
                     , nodeSubForest = []
                     , nodeWords = []
                     }

buildWordTree :: [B.ByteString] -> WordTree
buildWordTree words = buildWordTree' words startNode
    where
      buildWordTree' [] tree = tree
      buildWordTree' (w:ws) tree = buildWordTree' ws (addWord w w tree)


addWord :: B.ByteString -- ^ remaining characters
        -> B.ByteString -- ^ whole word
        -> WordTree     -- ^ input tree
        -> WordTree
addWord chars word tree =
    if B.null chars
       then tree { nodeWords = word : nodeWords tree }
       else let c = B.head chars
                cs = B.tail chars
                md = getNumber c
            in case md of
                 Nothing -> tree -- no corresponding digit to letter, can't add word
                 Just d ->
                     let (matching, others) = subForestLookup d tree
                         node = case matching of
                                  -- if node does not exist, need to create it
                                  [] -> newNode d
                                  [n] -> n
                                  _ -> error "impossible happened"
                         node' = addWord cs word node
                     in tree { nodeSubForest = node' : others }


--
-- Search the tree
--

-- | Find words that correspond exactly to the digits supplied
findWords :: B.ByteString -- ^ the string of digits
          -> WordTree
          -> [B.ByteString]
findWords chars tree =
    if B.null chars
       then nodeWords tree
       else let d = B.head chars
                ds = B.tail chars
                (ns, others) = subForestLookup d tree
            in case ns of
                 [] -> []
                 (n:_) -> findWords ds n

--
-- Search for combos
--

{-

Model for understanding the problem:

The string of n digits is a river n steps wide.  Words that can be found in the
digits are like logs which are placed in the river at certain positions away
from the bank.

  0 7 6 6 3 9 6 7 3 7
+---------------------+
|   S O M E           |
|           W O R D   |
|           W O R D S |
|       M E W         |
|       N E W         |
+---------------------+

The aim is to get from one side of the river to the other by stepping on the
logs, and the full solution will find all possible routes, using all the logs.

If there are gaps which stop us crossing, or stop certain logs being used, we
can add strings of the original digits as logs.  For example, in the above case,
we need to add four 'digit logs' to allow all the word logs to be used:

  0 7 6 6 3 9 6 7 3 7
+---------------------+
|   S O M E           |
|           W O R D   |
|           W O R D S |
|       M E W         |
|       N E W         |
| 0                   |
|                   7 |
| 0 7 6               |
|             6 7 3 7 |
+---------------------+

This allows us to generate:

 0-some-words
 0-some-word-7
 076-new-6737
 076-mew-6737

So the algorithm is:

 - find all the 'word logs'
   - necessarily an O(n^2) algorithm, assuming we can check a
     string of digits in constant time
 - add in all necessary 'digit logs'
   - depends on the number and spacing of word logs
 - find all routes across the river
   - potentially O(k^n) solutions, it depends on input.

Note, in finding all routes across the river, we must never use more than one
'digit log' consecutively.  This restriction is needed because the algorithm for
adding 'digit logs' can mean we get digit logs that can be laid end to end.

-}

-- Info about a single interesting 'combination' is stored as a list of
-- ByteStrings, so we can display with dashes easily.
type Combo = [B.ByteString]

showCombo :: Combo -> B.ByteString
showCombo = B.intercalate (B.pack "-")

substring :: Int -> Int -> B.ByteString -> B.ByteString
substring start len word = B.take len $ B.drop start word

hasWord :: B.ByteString -> Bool
hasWord = any (not . isDigit) . B.unpack

-- WordMap - stores the info about words that can be found
-- at each (position,length).
type WordMap = [((Int,Int),[B.ByteString])]

-- (We might need a better structure than a simple association list for WordMap
-- future, but in some functions the association list will be just right)
type WordList = [((Int,Int),[B.ByteString])]

findCombos :: B.ByteString -> WordTree -> [Combo]
findCombos digits tree =
    let wordMap = makeWordMap digits tree
        allMap = addDigitWords digits wordMap
    in findRoutes allMap

   where
     makeWordMap :: B.ByteString -> WordTree -> WordMap
     makeWordMap digits tree =
         makeMap $ do
           pos <- [0..(totalLength - 1)] -- starting position
           len <- [1..totalLength - pos] -- length
           let words = findWords (substring pos len digits) tree
           return ((pos, len), words)

     addDigitWords :: B.ByteString -> WordMap -> WordMap
     addDigitWords digits wordMap =
     {-

     Adding the 'digit words'

      - for each 'end of word' (including the left bank):

        - find all the words that appear to the right - W
        - note the shortest of the nearest of these words - n
        - for each word w in W
          - if the start of w comes before the end of n, then add a digit word to fill
            in the interval up to w

      - in doing the above, ensure that duplicate digit words are not added.
      -}
         let wordPos = findWordPositions wordMap
             endPositions = 0 : findEndPositions wordPos
         in updateMap wordMap $ nub $ do
           pos <- endPositions
           let rightWords = wordsToRightOf pos wordPos
           let (n_pos,n_len) = head $ sort rightWords -- shortest of nearest words
           (w_pos,w_len) <- rightWords
           -- don't add 'digit word' if start position of current word
           -- is outside the range covered by (n_pos,n_len):
           guard (w_pos < n_pos + n_len)
           let gapWidth = w_pos - pos
           -- if we have a gap of zero, there is nothing to add:
           guard (gapWidth > 0)
           let digitWord = substring pos gapWidth digits
           return ((pos, gapWidth), [digitWord])

       where
         wordsToRightOf pos wordPos = [ (p,len) | (p,len) <- wordPos, p >= pos ]
                                      ++
                                      -- need to make sure there a word which represents the
                                      -- 'right bank', so that we can bridge a gap from last
                                      -- word to the end of the string if needed.
                                      [rightBank]
         findEndPositions wordPos = nub [ pos + len | (pos,len) <- wordPos ]
         rightBank = (totalLength, 1)

     findRoutes :: WordMap -> [Combo]
     findRoutes wordMap = findRoutes' (findWordEntries wordMap) 0 totalLength

     findRoutes' :: WordList -> Int -> Int -> [Combo]
     findRoutes' words start end =
         do
           ((p,len),ws) <- filter (\((p,_),_) -> p == start) words
           w <- ws
           if p + len == end
              then return [w]
              else do
                c <- findRoutes' words (start + len) end
                guard (hasWord w || (hasWord $ head c))
                return (w:c)

     --
     -- Shared definitions between different parts of algorithm
     --

     totalLength = B.length digits

     -- The following functions, (makeMap, updateMap) may have trivial
     -- implementations, but they allow us to change the datastructure WordMap
     -- with minimal changes to the rest.
     makeMap :: WordList -> WordMap
     makeMap = id

     updateMap :: WordMap -> WordList -> WordMap
     updateMap wordMap additional = wordMap ++ additional

     -- get a list of all (position,length) where there are actually words
     findWordPositions :: WordMap -> [(Int,Int)]
     findWordPositions wordMap = nub [ x | (x, words) <- wordMap, not $ null words ]

     -- get the entries where there are actually words
     findWordEntries :: WordMap -> WordList
     findWordEntries wordMap = filter (not . null . snd) wordMap


--
-- Main
--

getWords :: String -> IO [B.ByteString]
getWords dictFile = B.readFile dictFile >>= (return . (filter (\x -> B.length x > 1)) . B.lines)

countNodes t = 1 + (sum $ map countNodes (nodeSubForest t))

countWords t = length (nodeWords t) + (sum $ map countWords (nodeSubForest t))

main = do
  dictFile:_ <- getArgs
  ws <- getWords dictFile
  let wordTree = buildWordTree ws
  putStr "Words: "
  print (countWords wordTree)
  putStr "Nodes: "
  print (countNodes wordTree)
  mainLoop wordTree

mainLoop wordTree = do
  putStrLn "Enter phone number: "
  number' <- getLine
  putStrLn "Matches: "
  let number = filter isDigit number'
  let combos = findCombos (B.pack number) wordTree
  case length combos of
    0 -> putStrLn " [None]"
    _ -> mapM_ B.putStrLn $ map showCombo combos
  putStrLn ""

  mainLoop wordTree