Commits

Luke Plant committed 7ac1391

Initial import

  • Participants

Comments (0)

Files changed (1)

File phone2word.hs

+{- 
+Attempt to convert numbers like 01523 568304 into
+words/numbers, using the map used on most mobile
+phone keypads.
+
+-}
+
+import Char (isDigit, toUpper)
+import Control.Monad (guard)
+import Data.List (partition)
+import Maybe (fromJust, listToMaybe)
+
+import qualified Data.ByteString.Char8 as B
+import qualified Data.Map as Map
+
+-- store words in a tree where the nodes are labelled as numbers
+
+letters = [ ('0', "")
+          , ('1', "")
+          , ('2', "ABC")
+          , ('3', "DEF")
+          , ('4', "GHI")
+          , ('5', "JKL")
+          , ('6', "MNO")
+          , ('7', "PQRS")
+          , ('8', "TUV")
+          , ('9', "WXYZ")
+          ]
+
+lettersMap :: Map.Map Char [Char]
+lettersMap = Map.fromList letters
+
+numbersMap :: Map.Map Char Char
+numbersMap = Map.fromList [ (l,c)  | (c,word) <- letters, l <- word]
+
+getLetters :: Char -> [Char]
+getLetters digit = fromJust $ Map.lookup digit lettersMap
+
+getNumber :: Char -> Maybe Char
+getNumber char = Map.lookup (toUpper char) numbersMap
+
+-- TODO - this whole datastructure should be strict
+-- and read in strictly
+
+data WordTree = Node { nodeLabel :: Char           -- digit
+                     , nodeSubForest :: [WordTree] -- children
+                     , nodeWords :: [B.ByteString] -- words that correspond to this node
+                     } deriving (Eq, Read, Show)
+
+startNode = Node { nodeLabel = 'X' -- never used
+                 , nodeSubForest = []
+                 , nodeWords = []
+                 }
+
+newNode :: Char -> WordTree
+newNode digit = Node { nodeLabel = digit
+                     , nodeSubForest = []
+                     , nodeWords = []
+                     }
+
+buildWordTree :: [B.ByteString] -> WordTree
+buildWordTree words = buildWordTree' words startNode
+    where
+      buildWordTree' [] tree = tree
+      buildWordTree' (w:ws) tree = buildWordTree' ws (addWord w w tree)
+
+
+ -- Look up a char in the sub forests, and return ([matching WordTree], [non-matching WordTree])
+subForestLookup :: Char -> WordTree -> ([WordTree],[WordTree])
+subForestLookup char tree = partition (\n -> nodeLabel n == char) $ nodeSubForest tree
+
+addWord :: B.ByteString -- ^ remaining characters
+        -> B.ByteString -- ^ whole word
+        -> WordTree     -- ^ input tree
+        -> WordTree
+addWord chars word tree =
+    if B.null chars
+       then tree { nodeWords = word : nodeWords tree }
+       else let c = B.head chars
+                cs = B.tail chars
+                md = getNumber c
+            in case md of
+                 Nothing -> tree -- no corresponding digit to letter, can't add word
+                 Just d ->
+                     let (matching, others) = subForestLookup d tree
+                         node = case matching of
+                                  -- if node does not exist, need to create it
+                                  [] -> newNode d
+                                  [n] -> n
+                                  _ -> error "impossible happened"
+                         node' = addWord cs word node
+                     in tree { nodeSubForest = node' : others }
+
+
+-- | Find words that correspond exactly to the digits supplied
+findWords :: B.ByteString -- ^ the string of digits
+          -> WordTree
+          -> [B.ByteString]
+findWords chars tree =
+    if B.null chars
+       then nodeWords tree
+       else let d = B.head chars
+                ds = B.tail chars
+                (ns, others) = subForestLookup d tree
+            in case ns of
+                 [] -> []
+                 (n:_) -> findWords ds n
+
+getWords :: IO [B.ByteString]
+getWords = B.readFile "/usr/share/dict/words" >>= (return . (filter (\x -> B.length x > 1)) . B.lines)
+
+countNodes t = 1 + (sum $ map countNodes (nodeSubForest t))
+
+countWords t = length (nodeWords t) + (sum $ map countWords (nodeSubForest t))
+
+main = do
+  ws <- getWords
+  let wordTree = buildWordTree ws
+  putStr "Words: "
+  print (countWords wordTree)
+  putStr "Nodes: "
+  print (countNodes wordTree)
+  mainLoop wordTree
+
+mainLoop wordTree = do
+  putStrLn "Enter phone number: "
+  number' <- getLine
+  putStrLn "Matches: "
+  let number = filter isDigit number'
+  let combos = findCombos (B.pack number) wordTree
+  case length combos of
+    0 -> putStrLn " [None]"
+    _ -> mapM_ B.putStrLn $ map showCombo combos
+  putStrLn ""
+
+  mainLoop wordTree
+
+-- TODO
+--
+-- * search for combinations to get a 'memorable' version of a phone number
+
+
+type Combo = [Segment]
+type Segment = B.ByteString
+
+
+findCombos :: B.ByteString
+           -> WordTree
+           -> [Combo]
+findCombos digits tree =
+        -- we need to check if all n digits make a word,
+        -- and if the first n-1 and the last 1 make
+        -- words, then (n-2, 2) etc. (n-n,n) should be ignored.
+
+        -- The first item in pair should checked directly for matches,
+        -- but the second recursively checked for combinations
+
+    let n = B.length digits
+        pairs = [ B.splitAt l digits | l <- [1..n] ]
+        calcPairOptions = (\(p1,p2) -> combineParts (findMatches p1 tree) (findCombos p2 tree))
+    in concatMap calcPairOptions pairs
+
+  where
+    findMatches :: B.ByteString -> WordTree -> [Segment]
+    findMatches digits tree =
+        let matches = findWords digits tree
+        in if null matches
+           -- If no matches, we still need the search to continue, because the
+           -- result might be interesting even if it is not entirely composed of
+           -- words.
+           then [digits]
+           else matches
+
+    combineParts :: [Segment] -> [Combo] -> [Combo]
+    combineParts segments combos =
+        if null combos
+        -- can only occur if 'segments' represents the end of the string
+        then map (:[]) segments
+        else --[ s:c | c <- combos, s <- segments ]
+            let any_interesting = hasWord $ B.concat $ concat combos
+            in
+              do
+                c <- combos
+                s <- segments
+
+              -- to prune the tree of possibilities, we need to remove:
+
+              -- * sequences of digits, where 's' is digits and the next
+              --   segment from c is digits
+              -- * combinations that contain only numbers if other combinations
+              --   have words
+                let remainder = B.concat c
+                guard (not (allDigits s
+                            && not (null c)
+                            && allDigits (head c)))
+                guard (not any_interesting || hasWord remainder)
+                return (s:c)
+
+    hasWord = any (not . isDigit) . B.unpack
+    allDigits = not . hasWord
+
+showCombo = B.intercalate (B.pack "-")