Commits

Luke Plant committed f3007de

Used a Data.Map instead of custom data structure to store dictionary.

Comments (0)

Files changed (1)

 import Char (isDigit, toUpper)
 import Control.Monad (guard)
 import Data.List (partition, nub, sort, foldl')
-import Maybe (fromJust)
+import Maybe (catMaybes, isNothing, isJust)
 import Prelude hiding (sum)
 import System.Environment (getArgs)
 
 
 {-
 
-To quickly determine if a series of digits corresponds to a word, we create a
-tree that stores all our dictionary words.  Each node is (effectively) labelled
-with a digit, and words are 'hung' onto the tree at nodes where the path to the
-node is the series of digits needed to 'spell' the word on a phone keypad.  The
-tree is sparse i.e. nodes are only created as they are need to store words or
-store the route to a word.
-
-In this way, looking up any series of digits becomes very quick, as for most
-series of digits we 'fall off the bottom' of the tree very quickly, and always
-fall off after n moves, where n is the maximum word length in the dictionary.
-
-Each node does not actually store the digit label - the root node does not have
-a digit, and children are stored in a Map where the key is the digit, so we know
-what digit each child node corresponds to by the path we have followed.
-
+We convert all our words to digit form, and store in a simple Data.Map,
+which proves to be sufficiently faster (and only about 70% slower
+than a custom tree datastructure).
 -}
 
-data WordTree = Node { nodeSubForest :: !(Map.Map Char WordTree) -- children
-                     , nodeWords :: ![B.ByteString]              -- words that correspond to this node
-                     } deriving (Eq, Read, Show)
-
+type WordTree = Map.Map B.ByteString [B.ByteString]
 
 --
 -- Building the tree
 --
 
-emptyNode = Node { nodeSubForest = Map.empty
-                 , nodeWords = []
-                 }
-
 buildWordTree :: [B.ByteString] -> WordTree
 buildWordTree words =
-    foldl' (\t w -> addWord w w t) emptyNode words
+    let l = catMaybes $ do
+          w <- words
+          let digits = convWord w
+          return $ case digits of
+                     Nothing -> Nothing
+                     Just ds -> Just (ds,[w])
+    in Map.fromAscListWith (\a1 a2 -> addWordToList (head a1) a2) (sort l)
 
-addWord :: B.ByteString -- ^ remaining characters
-        -> B.ByteString -- ^ whole word
-        -> WordTree     -- ^ input tree
-        -> WordTree
-addWord !chars !word !tree =
-    if B.null chars
-       then tree { nodeWords = addWordToList word (nodeWords tree) }
-       else let c = B.head chars
-                cs = B.tail chars
-                md = getNumber c
-            in case md of
-                 Nothing -> tree -- no corresponding digit to letter, can't add word
-                 Just d ->
-                     let matching = Map.lookup d $ nodeSubForest tree
-                         node = case matching of
-                                  -- if node does not exist, need to create it
-                                  Nothing -> emptyNode
-                                  Just n -> n
-                         node' = addWord cs word node
-                     in tree { nodeSubForest = Map.insert d node' $ nodeSubForest tree }
+-- Convert word to digit form if possible
+convWord :: B.ByteString -> Maybe B.ByteString
+convWord word = let conv = bMap getNumber word
+                in if any isNothing conv
+                   then Nothing
+                   else Just $ B.pack $ catMaybes conv
+
+bMap :: (Char -> a) -> B.ByteString -> [a]
+bMap f b = if B.null b
+           then []
+           else f (B.head b):bMap f (B.tail b)
 
 addWordToList w ws =
     -- eliminate duplicates
 findWords :: B.ByteString -- ^ the string of digits
           -> WordTree
           -> [B.ByteString]
-findWords chars tree =
-    if B.null chars
-       then nodeWords tree
-       else let d = B.head chars
-                ds = B.tail chars
-                nd = Map.lookup d $ nodeSubForest tree
-            in case nd of
-                 Nothing -> []
-                 Just n -> findWords ds n
+findWords chars tree = case Map.lookup chars tree of
+                         Just x -> x
+                         Nothing -> []
 
 --
 -- Search for combos
 getWords :: String -> IO [B.ByteString]
 getWords dictFile = B.readFile dictFile >>= (return . B.lines)
 
-countNodes t = 1 + (sum $ map countNodes (Map.elems $ nodeSubForest t))
+countNodes t = Map.size t
 
-countWords t = length (nodeWords t) + (sum $ map countWords (Map.elems $ nodeSubForest t))
+countWords t = sum $ map length $ Map.elems t
 
 main = do
   dictFile:_ <- getArgs