Commits

Luke Plant committed 7d7b630

Completely different algorithm for combinations, addressing previous bugs.

Comments (0)

Files changed (1)

 
 import Char (isDigit, toUpper)
 import Control.Monad (guard)
-import Data.List (partition)
-import Maybe (fromJust, listToMaybe)
+import Data.List (partition, nub, sort)
+import Maybe (fromJust)
+
 
 import qualified Data.ByteString.Char8 as B
 import qualified Data.Map as Map
 -- Search for combos
 --
 
-type Combo = [Segment]
-type Segment = B.ByteString
-type Cache = Map.Map B.ByteString [Combo]
+{-
 
-findCombos :: B.ByteString
-           -> WordTree
-           -> [Combo]
-findCombos digits tree =
-    let combos = fst $ findCombosR digits tree Map.empty
-        boring c = all allDigits c
-    in filter (not . boring) combos
-      -- some top level filtering we can't do in the recurisve function (as
-      -- otherwise we'll trim potential results)
+Model for understanding the problem:
 
-findCombosR :: B.ByteString
-            -> WordTree
-            -> Cache
-            -> ([Combo], Cache)
-findCombosR digits tree cache =
-        -- we need to check if all n digits make a word,
-        -- and if the first n-1 and the last 1 make
-        -- words, then (n-2, 2) etc. (n-n,n) should be ignored.
+The string of n digits is a river n steps wide.  Words that can be found in the
+digits are like logs which are placed in the river at certain positions away
+from the bank.
 
-        -- The first item in pair should checked directly for matches,
-        -- but the second recursively checked for combinations
+  0 7 6 6 3 9 6 7 3 7
++---------------------+
+|   S O M E           |
+|           W O R D   |
+|           W O R D S |
+|       M E W         |
+|       N E W         |
++---------------------+
 
-    let cacheVal = Map.lookup digits cache
-        n = B.length digits
-        pairs = [ B.splitAt l digits | l <- [1..n] ]
-        -- need to thread the cache through, therefore use a fold rather than a map
-        calcPairOptions (retval, c) (p1, p2) =
-            let (subcombos, c2) = findCombosR p2 tree c
-                (combos, c3) = combineParts (findMatches p1 tree c2) (subcombos, c2)
-            in (retval ++ combos, c3)
-        (result, cache2) = foldl calcPairOptions ([], cache) pairs
-        updatedCache = Map.insert digits result cache2
-    in case cacheVal of
-         Nothing -> (result, updatedCache)
-         Just val -> (val, cache)
-  where
-    findMatches :: B.ByteString -> WordTree -> Cache -> [Segment]
-    findMatches digits tree cache =
-        let matches = findWords digits tree
-        in if null matches
-           -- If no matches, we still need the search to continue, because the
-           -- result might be interesting even if it is not entirely composed of
-           -- words.
-           -- However, we don't want to add this 'boring' item if there are
-           -- 'interesting' alternatives.  Since we have already calculated
-           -- subcombos, we can find out if there are interesting combos.
-           -- We check all 'tails' of digits for something interesting,
-           -- excluding the full string.
-           then if any (\w ->
-                            case Map.lookup w cache of
-                              Nothing -> False
-                              Just ms -> any interesting ms
-                            || (not $ null $ findWords w tree)
-                       ) (B.tails digits)
-                then matches
-                else [digits]
-           else matches
+The aim is to get from one side of the river to the other by stepping on the
+logs, and the full solution will find all possible routes, using all the logs.
 
-    combineParts :: [Segment] -> ([Combo], Cache) -> ([Combo], Cache)
-    combineParts segments (combos, cache) =
-        if null combos
-        -- can only occur if 'segments' represents the end of the string
-        then (map (:[]) segments, cache)
-        else --[ s:c | c <- combos, s <- segments ]
-            let any_interesting = any interesting combos
-                result =
-                    do
-                      c <- combos
-                      s <- segments
+If there are gaps which stop us crossing, or stop certain logs being used, we
+can add strings of the original digits as logs.  For example, in the above case,
+we need to add four 'digit logs' to allow all the word logs to be used:
 
-                      -- to prune boring items, we need to remove:
+  0 7 6 6 3 9 6 7 3 7
++---------------------+
+|   S O M E           |
+|           W O R D   |
+|           W O R D S |
+|       M E W         |
+|       N E W         |
+| 0                   |
+|                   7 |
+| 0 7 6               |
+|             6 7 3 7 |
++---------------------+
 
-                      -- * sequences of digits, where 's' is digits and the next
-                      --   segment from c is digits
-                      let allDigits_s = allDigits s
-                      guard (not (allDigits_s
-                                  && not (null c)
-                                  && allDigits (head c)))
-                      -- * combinations that contain only numbers if other combinations
-                      --   have words
-                      guard (not any_interesting || any hasWord c)
+This allows us to generate:
 
-                      return (s:c)
-            in (result, cache)
+ 0-some-words
+ 0-some-word-7
+ 076-new-6737
+ 076-mew-6737
 
-hasWord :: Segment -> Bool
+So the algorithm is:
+
+ - find all the 'word logs'
+   - necessarily an O(n^2) algorithm, assuming we can check a
+     string of digits in constant time
+ - add in all necessary 'digit logs'
+   - depends on the number and spacing of word logs
+ - find all routes across the river
+   - potentially O(k^n) solutions, it depends on input.
+
+Note, in finding all routes across the river, we must never use more than one
+'digit log' consecutively.  This restriction is needed because the algorithm for
+adding 'digit logs' can mean we get digit logs that can be laid end to end.
+
+-}
+
+-- Info about a single interesting 'combination' is stored as a list of
+-- ByteStrings, so we can display with dashes easily.
+type Combo = [B.ByteString]
+
+showCombo :: Combo -> B.ByteString
+showCombo = B.intercalate (B.pack "-")
+
+substring :: Int -> Int -> B.ByteString -> B.ByteString
+substring start len word = B.take len $ B.drop start word
+
+hasWord :: B.ByteString -> Bool
 hasWord = any (not . isDigit) . B.unpack
 
-allDigits :: Segment -> Bool
-allDigits = not . hasWord
+-- WordMap - stores the info about words that can be found
+-- at each (position,length).
+type WordMap = [((Int,Int),[B.ByteString])]
 
-interesting :: Combo -> Bool
-interesting ss = any hasWord ss
+-- (We might need a better structure than a simple association list for WordMap
+-- future, but in some functions the association list will be just right)
+type WordList = [((Int,Int),[B.ByteString])]
 
-showCombo = B.intercalate (B.pack "-")
+findCombos :: B.ByteString -> WordTree -> [Combo]
+findCombos digits tree =
+    let wordMap = makeWordMap digits tree
+        allMap = addDigitWords digits wordMap
+    in findRoutes allMap
+
+   where
+     makeWordMap :: B.ByteString -> WordTree -> WordMap
+     makeWordMap digits tree =
+         makeMap $ do
+           pos <- [0..(totalLength - 1)] -- starting position
+           len <- [1..totalLength - pos] -- length
+           let words = findWords (substring pos len digits) tree
+           return ((pos, len), words)
+
+     addDigitWords :: B.ByteString -> WordMap -> WordMap
+     addDigitWords digits wordMap =
+     {-
+
+     Adding the 'digit words'
+
+      - for each 'end of word' (including the left bank):
+
+        - find all the words that appear to the right - W
+        - note the shortest of the nearest of these words - n
+        - for each word w in W
+          - if the start of w comes before the end of n, then add a digit word to fill
+            in the interval up to w
+
+      - in doing the above, ensure that duplicate digit words are not added.
+      -}
+         let wordPos = findWordPositions wordMap
+             endPositions = 0 : findEndPositions wordPos
+         in updateMap wordMap $ nub $ do
+           pos <- endPositions
+           let rightWords = wordsToRightOf pos wordPos
+           let (n_pos,n_len) = head $ sort rightWords -- shortest of nearest words
+           (w_pos,w_len) <- rightWords
+           -- don't add 'digit word' if start position of current word
+           -- is outside the range covered by (n_pos,n_len):
+           guard (w_pos < n_pos + n_len)
+           let gapWidth = w_pos - pos
+           -- if we have a gap of zero, there is nothing to add:
+           guard (gapWidth > 0)
+           let digitWord = substring pos gapWidth digits
+           return ((pos, gapWidth), [digitWord])
+
+       where
+         wordsToRightOf pos wordPos = [ (p,len) | (p,len) <- wordPos, p >= pos ]
+                                      ++
+                                      -- need to make sure there a word which represents the
+                                      -- 'right bank', so that we can bridge a gap from last
+                                      -- word to the end of the string if needed.
+                                      [rightBank]
+         findEndPositions wordPos = nub [ pos + len | (pos,len) <- wordPos ]
+         rightBank = (totalLength, 1)
+
+     findRoutes :: WordMap -> [Combo]
+     findRoutes wordMap = findRoutes' (findWordEntries wordMap) 0 totalLength
+
+     findRoutes' :: WordList -> Int -> Int -> [Combo]
+     findRoutes' words start end =
+         do
+           ((p,len),ws) <- filter (\((p,_),_) -> p == start) words
+           w <- ws
+           if p + len == end
+              then return [w]
+              else do
+                c <- findRoutes' words (start + len) end
+                guard (hasWord w || (hasWord $ head c))
+                return (w:c)
+
+     --
+     -- Shared definitions between different parts of algorithm
+     --
+
+     totalLength = B.length digits
+
+     -- The following functions, (makeMap, updateMap) may have trivial
+     -- implementations, but they allow us to change the datastructure WordMap
+     -- with minimal changes to the rest.
+     makeMap :: WordList -> WordMap
+     makeMap = id
+
+     updateMap :: WordMap -> WordList -> WordMap
+     updateMap wordMap additional = wordMap ++ additional
+
+     -- get a list of all (position,length) where there are actually words
+     findWordPositions :: WordMap -> [(Int,Int)]
+     findWordPositions wordMap = nub [ x | (x, words) <- wordMap, not $ null words ]
+
+     -- get the entries where there are actually words
+     findWordEntries :: WordMap -> WordList
+     findWordEntries wordMap = filter (not . null . snd) wordMap
 
 
 --
 
 countWords t = length (nodeWords t) + (sum $ map countWords (nodeSubForest t))
 
-
 main = do
   ws <- getWords
   let wordTree = buildWordTree ws
   putStrLn ""
 
   mainLoop wordTree
-
-
--- TODO - fix 07956121485 - need to cache parts like '56121' to avoid boring outputs