Commits

Konstantine Rybnikov committed a3aefb6

ok, now seems to work fine

  • Participants
  • Parent commits d21ab3e

Comments (0)

Files changed (4)

File Duplicates/Duplicates.hs

   ) where
 
 import System.IO         ( IOMode(ReadMode), openFile, hFileSize )
-import System.FilePath   ( splitDirectories )
+import System.FilePath   ( splitDirectories, joinPath )
 import Data.Data         ( Data, Typeable )
-import Data.List         (intercalate)
+import Data.List         ( intercalate, inits )
 import Data.ByteString   ( hGetContents )
 import Data.Digest.CRC32 ( crc32 )
 import Data.Word         ( Word32 )
 import Data.IxSet        ( Indexable, IxSet, ixSet, ixGen, ixFun,
                            Proxy(Proxy), empty, insert,
-                           (@=), (|||), getOne, toList, size )
+                           (@=), (&&&), getOne, toList, fromList, size )
 import Duplicates.Utils  ( allPossibleFiles )
 
 newtype NodePath     = NodePath { unNodePath :: FilePath }
 -- | List the folder keys under which a node is indexed.
 --
 -- With the added line, the node's path is split into its components and one
 -- 'NodeFolder' is produced per non-empty leading prefix of those components
 -- (@drop 1@ discards the empty prefix that 'inits' yields first), each prefix
 -- being re-joined with 'joinPath'. The last entry is therefore the full path
 -- itself. The removed line instead produced one 'NodeFolder' per single
 -- path component.
 getNodeFolders :: NodeInfo -> [NodeFolder]
 getNodeFolders info =
   let path = unNodePath $ getPath info
-  in map NodeFolder $ splitDirectories path
+  in map NodeFolder $ map joinPath $ drop 1 $ inits $ splitDirectories path
 
 instance Indexable NodeInfo where
   empty = ixSet
 indexPath path = do
   files <- allPossibleFiles path
   nodes <- getInfos files
-  let entries = foldr insert empty nodes
+  let entries = fromList nodes
   return entries
 
 -- | Duplicate count for a folder.
 --
 -- Looks up the infos indexed under @folder@ via 'getInfosInsideFolder' and
 -- evaluates 'getDuplicatesCountByInfo' for each of them. With the added
 -- lines the result is the number of those infos whose per-info count is
 -- greater than zero; the removed lines returned the sum of the per-info
 -- counts instead.
 --
 -- The result type is a plain 'Integer', so the @do@ below only wraps a
 -- single @let ... in@ expression.
 getDuplicatesCount :: NodesIndex -> FilePath -> Integer
 getDuplicatesCount index folder = do
   let infos = getInfosInsideFolder index folder
-      counts = map (getDuplicatesCountByInfo index) infos
-    in sum counts
+      counts = filter (>0) $ map (getDuplicatesCountByInfo index) infos
+    in toInteger $ length counts
 
 -- | Return the index entries that are keyed under the given folder.
 --
 -- The index is queried for an exact match on @NodeFolder path@ and the
 -- matching entries are converted to a plain list with 'toList'.
 getInfosInsideFolder :: NodesIndex -> FilePath -> [NodeInfo]
 getInfosInsideFolder index path =
   toList elems
   where elems = index @= (NodeFolder path)
 
--- getDuplicatesCountForFile :: NodesIndex -> FilePath -> Integer
--- getDuplicatesCountForFile index path =
---   let infos = index @= (NodePath path)
---   in case getOne infos of
---     Nothing -> error $ "Path " ++ path ++ " could not be found in index."
---     Just info -> getDuplicatesCountByInfo index info
-
 -- | Count the other index entries that match an info.
 --
 -- With the added lines, the query on the info's checksum and the query on
 -- its size are combined with &&&, and one is subtracted from the size of
 -- the combined result. The removed lines combined the two queries with |||
 -- and returned the size without subtracting.
 --
 -- NOTE(review): the subtraction presumably accounts for @info@ matching
 -- itself; if @info@ is not present in @index@ the result can be -1. Confirm
 -- that callers only pass infos taken from the same index.
 getDuplicatesCountByInfo :: NodesIndex -> NodeInfo -> Integer
 getDuplicatesCountByInfo index info =
-  toInteger $ size similarItems
-  -- TODO: check if there's better way to do "OR" query
-  where similarItems = index @= (getChecksum info)
-                       ||| index @= (getSize info)
+  toInteger $ (size similarItems) - 1
+  where similarItems = (index @= (getChecksum info))
+                       &&& (index @= (getSize info))
 
 printIndex :: NodesIndex -> IO ()
 printIndex index = do

File duplicates.hs

           printIndex index
           dirs' <- allPossibleDirs canonicalized
           let dirs = canonicalized : dirs'
+          putStrLn "Dirs:"
+          putStrLn $ intercalate "\n" dirs
           let dirCounts = map (getDuplicatesCount index) dirs
           let dirsWithCounts = zip dirs dirCounts
           putStrLn "Number of duplicate files:"

File testdata/2/john_1.txt

-john

File tests/Test.hs

+{-# LANGUAGE DeriveDataTypeable #-}
+
 import Test.HUnit
-import Data.IxSet ( insert, empty )
+import Data.IxSet
 import Duplicates.Duplicates
+import Data.List.Split ( splitOn )
+import Data.Data ( Data, Typeable )
 
 test1 = 
-  let items = [ NodeInfo { getPath=NodePath "/foo/bar.txt"
-                         , getSize=NodeSize 10
-                         , getChecksum=NodeChecksum 22}
-              , NodeInfo { getPath=NodePath "/foo/baz.txt"
-                         , getSize=NodeSize 10
-                         , getChecksum=NodeChecksum 22 } ]
-      index = foldr insert empty items
-      info = NodeInfo { getPath=NodePath "/foo/bar.txt"
-                      , getSize=NodeSize 10
-                      , getChecksum=NodeChecksum 22 }
-  in TestCase (
+  TestCase (
     assertEqual "simple"
     1
     (getDuplicatesCountByInfo index info))
+  where items = [ NodeInfo { getPath=NodePath "/foo/bar.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22}
+                , NodeInfo { getPath=NodePath "/foo/baz.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22 } ]
+        index = fromList items
+        info = NodeInfo { getPath=NodePath "/foo/bar.txt"
+                        , getSize=NodeSize 10
+                        , getChecksum=NodeChecksum 22 }
+
+
+test2 =
+  TestCase (
+    assertEqual "simple"
+    2
+    (getDuplicatesCount index dir))
+  where items = [ NodeInfo { getPath=NodePath "/foo/bar.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22}
+                , NodeInfo { getPath=NodePath "/foo/baz.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22 } ]
+        index = fromList items
+        dir = "/foo"
+
+data TaggedItem = TaggedItem { getTags :: String }
+                deriving ( Show, Ord, Eq, Data, Typeable )
+
+data Tag = Tag String
+           deriving ( Show, Ord, Eq, Data, Typeable )
+
+getTagStrings :: TaggedItem -> [Tag]
+getTagStrings = map Tag . splitOn "," . getTags
+
+instance Indexable TaggedItem where
+  empty = ixSet
+            [ ixFun getTagStrings ]
+
+test3 = TestCase (
+  assertEqual "ixFun multiple tags test"
+  3
+  (size (index @= (Tag "tag"))) )
+  where items = [ TaggedItem "tag,tag1,tag2"
+                , TaggedItem "tag,tag3,tag4"
+                , TaggedItem "tag,tag5,tag6" ]
+        index = fromList items
+
+test4 =
+  TestCase (
+    assertEqual "getDuplicatesCountByInfo to return 0"
+    0
+    (getDuplicatesCountByInfo index info))
+  where items = [ NodeInfo { getPath=NodePath "/foo/bar.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22}
+                , NodeInfo { getPath=NodePath "/foo/baz.txt"
+                           , getSize=NodeSize 11
+                           , getChecksum=NodeChecksum 22 } ]
+        index = fromList items
+        info = NodeInfo { getPath=NodePath "/foo/bar.txt"
+                        , getSize=NodeSize 10
+                        , getChecksum=NodeChecksum 22 }
+        dir = "/foo"
+
+test5 =
+  TestCase (
+    assertEqual "different files 0 duplicates"
+    0
+    (getDuplicatesCount index dir))
+  where items = [ NodeInfo { getPath=NodePath "/foo/bar.txt"
+                           , getSize=NodeSize 10
+                           , getChecksum=NodeChecksum 22}
+                , NodeInfo { getPath=NodePath "/foo/baz.txt"
+                           , getSize=NodeSize 11
+                           , getChecksum=NodeChecksum 22 } ]
+        index = fromList items
+        dir = "/foo"
 
 tests = TestList [
-  TestLabel "test1" test1]
+  TestLabel "test1" test1,
+  TestLabel "test2" test2,
+  TestLabel "test3" test3,
+  TestLabel "test4" test4,
+  TestLabel "test5" test5]
 
 main = do
   runTestTT tests