Commits

Stefan Saasen  committed 42d9bb5

Split Git module into Git.Store and Git.Pack and rename the object type to be pack specific

  • Participants
  • Parent commits 72e0810

Comments (0)

Files changed (12)

 
 executable  hgit
   main-is:          Main.hs
-  other-modules:    Git.TcpClient, Git.ObjectStore, Git.Packfile, Git.PackProtocol, Git.Common, Git.Remote, Git.Delta
   build-depends:
                     base < 5 && >= 3,
                     filepath >= 1.3,

File src/Git/Common.hs

   , isMsbSet
   , eitherToMaybe
   , GitRepository(..)
-  , ObjectType(..)
 ) where
 
 import Text.Printf      (printf)
 import Numeric          (showHex)
 import Data.Bits        (Bits, (.&.))
-import Data.Word        (Word8)
-
-data ObjectType = Commit | -- 1
-        Tree |  -- 2
-        Blob | -- 3
-        Tag | -- 4
-        OfsDelta Int | -- 6 -- offset is interpreted as a negative offset from the type-byte of the header of the ofs-delta entry 
-        RefDelta [Word8] deriving (Eq, Show, Ord) -- 7
 
 data GitRepository = GitRepository {
     getName         :: String

File src/Git/Delta.hs

-{-# LANGUAGE  DoAndIfThenElse #-}
-
-
--- | Implementation of the delta encoding algorithm used in git. 
--- The delta encoding used is a `copy/insert` based encoding algorithm inspired
--- by the xdelta\/libxdiff (see <http://www.xmailserver.org/xdiff-lib.html> and
--- <http://xdelta.org/>) algorithms.
---
--- The current implementation is based on the
--- <https://github.com/git/git/blob/v1.8.1/patch-delta.c> implementation from
--- the git source.
--- 
--- The delta implementation can be tested using the @test-delta@ binary from
--- the git source. Create the binary in the git source using:
---
--- @
---  make configure
---  ./configure
---  make test-delta
--- @
--- 
--- To generate a test delta file run:
---
--- @
---  ./test-delta -d test-delta-old.c test-delta-new.c out.delta
--- @
---
--- The delta file @out.delta@ can be used to restore the content of the
--- @test-delta-new.c@ file based on the source file @test-delta-old.c@.
-module Git.Delta (
-    patch
-) where
-
-import qualified Data.ByteString as B
-import Data.Binary.Strict.Get
-import Control.Monad                    (liftM, foldM)
-import Data.Bits                        (Bits, (.&.), (.|.), shiftL)
-import Git.Common                       (isMsbSet)
-import Data.Word
-
--- | The DeltaHeader contains the length of the source, the length off the
--- target and the offset of the actual delta payload in the delta buffer.
-data DeltaHeader = DeltaHeader {
-    sourceLength :: Int
-  , targetLength :: Int
-  , getOffset    :: Int
-} deriving (Show)
-
-{-
-runhaskell Git/Delta.hs ../test-delta.c ../out.delta
-diff target.file ../test-delta-new.c
--}
-{-
-main :: IO ()
-main = do
-    (sourceFile:deltaFile:_) <- getArgs
-    source <- B.readFile sourceFile
-    delta <- B.readFile deltaFile
-    print $ B.length source
-    either putStrLn (B.writeFile "target.file") $ patch source delta
--}
-
--- | Patch the base with the given delta to produce a new target version
--- This will return either the patched target or a Left with an appropriate
--- error message.
---
--- @
---     source <- B.readFile sourceFile
---     delta <- B.readFile deltaFile
---     either putStrLn (B.writeFile \"target.file\") $ patch source delta
--- @
-patch :: B.ByteString -> B.ByteString -> Either String B.ByteString
-patch base delta = do
-        header <- decodeDeltaHeader delta
-        if B.length base == sourceLength header then
-            fst $ runGet (run (getOffset header) base delta) delta
-        else Left "Source length check failed"
-
-
--- | Parse the delta file and transform the source into the target ByteString
-run :: Int -> B.ByteString -> B.ByteString -> Get B.ByteString
-run offset source delta = do
-    skip offset
-    cmd <- getWord8
-    runCommand cmd B.empty source delta
-
--- =================================================================================
-
--- | Return the delta header information consisting of the size of the source
--- buffer, the size of the target buffer and the delta header length (offset
--- from the start).
-decodeDeltaHeader :: Monad m => B.ByteString -> m DeltaHeader
-decodeDeltaHeader delta = do
-    let res1 = runGet (decodeSize 0) delta
-        (sourceBufferSize, offset) = either (const (0,0)) id $ fst res1
-        res2 = runGet (decodeSize offset) delta
-        (targetBufferSize, offset') = either (const (0,0)) id $ fst res2
-    return (DeltaHeader sourceBufferSize targetBufferSize offset')
-    where   decodeSize offset = do
-                skip offset
-                byte <- getWord8
-                let rv = fromIntegral $ byte .&. 0x7f
-                    shift = 7
-                next rv shift byte $ succ offset
-            next rv shift byte count | isMsbSet byte = do
-                 b <- getWord8
-                 let rv2 = rv .|. ((fromIntegral b .&. 127) `shiftL` shift)
-                 next rv2 (shift + 7) b $ succ count
-            next rv _ _ count                        = return (rv, count)
-
--- =================================================================================
-
-
--- | Execute the @copy/insert@ instructions defined in the delta buffer to
--- restore the target buffer
-runCommand :: Word8 -> B.ByteString -> B.ByteString -> t -> Get B.ByteString
-runCommand cmd acc source delta = do
-    toAdd <- if isMsbSet cmd then
-        copyCommand cmd source
-    else
-        insertCommand cmd
-    finished <- isEmpty
-    let acc' = B.append acc toAdd
-    if finished then return acc'
-       else do
-        cmd' <- getWord8
-        runCommand cmd' acc' source delta
-
--- | Read @n@ bytes from the delta and insert them into the target buffer
-insertCommand :: Integral a => a -> Get B.ByteString
-insertCommand = getByteString . fromIntegral
-
--- | Copy from the source into the target buffer
-copyCommand :: Word8 -> B.ByteString -> Get B.ByteString
-copyCommand cmd source = do
-        -- off -> offset in the source buffer where the copy will start
-        offset <- foldM f 0 $ zip [0x01, 0x02, 0x04, 0x08] [0,8..]
-        -- bytes to copy
-        size   <- foldM f 0 $ zip [0x10, 0x20, 0x40] [0,8..]
-        let size3 = if coerce size == 0 then 0x10000 else size
-        -- FIXME add guard condition from `patch-delta.c`: if (unsigned_add_overflows(cp_off, cp_size) || ...
-        return $ B.take (coerce size3) $ B.drop (coerce offset) source
-    where calculateVal off shift = if shift /= 0 then (\x -> off .|. (x `shiftL` shift)::Int) . fromIntegral else fromIntegral
-          f off (x, shift)       = if cmd .&. x /= 0 then liftM (calculateVal off shift) getWord8 else return off
-          coerce                 = toEnum . fromEnum

File src/Git/ObjectStore.hs

-{-# LANGUAGE OverloadedStrings, RecordWildCards, DoAndIfThenElse #-}
-
-module Git.ObjectStore (
-    createEmptyGitRepository
-  , encodeObject
-  , pathForObject
-  , pathForPack
-  , createGitRepositoryFromPackfile
-  , updateHead
-) where
-
-import qualified Data.ByteString.Char8 as BC
-import qualified Data.ByteString as B
-import qualified Data.ByteString.Lazy as L
-import qualified Codec.Compression.Zlib as Z
-import qualified Crypto.Hash.SHA1 as SHA1
-import qualified Data.Attoparsec.ByteString.Char8 as AC
-import Control.Applicative ((<|>))
--- FIXME -> don't use isJust/fromJust
-import Data.Maybe                                           (isJust, fromJust)
-import Text.Printf                                          (printf)
-import Git.Packfile
-import Git.Delta                                            (patch)
-import Git.Common                                           (GitRepository(..), ObjectType(..), eitherToMaybe)
-import System.FilePath
-import System.Directory
-import Control.Monad                                        (unless, liftM)
-
-type ObjectId = String
-
-data Object = ResolvedObject {
-    getObjectType   :: ObjectType
-  , getContent      :: B.ByteString
-  , getSize         :: Int
-  , sha1            :: String
-} | UnresolvedObject {
-    getObjectType   :: ObjectType
-  , deltaData       :: B.ByteString
-  , getSize         :: Int
-}deriving (Show, Eq, Ord)
-
-createGitRepositoryFromPackfile :: GitRepository -> FilePath -> IO ()
-createGitRepositoryFromPackfile target packFile = do
-    pack <- packRead packFile
-    let repoName = getName target
-        repo = GitRepository repoName (repoName </> ".git")
-    unpackPackfile repo pack
-    updateHead repo pack
-
--- TODO properly handle the error condition here
-unpackPackfile :: GitRepository -> Packfile -> IO ()
-unpackPackfile _ InvalidPackfile = error "Attempting to unpack an invalid packfile"
-unpackPackfile repo@GitRepository{..} (Packfile _ _ objs) = do
-        let encodedObjects = encodeObjects objs
-        unresolvedObjects <- writeObjects encodedObjects
-        _ <- writeDeltas repo unresolvedObjects
-        putStrLn "Done"
-    where   writeObjects (r@(ResolvedObject objType content size sha1):xs) = do
-                let (path, name) = pathForObject getName sha1
-                _ <- writeObject repo r
-                writeObjects xs
-            writeObjects (x@(UnresolvedObject{}):xs) = liftM (x:) (writeObjects xs)
-            writeObjects []     = return []
-            encodeObjects       = map encodeObject
-
-writeDeltas :: GitRepository -> [Object] -> IO ()
-writeDeltas repo (x:xs) = do
-    f <- writeDelta repo x
-    writeDeltas repo xs
-writeDeltas _ [] = return ()
-
-writeDelta :: GitRepository -> Object -> IO (Maybe FilePath)
-writeDelta repo (UnresolvedObject ty@(RefDelta baseObject) content size) = do
-        base <- case toObjectId ty of
-            Just sha -> readObject repo sha
-            _        -> return Nothing
-        if isJust base then
-            case patch (getContent $ fromJust base) content of
-                Right target -> do
-                                let base'        = fromJust base
-                                    (path, name) = pathForObject (getName repo) (sha1 base')
-                                    filename     = path </> name
-                                    header       = headerForBlob (objectTypeToString $ getObjectType base') target
-                                    blob         = header `BC.append` target
-                                    obj          = ResolvedObject (getObjectType base') blob (BC.length target) $ hsh blob
-                                _ <- writeObject repo obj
-                                return $ Just filename
-                Left msg     -> return Nothing
-        else return Nothing -- FIXME - base object doesn't exist yet
-
-
-updateHead :: GitRepository -> Packfile -> IO ()
-updateHead repo (Packfile _ _ objs) = do
-    let commits = filter isCommit objs
-    unless (null commits) $
-        let commit = head commits
-            ref = "refs/heads/master"
-            in
-            do
-                let obj = encodeObject commit
-                createRef repo ref (sha1 obj)
-                createSymRef repo "HEAD" ref
-    where isCommit ob = objectType ob == Commit
-
--- ref: refs/heads/master
-createSymRef :: GitRepository -> String -> String -> IO ()
-createSymRef GitRepository{..} symName ref =
-        writeFile (getGitDirectory </> symName) $ "ref: " ++ ref ++ "\n"
-
-
-createRef :: GitRepository -> String -> String -> IO ()
-createRef GitRepository{..} ref sha = do
-    let (path, name) = splitFileName ref
-        dir          = getGitDirectory </> path
-    _ <- createDirectoryIfMissing True dir
-    writeFile (dir </> name) (sha ++ "\n")
-
-pathForPack :: GitRepository -> FilePath
-pathForPack GitRepository{..} = getGitDirectory </> ".git" </> "objects" </> "pack"
-
-pathForObject :: String -> String -> (FilePath, String)
-pathForObject repoName sha | length sha == 40 = (repoName </> ".git" </> "objects" </> pre, rest)
-    where pre  = take 2 sha
-          rest = drop 2 sha
-pathForObject _ _ = ("", "")
-
-type Repository = String
-
--- header: "type size\0"
--- sha1 $ header ++ content
-readObject :: GitRepository -> ObjectId -> IO (Maybe Object)
-readObject GitRepository{..} sha = do
-    let (path, name) = pathForObject getName sha
-        filename     = path </> name
-    exists <- doesFileExist filename
-    if exists then do
-        bs <- BC.readFile filename
-        return $ parseBlob $ inflate bs
-    else return Nothing
-    where parseBlob blob = eitherToMaybe $ AC.parseOnly (blobParser sha) blob
-          inflate blob = B.concat $ L.toChunks $ Z.decompress $ L.fromChunks [blob]
-
--- header: "type size\0"
--- sha1 $ header ++ content
-blobParser :: ObjectId -> AC.Parser Object
-blobParser sha1 = do
-   objType <- AC.string "commit" <|> AC.string "tree" <|> AC.string "blob" <|> AC.string "tag"
-   AC.char ' '
-   size <- AC.takeWhile AC.isDigit
-   AC.char '\0'
-   blob <- AC.takeByteString
-   return $ ResolvedObject (obj objType) blob (read $ BC.unpack size) sha1
-   where obj "commit"   = Commit
-         obj "tree"     = Tree
-         obj "blob"     = Blob
-         obj "tag"      = Tag
-
-
-
--- header: "type size\0"
--- sha1 $ header ++ content
-writeObject :: GitRepository -> Object -> IO ()
-writeObject _ (UnresolvedObject {}) = error "Can't write an UnresolvedObject"
-writeObject GitRepository{..} obj = do
-    let (path, name) = pathForObject getName $ sha1 obj
-        filename     = path </> name
-    _ <- createDirectoryIfMissing True path
-    L.writeFile filename $ compress (getContent obj)
-    where compress data' = Z.compress $ L.fromChunks [data'] -- FIXME should data be lazy in the first place?
-
-hsh :: B.ByteString -> String
-hsh = toHex . SHA1.hash
-
-objectTypeToString :: ObjectType -> B.ByteString
-objectTypeToString Commit = "commit"
-objectTypeToString Tree   = "tree"
-objectTypeToString Blob   = "blob"
-objectTypeToString Tag    = "tag"
-
-encodeObject :: PackfileObject -> Object
-encodeObject obj@(PackfileObject ot@(RefDelta _) size content) = UnresolvedObject ot content size
-encodeObject obj@(PackfileObject ot size _) =
-        ResolvedObject ot blob size (hsh blob)
-    where header obj' =
-            let blobType = objType obj'
-            in  headerForBlob blobType $ objectData obj'
-          blob                                   = header obj `BC.append` objectData obj
-          objType (PackfileObject Commit _ _)    = "commit"
-          objType (PackfileObject Blob _ _)      = "blob"
-          objType (PackfileObject Tree _ _)      = "tree"
-          objType (PackfileObject Tag _ _)       = "tag"
-
-headerForBlob :: B.ByteString -> B.ByteString -> B.ByteString
-headerForBlob objType content = objType `BC.append` " " `BC.append` BC.pack (show $ BC.length content) `BC.append` "\0"
-
-createEmptyGitRepository :: FilePath -> IO ()
-createEmptyGitRepository gitDir =
-        mapM_ (\dir -> createDirectoryIfMissing True (gitDir </> dir)) topLevelDirectories
-        where topLevelDirectories = ["objects", "refs", "hooks", "info"]
-
-toObjectId :: ObjectType -> Maybe ObjectId
-toObjectId (RefDelta base) = Just $ toHex $ B.pack base
-toObjectId _               = Nothing
-
-toHex :: BC.ByteString -> String
-toHex bytes = BC.unpack bytes >>= printf "%02x"

File src/Git/Pack/Delta.hs

+{-# LANGUAGE  DoAndIfThenElse #-}
+
+
+-- | Implementation of the delta encoding algorithm used in git. 
+-- The delta encoding used is a `copy/insert` based encoding algorithm inspired
+-- by the xdelta\/libxdiff (see <http://www.xmailserver.org/xdiff-lib.html> and
+-- <http://xdelta.org/>) algorithms.
+--
+-- The current implementation is based on the
+-- <https://github.com/git/git/blob/v1.8.1/patch-delta.c> implementation from
+-- the git source.
+-- 
+-- The delta implementation can be tested using the @test-delta@ binary from
+-- the git source. Create the binary in the git source using:
+--
+-- @
+--  make configure
+--  ./configure
+--  make test-delta
+-- @
+-- 
+-- To generate a test delta file run:
+--
+-- @
+--  ./test-delta -d test-delta-old.c test-delta-new.c out.delta
+-- @
+--
+-- The delta file @out.delta@ can be used to restore the content of the
+-- @test-delta-new.c@ file based on the source file @test-delta-old.c@.
+module Git.Pack.Delta (
+    patch
+) where
+
+import qualified Data.ByteString as B
+import Data.Binary.Strict.Get
+import Control.Monad                    (liftM, foldM)
+import Data.Bits                        (Bits, (.&.), (.|.), shiftL)
+import Git.Common                       (isMsbSet)
+import Data.Word
+
+-- | The DeltaHeader contains the length of the source, the length off the
+-- target and the offset of the actual delta payload in the delta buffer.
+data DeltaHeader = DeltaHeader {
+    sourceLength :: Int
+  , targetLength :: Int
+  , getOffset    :: Int
+} deriving (Show)
+
+{-
+runhaskell Git/Delta.hs ../test-delta.c ../out.delta
+diff target.file ../test-delta-new.c
+-}
+{-
+main :: IO ()
+main = do
+    (sourceFile:deltaFile:_) <- getArgs
+    source <- B.readFile sourceFile
+    delta <- B.readFile deltaFile
+    print $ B.length source
+    either putStrLn (B.writeFile "target.file") $ patch source delta
+-}
+
+-- | Patch the base with the given delta to produce a new target version
+-- This will return either the patched target or a Left with an appropriate
+-- error message.
+--
+-- @
+--     source <- B.readFile sourceFile
+--     delta <- B.readFile deltaFile
+--     either putStrLn (B.writeFile \"target.file\") $ patch source delta
+-- @
+patch :: B.ByteString -> B.ByteString -> Either String B.ByteString
+patch base delta = do
+        header <- decodeDeltaHeader delta
+        if B.length base == sourceLength header then
+            fst $ runGet (run (getOffset header) base delta) delta
+        else Left "Source length check failed"
+
+
+-- | Parse the delta file and transform the source into the target ByteString
+run :: Int -> B.ByteString -> B.ByteString -> Get B.ByteString
+run offset source delta = do
+    skip offset
+    cmd <- getWord8
+    runCommand cmd B.empty source delta
+
+-- =================================================================================
+
+-- | Return the delta header information consisting of the size of the source
+-- buffer, the size of the target buffer and the delta header length (offset
+-- from the start).
+decodeDeltaHeader :: Monad m => B.ByteString -> m DeltaHeader
+decodeDeltaHeader delta = do
+    let res1 = runGet (decodeSize 0) delta
+        (sourceBufferSize, offset) = either (const (0,0)) id $ fst res1
+        res2 = runGet (decodeSize offset) delta
+        (targetBufferSize, offset') = either (const (0,0)) id $ fst res2
+    return (DeltaHeader sourceBufferSize targetBufferSize offset')
+    where   decodeSize offset = do
+                skip offset
+                byte <- getWord8
+                let rv = fromIntegral $ byte .&. 0x7f
+                    shift = 7
+                next rv shift byte $ succ offset
+            next rv shift byte count | isMsbSet byte = do
+                 b <- getWord8
+                 let rv2 = rv .|. ((fromIntegral b .&. 127) `shiftL` shift)
+                 next rv2 (shift + 7) b $ succ count
+            next rv _ _ count                        = return (rv, count)
+
+-- =================================================================================
+
+
+-- | Execute the @copy/insert@ instructions defined in the delta buffer to
+-- restore the target buffer
+runCommand :: Word8 -> B.ByteString -> B.ByteString -> t -> Get B.ByteString
+runCommand cmd acc source delta = do
+    toAdd <- if isMsbSet cmd then
+        copyCommand cmd source
+    else
+        insertCommand cmd
+    finished <- isEmpty
+    let acc' = B.append acc toAdd
+    if finished then return acc'
+       else do
+        cmd' <- getWord8
+        runCommand cmd' acc' source delta
+
+-- | Read @n@ bytes from the delta and insert them into the target buffer
+insertCommand :: Integral a => a -> Get B.ByteString
+insertCommand = getByteString . fromIntegral
+
+-- | Copy from the source into the target buffer
+copyCommand :: Word8 -> B.ByteString -> Get B.ByteString
+copyCommand cmd source = do
+        -- off -> offset in the source buffer where the copy will start
+        offset <- foldM f 0 $ zip [0x01, 0x02, 0x04, 0x08] [0,8..]
+        -- bytes to copy
+        size   <- foldM f 0 $ zip [0x10, 0x20, 0x40] [0,8..]
+        let size3 = if coerce size == 0 then 0x10000 else size
+        -- FIXME add guard condition from `patch-delta.c`: if (unsigned_add_overflows(cp_off, cp_size) || ...
+        return $ B.take (coerce size3) $ B.drop (coerce offset) source
+    where calculateVal off shift = if shift /= 0 then (\x -> off .|. (x `shiftL` shift)::Int) . fromIntegral else fromIntegral
+          f off (x, shift)       = if cmd .&. x /= 0 then liftM (calculateVal off shift) getWord8 else return off
+          coerce                 = toEnum . fromEnum

File src/Git/Pack/Packfile.hs

+{-# LANGUAGE OverloadedStrings, DoAndIfThenElse #-}
+
+module Git.Pack.Packfile (
+    packRead
+  , Packfile(..)
+  , PackfileObject(..)
+  , PackObjectType(..)
+) where
+
+
+import Control.Applicative
+import Data.ByteString (ByteString)
+import qualified Data.Iteratee as I
+import Data.Iteratee.Binary
+import Data.Iteratee.ZLib
+import Control.Monad                    (replicateM)
+import Data.Maybe
+import Data.Char                        (ord)
+import Data.Word                        (Word8,Word32)
+import Data.Bits
+import Git.Common                       (isMsbSet)
+
+type Content = ByteString
+
+-- | The pack file header that usually represents the first 12 bytes of a pack
+-- file. The first 4 bytes that contain the magic byte for the pack file
+-- ('PACK') are not included, the version is contained in the next 4 bytes and
+-- the number of objects in the last 4 bytes.
+data Packfile = Packfile {
+      version       :: Word32
+    , numObjects    :: Word32
+    , objects       :: [PackfileObject]
+    } | InvalidPackfile deriving (Show)
+
+data PackfileObject = PackfileObject {
+     objectType     :: PackObjectType
+   , size           :: Int
+   , objectData     :: Content
+    } deriving (Show)
+
+-- From cache.h
+data PackObjectType =   OBJ_BAD | -- -1
+                        OBJ_NONE | -- 0
+                        OBJ_COMMIT | -- 1
+                        OBJ_TREE |  -- 2
+                        OBJ_BLOB | -- 3
+                        OBJ_TAG | -- 4
+                        OBJ_OFS_DELTA Int | -- 6 -- offset is interpreted as a negative offset from the type-byte of the header of the ofs-delta entry 
+                        OBJ_REF_DELTA [Word8] | 
+                        OBJ_ANY |
+                        OBJ_MAX deriving (Eq, Show, Ord) -- 7
+
+-- | Parse the given pack file into a "Packfile" representation
+packRead :: FilePath -> IO Packfile
+packRead = I.fileDriverRandom parsePackFileObjectHeader
+
+-- ============================================================================== --
+
+parsePackFileObjectHeader :: I.Iteratee ByteString IO Packfile
+parsePackFileObjectHeader = do
+    magic       <- endianRead4 MSB -- 4 bytes
+    version     <- endianRead4 MSB
+    numObjects  <- endianRead4 MSB
+    if packMagic == magic
+                then parseObjects version numObjects
+                else return InvalidPackfile
+  where packMagic = fromOctets $ map (fromIntegral . ord) "PACK"
+
+parseObjects :: Word32 -> Word32 -> I.Iteratee ByteString IO Packfile
+parseObjects version num = do
+    objs <- catMaybes <$> replicateM (fromIntegral num) parsePackObject
+    return $ Packfile version num objs
+
+
+parsePackObject :: I.Iteratee ByteString IO (Maybe PackfileObject)
+parsePackObject = do
+    byte <- I.head -- read 1 byte
+    let objectType  = byte `shiftR` 4 .&. 7 -- shift right and masking the 4th least significtan bit
+        initial     = fromIntegral $ byte .&. 15
+    size <- if isMsbSet byte then parseObjectSize initial 0 else return initial
+    obj <- toPackObjectType objectType
+    content <- I.joinI $ enumInflate Zlib defaultDecompressParams I.stream2stream
+    return $ (\t -> PackfileObject t size content) <$> obj
+
+-- Parse the variable length size header part of the object entry
+parseObjectSize :: Int -> Int -> I.Iteratee ByteString IO Int
+parseObjectSize size iter = do
+    nextByte <- I.head
+    let add           = (coerce (nextByte .&. 127) :: Int) `shiftL` (4 + (iter * 7))
+        acc           = size + fromIntegral add
+    if isMsbSet nextByte then
+        parseObjectSize acc (iter + 1)
+    else
+        return acc
+    where coerce = toEnum . fromEnum
+
+
+-- =================================================================================
+
+
+fromOctets :: [Word8] -> Word32
+fromOctets = foldl accum 0
+  where
+    accum a o = (a `shiftL` 8) .|. fromIntegral o
+
+-- Map the internal representation of the object type to the PackObjectType
+toPackObjectType :: (Show a, Integral a) => a -> I.Iteratee ByteString IO (Maybe PackObjectType)
+toPackObjectType 1  = return $ Just OBJ_COMMIT
+toPackObjectType 2  = return $ Just OBJ_TREE
+toPackObjectType 3  = return $ Just OBJ_BLOB
+toPackObjectType 4  = return $ Just OBJ_TAG
+toPackObjectType 6  = do
+    offset <- readOffset 0 0
+    return $ Just (OBJ_OFS_DELTA offset)
+toPackObjectType 7  = do 
+    baseObj <- replicateM 20 I.head -- 20-byte base object name SHA1
+    return $ Just (OBJ_REF_DELTA baseObj)
+toPackObjectType _  = return Nothing
+
+
+-- offset encoding:
+--  n bytes with MSB set in all but the last one.
+--  The offset is then the number constructed by
+--  concatenating the lower 7 bit of each byte, and
+--  for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
+--  to the result.
+readOffset :: Int -> Int -> I.Iteratee ByteString IO Int
+readOffset shft acc = do
+    x <- I.head
+    let bs = acc + ((coerce (x .&. 0x7f) :: Int) `shiftL` shft)
+    if isMsbSet x
+        then readOffset (shft+7) (bs+1)
+        else return bs
+    where coerce = toEnum . fromEnum

File src/Git/Packfile.hs

-{-# LANGUAGE OverloadedStrings, DoAndIfThenElse #-}
-
-module Git.Packfile (
-    packRead
-  , Packfile(..)
-  , PackfileObject(..)
-) where
-
-
-import Control.Applicative
-import Data.ByteString (ByteString)
-import qualified Data.Iteratee as I
-import Data.Iteratee.Binary
-import Data.Iteratee.ZLib
-import Control.Monad                    (replicateM)
-import Data.Maybe
-import Data.Char                        (ord)
-import Data.Word                        (Word8,Word32)
-import Data.Bits
-import Git.Common                       (isMsbSet, ObjectType(..))
-
-type Content = ByteString
-
--- | The pack file header that usually represents the first 12 bytes of a pack
--- file. The first 4 bytes that contain the magic byte for the pack file
--- ('PACK') are not included, the version is contained in the next 4 bytes and
--- the number of objects in the last 4 bytes.
-data Packfile = Packfile {
-      version       :: Word32
-    , numObjects    :: Word32
-    , objects       :: [PackfileObject]
-    } | InvalidPackfile deriving (Show)
-
-data PackfileObject = PackfileObject {
-     objectType     :: ObjectType
-   , size           :: Int
-   , objectData     :: Content
-    } deriving (Show)
-
-parsePackFileObjectHeader :: I.Iteratee ByteString IO Packfile
-parsePackFileObjectHeader = do
-    magic       <- endianRead4 MSB -- 4 bytes
-    version     <- endianRead4 MSB
-    numObjects  <- endianRead4 MSB
-    if packMagic == magic
-                then parseObjects version numObjects
-                else return InvalidPackfile
-  where packMagic = fromOctets $ map (fromIntegral . ord) "PACK"
-
-parseObjects :: Word32 -> Word32 -> I.Iteratee ByteString IO Packfile
-parseObjects version num = do
-    objs <- catMaybes <$> replicateM (fromIntegral num) parsePackObject
-    return $ Packfile version num objs
-
-
-parsePackObject :: I.Iteratee ByteString IO (Maybe PackfileObject)
-parsePackObject = do
-    byte <- I.head -- read 1 byte
-    let objectType  = byte `shiftR` 4 .&. 7 -- shift right and masking the 4th least significtan bit
-        initial     = fromIntegral $ byte .&. 15
-    size <- if isMsbSet byte then parseObjectSize initial 0 else return initial
-    obj <- toObjectType objectType
-    content <- I.joinI $ enumInflate Zlib defaultDecompressParams I.stream2stream
-    return $ (\t -> PackfileObject t size content) <$> obj
-
--- Parse the variable length size header part of the object entry
-parseObjectSize :: Int -> Int -> I.Iteratee ByteString IO Int
-parseObjectSize size iter = do
-    nextByte <- I.head
-    let add           = (coerce (nextByte .&. 127) :: Int) `shiftL` (4 + (iter * 7))
-        acc           = size + fromIntegral add
-    if isMsbSet nextByte then
-        parseObjectSize acc (iter + 1)
-    else
-        return acc
-    where coerce = toEnum . fromEnum
-
-
-packRead :: FilePath -> IO Packfile
-packRead = I.fileDriverRandom parsePackFileObjectHeader
-
-
-
--- =================================================================================
-
-
-fromOctets :: [Word8] -> Word32
-fromOctets = foldl accum 0
-  where
-    accum a o = (a `shiftL` 8) .|. fromIntegral o
-
--- Map the internal representation of the object type to the ObjectType
-toObjectType :: (Show a, Integral a) => a -> I.Iteratee ByteString IO (Maybe ObjectType)
-toObjectType 1  = return $ Just Commit
-toObjectType 2  = return $ Just Tree
-toObjectType 3  = return $ Just Blob
-toObjectType 4  = return $ Just Tag
-toObjectType 6  = do
-    offset <- readOffset 0 0
-    return $ Just (OfsDelta offset)
-toObjectType 7  = do 
-    baseObj <- replicateM 20 I.head -- 20-byte base object name SHA1
-    return $ Just (RefDelta baseObj)
-toObjectType _  = return Nothing
-
-
--- offset encoding:
---  n bytes with MSB set in all but the last one.
---  The offset is then the number constructed by
---  concatenating the lower 7 bit of each byte, and
---  for n >= 2 adding 2^7 + 2^14 + ... + 2^(7*(n-1))
---  to the result.
-readOffset :: Int -> Int -> I.Iteratee ByteString IO Int
-readOffset shft acc = do
-    x <- I.head
-    let bs = acc + ((coerce (x .&. 0x7f) :: Int) `shiftL` shft)
-    if isMsbSet x
-        then readOffset (shft+7) (bs+1)
-        else return bs
-    where coerce = toEnum . fromEnum

File src/Git/Remote.hs

 import Network.Socket                           (withSocketsDo)
 import Data.Maybe
 import Data.List
+import Git.Common
 import Git.TcpClient
 import Git.PackProtocol
-import Git.Common
-import Git.ObjectStore
+import Git.Store.ObjectStore
 
 refDiscovery :: String -> String -> String
 refDiscovery host repo = pktLine $ "git-upload-pack /" ++ repo ++ "\0host="++host++"\0" -- ++ flushPkt -- Tell the server to disconnect

File src/Git/Store/Blob.hs

+{-# LANGUAGE OverloadedStrings #-}
+
+module Git.Store.Blob (
+    parseTree
+  , parseCommit
+  , parsePerson
+  , Commit(..)
+) where
+
+import Prelude hiding (take, takeWhile)
+import qualified Data.ByteString.Char8 as C
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+import qualified Codec.Compression.Zlib as Z
+import qualified Crypto.Hash.SHA1 as SHA1
+import Data.Attoparsec.ByteString.Char8
+import Data.Attoparsec.Combinator
+import Control.Applicative ((<|>))
+-- FIXME -> don't use isJust/fromJust
+import Data.Maybe                                           (isJust, fromJust)
+import Text.Printf                                          (printf)
+import Git.Pack.Packfile
+import Git.Pack.Delta                                      (patch)
+import Git.Common                                           (eitherToMaybe)
+import System.FilePath
+import System.Directory
+import Control.Monad                                        (unless, liftM)
+
+{-
+data Person = Person {
+    getPersonName     :: B.ByteString
+  , getPersonEmail    :: B.ByteString
+} deriving (Show, Eq)
+-}
+
+
+type ObjectId = String
+{-
+data TreeNode = TreeNode {
+    obj  :: GitObject
+  , name :: String
+} deriving (Show, Eq)
+
+data GitObject = GBlob {
+    content :: B.ByteString
+} | GTree {
+  nodes   :: [TreeNode]
+} | GTag deriving (Show, Eq)
+-}
+
+data Author = Author B.ByteString B.ByteString deriving (Eq, Show)
+data Commiter = Commiter String String deriving (Eq, Show)
+
+data Tree = Tree {
+    getObjectId :: ObjectId
+} deriving (Eq, Show)
+
+data Commit = Commit {
+    getTree        :: B.ByteString
+  , getParent      :: B.ByteString
+  , getSha         :: B.ByteString
+  , getAuthor      :: Author
+  , getCommiter    :: Commiter
+  , getMessage     :: B.ByteString
+} deriving (Eq,Show)
+
+
+parseTree :: C.ByteString -> Maybe Tree
+parseTree input = Nothing -- eitherToMaybe $ parseOnly commitParser input
+
+parseCommit :: C.ByteString -> Maybe Commit
+parseCommit input = eitherToMaybe $ parseOnly commitParser input
+
+{-
+tree b5213cb334e855fb5c89edc99d54606377e15d70
+parent 3c1d7b88edaf2119aff47104de389867cad0f0fb
+author Stefan Saasen <stefan@saasen.me> 1361272292 +1100
+committer Stefan Saasen <stefan@saasen.me> 1361272292 +1100
+
+Remove git INSTALL instructions
+
+-}
+commitParser :: Parser Commit
+commitParser = do
+    tree <- "tree " .*> take 40
+    space
+    parent <- "parent " .*> take 40
+    space
+    author <- "author " .*> parsePerson
+    space
+    commiter <- "committer " .*> parsePerson
+    space
+    space
+    message <- takeByteString
+    return $ Commit tree parent B.empty (Author (getPersonName author) (getPersonEmail author)) (Commiter "" "") message
+
+parsePerson :: Parser Person
+parsePerson = do
+    name <- takeWhile (/= '<')
+    email <- "<" .*> takeWhile (/= '>') <*. ">"
+    date <- takeTill (== '\n')
+    return $ Person name email date
+
+data Person = Person {
+    getPersonName   :: B.ByteString
+  , getPersonEmail  :: B.ByteString
+  , getDate         :: B.ByteString -- FIXME
+} deriving (Eq, Show)

File src/Git/Store/ObjectStore.hs

+{-# LANGUAGE OverloadedStrings, RecordWildCards, DoAndIfThenElse #-}
+
+module Git.Store.ObjectStore (
+    createEmptyGitRepository
+  , encodeObject
+  , pathForObject
+  , pathForPack
+  , createGitRepositoryFromPackfile
+  , updateHead
+  --  PRIVATE
+  , checkoutHead
+  , readHead
+  , resolveTree
+) where
+
+import qualified Data.ByteString.Char8 as C
+import qualified Data.ByteString as B
+import qualified Data.ByteString.Lazy as L
+import qualified Codec.Compression.Zlib as Z
+import qualified Crypto.Hash.SHA1 as SHA1
+import qualified Data.Attoparsec.ByteString.Char8 as AC
+import Control.Applicative ((<|>))
+-- FIXME -> don't use isJust/fromJust
+import Data.Maybe                                           (isJust, fromJust)
+import Text.Printf                                          (printf)
+import Git.Pack.Packfile
+import Git.Pack.Delta                                       (patch)
+import Git.Common                                           (GitRepository(..), eitherToMaybe)
+-- Tree
+import Git.Store.Blob
+import System.FilePath
+import System.Directory
+import Control.Monad                                        (unless, liftM)
+import Data.Char                                            (isSpace)
+import Debug.Trace
+
+type ObjectId = String
+
+
+data Object = ResolvedObject {
+    getObjectType   :: PackObjectType
+  , getContent      :: B.ByteString
+  , getSize         :: Int
+  , sha1            :: String
+} | UnresolvedObject {
+    getObjectType   :: PackObjectType
+  , deltaData       :: B.ByteString
+  , getSize         :: Int
+}deriving (Show, Eq, Ord)
+
+createGitRepositoryFromPackfile :: GitRepository -> FilePath -> IO ()
+createGitRepositoryFromPackfile target packFile = do
+    pack <- packRead packFile
+    let repoName = getName target
+        repo = GitRepository repoName (repoName </> ".git")
+    unpackPackfile repo pack
+    updateHead repo pack
+
+-- | Updates files in the working tree to match the given <tree-ish>
+--
+--
+--
+checkoutHead :: GitRepository -> IO ()
+checkoutHead repo = error "repo"
+
+-- | Resolve a tree given a <tree-ish>
+-- Similar to `parse_tree_indirect` defined in tree.c
+resolveTree :: GitRepository -> ObjectId -> IO String
+resolveTree repo sha = do
+        obj <- readObject repo sha
+        return $ show obj
+
+
+readHead :: GitRepository -> IO ObjectId
+readHead GitRepository{..}  = do
+    ref <- C.readFile (getGitDirectory </> ".git" </> "HEAD")
+    -- TODO check if valid HEAD
+    let unwrappedRef = C.unpack $ strip $ head $ tail $ C.split ':' ref
+    obj <- C.readFile (getGitDirectory </> ".git" </> unwrappedRef)
+    return $ C.unpack $ strip obj
+  where strip = C.takeWhile (not . isSpace) . C.dropWhile isSpace
+
+
+-- TODO properly handle the error condition here
+unpackPackfile :: GitRepository -> Packfile -> IO ()
+unpackPackfile _ InvalidPackfile = error "Attempting to unpack an invalid packfile"
+unpackPackfile repo@GitRepository{..} (Packfile _ _ objs) = do
+        let encodedObjects = encodeObjects objs
+        unresolvedObjects <- writeObjects encodedObjects
+        _ <- writeDeltas repo unresolvedObjects
+        putStrLn "Done"
+    where   writeObjects (r@(ResolvedObject objType content size sha1):xs) = do
+                let (path, name) = pathForObject getName sha1
+                _ <- writeObject repo r
+                writeObjects xs
+            writeObjects (x@(UnresolvedObject{}):xs) = liftM (x:) (writeObjects xs)
+            writeObjects []     = return []
+            encodeObjects       = map encodeObject
+
+writeDeltas :: GitRepository -> [Object] -> IO ()
+writeDeltas repo (x:xs) = do
+    f <- writeDelta repo x
+    writeDeltas repo xs
+writeDeltas _ [] = return ()
+
+writeDelta :: GitRepository -> Object -> IO (Maybe FilePath)
+writeDelta repo (UnresolvedObject ty@(OBJ_REF_DELTA baseObject) content size) = do
+        base <- case toObjectId ty of
+            Just sha -> readObject repo sha
+            _        -> return Nothing
+        if isJust base then
+            case patch (getContent $ fromJust base) content of
+                Right target -> do
+                                let base'        = fromJust base
+                                    (path, name) = pathForObject (getName repo) (sha1 base')
+                                    filename     = path </> name
+                                    header       = headerForBlob (objectTypeToString $ getObjectType base') target
+                                    blob         = header `C.append` target
+                                    obj          = ResolvedObject (getObjectType base') blob (C.length target) $ hsh blob
+                                _ <- writeObject repo obj
+                                return $ Just filename
+                Left msg     -> return Nothing
+        else return Nothing -- FIXME - base object doesn't exist yet
+
+
+updateHead :: GitRepository -> Packfile -> IO ()
+updateHead repo (Packfile _ _ objs) = do
+    let commits = filter isCommit objs
+    unless (null commits) $
+        let commit = head commits
+            ref = "refs/heads/master"
+            in
+            do
+                let obj = encodeObject commit
+                createRef repo ref (sha1 obj)
+                createSymRef repo "HEAD" ref
+    where isCommit ob = objectType ob == OBJ_COMMIT
+
+-- ref: refs/heads/master
+createSymRef :: GitRepository -> String -> String -> IO ()
+createSymRef GitRepository{..} symName ref =
+        writeFile (getGitDirectory </> symName) $ "ref: " ++ ref ++ "\n"
+
+
+createRef :: GitRepository -> String -> String -> IO ()
+createRef GitRepository{..} ref sha = do
+    let (path, name) = splitFileName ref
+        dir          = getGitDirectory </> path
+    _ <- createDirectoryIfMissing True dir
+    writeFile (dir </> name) (sha ++ "\n")
+
+pathForPack :: GitRepository -> FilePath
+pathForPack GitRepository{..} = getGitDirectory </> ".git" </> "objects" </> "pack"
+
+pathForObject :: String -> String -> (FilePath, String)
+pathForObject repoName sha | length sha == 40 = (repoName </> ".git" </> "objects" </> pre, rest)
+    where pre  = take 2 sha
+          rest = drop 2 sha
+pathForObject _ _ = ("", "")
+
+type Repository = String
+
+-- header: "type size\0"
+-- sha1 $ header ++ content
+readObject :: GitRepository -> ObjectId -> IO (Maybe Object)
+readObject GitRepository{..} sha = do
+    let (path, name) = pathForObject getGitDirectory sha
+        filename     = path </> name
+    exists <- trace ("readObject: " ++ filename) $ doesFileExist filename
+    if exists then do
+        bs <- C.readFile filename
+        return $ parseBlob $ inflate bs
+    else return Nothing
+    where parseBlob blob = eitherToMaybe $ AC.parseOnly (blobParser sha) blob
+          inflate blob = B.concat $ L.toChunks $ Z.decompress $ L.fromChunks [blob]
+
+-- header: "type size\0"
+-- sha1 $ header ++ content
+blobParser :: ObjectId -> AC.Parser Object
+blobParser sha1 = do
+   objType <- AC.string "commit" <|> AC.string "tree" <|> AC.string "blob" <|> AC.string "tag"
+   AC.char ' '
+   size <- AC.takeWhile AC.isDigit
+   AC.char '\0'
+   blob <- AC.takeByteString
+   return $ ResolvedObject (obj objType) blob (read $ C.unpack size) sha1
+   where obj "commit"   = OBJ_COMMIT
+         obj "tree"     = OBJ_TREE
+         obj "blob"     = OBJ_BLOB
+         obj "tag"      = OBJ_TAG
+
+
+
+-- header: "type size\0"
+-- sha1 $ header ++ content
+writeObject :: GitRepository -> Object -> IO ()
+writeObject _ (UnresolvedObject {}) = error "Can't write an UnresolvedObject"
+writeObject GitRepository{..} obj = do
+    let (path, name) = pathForObject getName $ sha1 obj
+        filename     = path </> name
+    _ <- createDirectoryIfMissing True path
+    L.writeFile filename $ compress (getContent obj)
+    where compress data' = Z.compress $ L.fromChunks [data'] -- FIXME should data be lazy in the first place?
+
+hsh :: B.ByteString -> String
+hsh = toHex . SHA1.hash
+
+objectTypeToString :: PackObjectType -> B.ByteString
+objectTypeToString OBJ_COMMIT = "commit"
+objectTypeToString OBJ_TREE   = "tree"
+objectTypeToString OBJ_BLOB   = "blob"
+objectTypeToString OBJ_TAG    = "tag"
+
+encodeObject :: PackfileObject -> Object
+encodeObject obj@(PackfileObject ot@(OBJ_REF_DELTA _) size content) = UnresolvedObject ot content size
+encodeObject obj@(PackfileObject ot size _) =
+        ResolvedObject ot blob size (hsh blob)
+    where header obj' =
+            let blobType = objType obj'
+            in  headerForBlob blobType $ objectData obj'
+          blob                              = header obj `C.append` objectData obj
+          objType (PackfileObject t _ _)    = objectTypeToString t
+
+headerForBlob :: B.ByteString -> B.ByteString -> B.ByteString
+headerForBlob objType content = objType `C.append` " " `C.append` C.pack (show $ C.length content) `C.append` "\0"
+
+createEmptyGitRepository :: FilePath -> IO ()
+createEmptyGitRepository gitDir =
+        mapM_ (\dir -> createDirectoryIfMissing True (gitDir </> dir)) topLevelDirectories
+        where topLevelDirectories = ["objects", "refs", "hooks", "info"]
+
+toObjectId :: PackObjectType -> Maybe ObjectId
+toObjectId (OBJ_REF_DELTA base) = Just $ toHex $ B.pack base
+toObjectId _                    = Nothing
+
+toHex :: C.ByteString -> String
+toHex bytes = C.unpack bytes >>= printf "%02x"

File tests/src/Git/Store/BlobTest.hs

+{-# LANGUAGE OverloadedStrings #-}
+module Git.Store.BlobTest
+  (
+    main
+  , test
+  ) where
+
+import qualified Test.HUnit as H
+import Git.Store.Blob
+import Test.QuickCheck hiding ((.&.))
+import Test.Framework (Test, defaultMain, testGroup)
+import Test.Framework.Providers.QuickCheck2 (testProperty)
+import Test.Framework.Providers.HUnit
+import Data.Maybe
+
+main :: IO ()
+main = defaultMain [test]
+
+commit_1 = "tree b5213cb334e855fb5c89edc99d54606377e15d70\nparent 3c1d7b88edaf2119aff47104de389867cad0f0fb\nauthor Stefan Saasen <stefan@saasen.me> 1361272292 +1100\ncommitter Stefan Saasen <stefan@saasen.me> 1361272292 +1100\n\nRemove git INSTALL instructions\n"
+
+test_parseValidCommit = H.assertBool
+    "A valid commit should be successfully parsed"
+    (isJust $ parseCommit commit_1)
+
+test_parseValidCommitTree = H.assertEqual
+    ""
+    "b5213cb334e855fb5c89edc99d54606377e15d70"
+    (getTree $ fromJust $ parseCommit commit_1)
+
+test_parseValidCommitParent = H.assertEqual
+    ""
+    "3c1d7b88edaf2119aff47104de389867cad0f0fb"
+    (getParent $ fromJust $ parseCommit commit_1)
+
+test :: Test
+test = testGroup "Objects"
+    [
+        testCase "parseCommit/1" test_parseValidCommit
+      , testCase "parseCommit/2" test_parseValidCommitTree
+      , testCase "parseCommit/3" test_parseValidCommitParent
+    ]
+
+
+

File tests/src/TestRunner.hs

 
 import qualified Git.CommonTests
 import qualified Git.RemoteTests
+import qualified Git.Store.BlobTest
 import Test.Framework
 
 main ::
   [
     testGroup "Tests"
       [
-         Git.CommonTests.test
-        ,Git.RemoteTests.test
+          Git.CommonTests.test
+        , Git.RemoteTests.test
+        , Git.Store.BlobTest.test
       ]
   ]