Commits

Bryan O'Sullivan  committed d1b28de

Implement hGetLine.

  • Participants
  • Parent commits e1fb9fa

Comments (0)

Files changed (2)

File Data/Text/IO.hs

-{-# LANGUAGE BangPatterns, CPP #-}
+{-# LANGUAGE BangPatterns, CPP, RecordWildCards #-}
 -- |
 -- Module      : Data.Text.IO
 -- Copyright   : (c) Bryan O'Sullivan 2009,
     , putStrLn
     ) where
 
+import Debug.Trace
 import Data.Text (Text)
 import Prelude hiding (appendFile, getContents, getLine, interact, putStr,
                        putStrLn, readFile, writeFile)
 import Data.Text.Encoding (decodeUtf8, encodeUtf8)
 #else
 import Data.IORef (readIORef, writeIORef)
-import Data.Text.Fusion (stream)
+import qualified Data.Text as T
+import Data.Text.Fusion (stream, unstream)
 import Data.Text.Fusion.Internal (Step(..), Stream(..))
+import Data.Text.Fusion.Size (exactSize, maxSize)
+import Data.Text.Unsafe (inlinePerformIO)
+import Foreign.Storable (peekElemOff)
 import GHC.IO.Buffer (Buffer(..), BufferState(..), CharBufElem, CharBuffer,
-                      RawCharBuffer, emptyBuffer, newCharBuffer, writeCharBuf)
-import GHC.IO.Handle.Internals (wantWritableHandle)
+                      RawCharBuffer, bufferAdjustL, bufferElems, charSize, emptyBuffer, isEmptyBuffer, newCharBuffer, readCharBuf, withRawBuffer,
+                      writeCharBuf)
+import GHC.IO.Handle.Internals (ioe_EOF, readTextDevice, wantReadableHandle_,
+                                wantWritableHandle)
 import GHC.IO.Handle.Text (commitBuffer')
 import GHC.IO.Handle.Types (BufferList(..), BufferMode(..), Handle__(..),
                             Newline(..))
 import System.IO (IOMode(..), openFile, withFile)
+import System.IO.Error (isEOFError)
 #endif
 
 -- | The 'readFile' function reads a file and returns the contents of
 #if __GLASGOW_HASKELL__ <= 610
 hGetLine = fmap decodeUtf8 . B.hGetLine
 #else
-hGetLine = undefined
+hGetLine h = wantReadableHandle_ "hGetLine" h go
+  where go hh@Handle__{..} = readIORef haCharBuffer >>= hGetLineLoop hh []
+
+hGetLineLoop :: Handle__ -> [Text] -> CharBuffer -> IO Text
+hGetLineLoop hh@Handle__{..} ts buf@Buffer{ bufL=r0, bufR=w, bufRaw=raw0 } = do
+  let findEOL raw r
+          | r == w    = return (False, w)
+          | otherwise = do
+        (c,r') <- readCharBuf raw r
+        if c == '\n'
+          then return (True, r)
+          else findEOL raw r'
+  (eol, off) <- findEOL raw0 r0
+  (t,r') <- if haInputNL == CRLF
+            then unpack_nl raw0 r0 off
+            else do t <- unpack raw0 r0 off
+                    return (t,off)
+  if eol
+    then do writeIORef haCharBuffer (bufferAdjustL (off+1) buf)
+            return $! T.concat (reverse (t:ts))
+    else do
+      let buf1 = bufferAdjustL r' buf
+      maybe_buf <- maybeFillReadBuffer hh buf1
+      case maybe_buf of
+         -- Nothing indicates we caught an EOF, and we may have a
+         -- partial line to return.
+         Nothing -> do
+              -- we reached EOF.  There might be a lone \r left
+              -- in the buffer, so check for that and
+              -- append it to the line if necessary.
+              -- 
+              let pre | isEmptyBuffer buf1 = T.empty
+                      | otherwise          = T.singleton '\r'
+              writeIORef haCharBuffer buf1{ bufL=0, bufR=0 }
+              let str = T.concat . reverse $ pre:t:ts
+              if T.null str
+                then ioe_EOF
+                else return str
+         Just new_buf ->
+              hGetLineLoop hh (t:ts) new_buf
+
+-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
+maybeFillReadBuffer :: Handle__ -> CharBuffer -> IO (Maybe CharBuffer)
+maybeFillReadBuffer handle_ buf
+  = catch (Just `fmap` getSomeCharacters handle_ buf) $ \e ->
+      if isEOFError e 
+      then return Nothing 
+      else ioError e
+
+unpack :: RawCharBuffer -> Int -> Int -> IO Text
+unpack !buf !r !w
+ | charSize /= 4 = sizeError "unpack"
+ | r == w        = return T.empty
+ | otherwise     = withRawBuffer buf go
+ where
+  go pbuf = return $! unstream (Stream next r (exactSize (w-r)))
+   where
+    next !i | i >= w    = Done
+            | otherwise = Yield (let c = ix i in trace (show (w,i,c)) c) (i+1)
+    ix i = inlinePerformIO $ peekElemOff pbuf i
+
+unpack_nl :: RawCharBuffer -> Int -> Int -> IO (Text, Int)
+unpack_nl !buf !r !w
+ | charSize /= 4 = sizeError "unpack_nl"
+ | r == w        = return (T.empty, 0)
+ | otherwise     = withRawBuffer buf $ go
+ where
+  go pbuf = do
+    let t = unstream (Stream next r (maxSize (w-r)))
+        w' = w - 1
+    return $ if ix w' == '\r'
+             then (t,w')
+             else (t,w)
+   where
+    next !i | i >= w = Done
+            | c == '\r' = let i' = i + 1
+                          in if i' < w
+                             then if ix i' == '\n'
+                                  then Yield '\n' (i+2)
+                                  else Yield '\n' i'
+                             else Done
+            | otherwise = Yield c (i+1)
+            where c = ix i
+    ix i = inlinePerformIO $ peekElemOff pbuf i
+
+sizeError :: String -> a
+sizeError loc = error $ "Data.Text.IO." ++ loc ++ ": bad internal buffer size"
+
+-- This function is completely lifted from GHC.IO.Handle.Text.
+getSomeCharacters :: Handle__ -> CharBuffer -> IO CharBuffer
+getSomeCharacters handle_@Handle__{..} buf@Buffer{..} =
+  case bufferElems buf of
+    -- buffer empty: read some more
+    0 -> readTextDevice handle_ buf
+
+    -- if the buffer has a single '\r' in it and we're doing newline
+    -- translation: read some more
+    1 | haInputNL == CRLF -> do
+      (c,_) <- readCharBuf bufRaw bufL
+      if c == '\r'
+         then do -- shuffle the '\r' to the beginning.  This is only safe
+                 -- if we're about to call readTextDevice, otherwise it
+                 -- would mess up flushCharBuffer.
+                 -- See [note Buffer Flushing], GHC.IO.Handle.Types
+                 _ <- writeCharBuf bufRaw 0 '\r'
+                 let buf' = buf{ bufL=0, bufR=1 }
+                 readTextDevice handle_ buf'
+         else do
+                 return buf
+
+    -- buffer has some chars in it already: just return it
+    _otherwise ->
+      return buf
 #endif
 
 -- | Write a string to a handle.
 #if __GLASGOW_HASKELL__ <= 610
 hPutStr h = B.hPutStr h . encodeUtf8
 #else
+-- This function is lifted almost verbatim from GHC.IO.Handle.Text.
 hPutStr h t = do
   (buffer_mode, nl) <- 
        wantWritableHandle "hPutStr" h $ \h_ -> do
 -- Under GHC 6.10 and earlier, the system I\/O libraries do not
 -- support locale-sensitive I\/O.  All data read by functions in this
 -- module is decoded as UTF-8, and before data is written, it is first
--- encoded as UTF-8.
+-- encoded as UTF-8.  If you must use a non-UTF-8 locale on an older
+-- version of GHC, you will have to perform the transcoding yourself,
+-- e.g. as follows:
+--
+-- > import qualified Data.ByteString as B
+-- > import Data.Text (Text)
+-- > import Data.Text.Encoding (encodeUtf16)
+-- >
+-- > putStr_Utf16LE :: Text -> IO ()
+-- > putStr_Utf16LE t = B.putStr (encodeUtf16LE t)
 --
 -- Beginning with GHC 6.12, text I\/O is performed using the system or
 -- handle's current locale.

File Data/Text/Unsafe.hs

+{-# LANGUAGE CPP, MagicHash, UnboxedTuples #-}
 -- |
 -- Module      : Data.Text.Unsafe
 -- Copyright   : (c) Bryan O'Sullivan 2009
 -- use in heavily tested code.
 module Data.Text.Unsafe
     (
-      iter
+      inlinePerformIO
+    , iter
     , iter_
     , reverseIter
     , unsafeHead
 import Data.Text.UnsafeChar (unsafeChr)
 import Data.Text.Encoding.Utf16 (chr2)
 import qualified Data.Text.Array as A
+#if defined(__GLASGOW_HASKELL__)
+# if __GLASGOW_HASKELL__ >= 611
+import GHC.IO (IO(IO))
+# else
+import GHC.IOBase (IO(IO))
+# endif
+import GHC.Base (realWorld#)
+#endif
 
 -- | /O(1)/ A variant of 'head' for non-empty 'Text'. 'unsafeHead'
 -- omits the check for the empty case, so there is an obligation on
         j = assert (i >= 0)     $ off + i
         k =                       j - 1
 {-# INLINE reverseIter #-}
+
+-- | Just like unsafePerformIO, but we inline it. Big performance gains as
+-- it exposes lots of things to further inlining. /Very unsafe/. In
+-- particular, you should do no memory allocation inside an
+-- 'inlinePerformIO' block. On Hugs this is just @unsafePerformIO@.
+--
+{-# INLINE inlinePerformIO #-}
+inlinePerformIO :: IO a -> a
+#if defined(__GLASGOW_HASKELL__)
+inlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
+#else
+inlinePerformIO = unsafePerformIO
+#endif