Commits

Doug Burke committed 72f0306

Basic conversion from parsec to polyparse

Comments (0)

Files changed (9)

+0.4.0.0:
+
+  - Use polyparse rather than parsec-2 for parsing. As part of this, the
+    parsing is done using Text rather than String values. At present the
+    RDFLabel and RDFGraph classes have not been updated to use Text (so
+    some conversion between the two types is introduced). The eventual
+    plan is to use Text internally.
+
 0.3.2.1:
 
   - use foldl' rather than foldl in some modules

Swish/RDF/N3Parser.hs

 --  new 'RDFGraph' consisting of triples and namespace information parsed from
 --  the supplied N3 input string, or an error indication.
 --
---  Uses the Parsec monadic parser library.
---
 -- REFERENCES:
 --
 -- 1 <http://www.w3.org/TeamSubmission/2008/SUBM-n3-20080114/>
     , parseN3      
     , parseN3fromString
     , parseAnyfromString
+    , parseAnyfromText
     , parseTextFromString, parseAltFromString
     , parseNameFromString, parsePrefixFromString
     , parseAbsURIrefFromString, parseLexURIrefFromString, parseURIref2FromString
     
     -- * Exports for parsers that embed Notation3 in a bigger syntax
     , N3Parser, N3State(..), SpecialMap
-    , whiteSpace, symbol, lexeme, eof, identStart, identLetter
-    --                                                
+    
     , getPrefix -- a combination of the old defaultPrefix and namedPrefix productions
     , n3symbol -- replacement for uriRef2 -- TODO: check this is semantically correct      
     , quickVariable -- was varid      
 
 import Swish.RDF.RDFParser
     ( SpecialMap
+    , ParseResult
     , mapPrefix
-    , prefixTable, specialTable
-    , ParseResult, RDFParser
-    , n3Style, n3Lexer, ignore
-    , annotateParsecError
+    , prefixTable
+    , specialTable
+    , ignore
+      , notFollowedBy
+        , endBy
+          , sepEndBy
+            , manyTill
+        , noneOf
+      , char
+        , ichar
+          , string
+            , symbol
+              , lexeme
+                , whiteSpace
     , mkTypedLit
     )
 
                     relativeTo,
                     parseURI, parseURIReference, uriToString)
 
+import Data.Char (isSpace, isDigit, isHexDigit, chr) 
 import Data.Maybe (fromMaybe, fromJust)
 
-import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))
-import qualified Text.ParserCombinators.Parsec as PC
-import qualified Text.ParserCombinators.Parsec.Token as P
-
-import Data.Char (isSpace, chr) 
-
-----------------------------------------------------------------------
---  Set up token parsers
-----------------------------------------------------------------------
-
-lexer :: P.TokenParser N3State
-lexer = n3Lexer
-
-whiteSpace :: N3Parser ()
-whiteSpace = P.whiteSpace lexer
-
-symbol :: String -> N3Parser String
-symbol     = P.symbol     lexer
-
-lexeme :: N3Parser a -> N3Parser a
-lexeme     = P.lexeme     lexer
-
-identStart , identLetter :: CharParser st Char
-identStart  = P.identStart  n3Style
-identLetter = P.identLetter n3Style
+import qualified Data.Text.Lazy as T
+import Text.ParserCombinators.Poly.StateText
 
 ----------------------------------------------------------------------
 -- Define parser state and helper functions
         , allowLocalNames :: Bool           -- True if @keywords used so that bare names are QNames in default namespace
         }
 
--- | Functions to update N3State vector (use with Parsec updateState)
+-- | Functions to update N3State vector (use with stUpdate)
 setPrefix :: String -> String -> N3State -> N3State
 setPrefix pre uri st =  st { prefixUris=p' }
     where
 setSName nam snam st =  st { syntaxUris=s' }
     where
         s' = mapReplaceOrAdd (nam,snam) (syntaxUris st)
+
 setSUri :: String -> String -> N3State -> N3State
 setSUri nam suri = setSName nam (makeScopedName "" suri "")
 
 
 --  Return function to update graph in N3 parser state,
 --  using the supplied function of a graph
---  (use returned function with Parsec updateState)
-updateGraph :: ( RDFGraph -> RDFGraph ) -> N3State -> N3State
+--
+updateGraph :: (RDFGraph -> RDFGraph) -> N3State -> N3State
 updateGraph f s = s { graphState = f (graphState s) }
 
 ----------------------------------------------------------------------
 --  accepts a string and returns a graph or error
 ----------------------------------------------------------------------
 
-type N3Parser a = RDFParser N3State a
+type N3Parser a = Parser N3State a
 
 -- | Parse a string as N3 (with no real base URI).
 -- 
 parseN3fromString ::
   String -- ^ input in N3 format.
   -> ParseResult
-parseN3fromString = parseAnyfromString document Nothing 
+parseN3fromString txt = parseN3 (T.pack txt) Nothing
 
 -- | Parse a string with an optional base URI.
 --            
 -- See also 'parseN3fromString'.            
 --
 parseN3 ::
-  String -- ^ input in N3 format.
+  T.Text -- ^ input in N3 format.
   -> Maybe QName -- ^ optional base URI
   -> ParseResult
-parseN3 = flip (parseAnyfromString document)
+parseN3 txt mbase = parseAnyfromText document mbase txt
 
 {-
 -- useful for testing
 
 -- | Function to supply initial context and parse supplied term.
 --
--- We augment the Parsec error with the context.
---
 parseAnyfromString :: N3Parser a      -- ^ parser to apply
                       -> Maybe QName  -- ^ base URI of the input, or @Nothing@ to use default base value
                       -> String       -- ^ input to be parsed
                       -> Either String a
-parseAnyfromString parser mbase input =
-  let pmap   = LookupMap [] -- [Namespace "" "#"] -- [] -- emptyLookupMap -- LookupMap prefixTable
+parseAnyfromString p mb = parseAnyfromText p mb . T.pack
+
+-- | Function to supply initial context and parse supplied term.
+--
+parseAnyfromText :: N3Parser a      -- ^ parser to apply
+                    -> Maybe QName  -- ^ base URI of the input, or @Nothing@ to use default base value
+                    -> T.Text       -- ^ input to be parsed
+                    -> Either String a
+parseAnyfromText parser mbase input =
+  let pmap   = LookupMap []
       muri   = fmap makeQNameScopedName mbase
       smap   = LookupMap $ specialTable muri
       pstate = N3State
         
   in case puri of
     Left emsg -> Left $ "Invalid base: " ++ emsg
-    Right p -> case runParser parser (setPrefix "" p pstate) "" input of
-      Right res -> Right res
-      Left  err -> Left $ annotateParsecError 1 (lines input) err
+    Right p -> let (result, _, _) = runParser parser (setPrefix "" p pstate) input
+               in result
 
 newBlankNode :: N3Parser RDFLabel
 newBlankNode = do
-  s <- getState
-  let n = succ (nodeGen s)
-  setState $ s { nodeGen = n } 
+  n <- stQuery (succ . nodeGen)
+  stUpdate $ \s -> s { nodeGen = n }
   return $ Blank (show n)
   
 --  Test functions for selected element parsing
 
+-- TODO: remove these
+  
 parseTextFromString :: String -> String -> Either String String
 parseTextFromString s =
     parseAnyfromString (string s) Nothing
 
 parseAltFromString :: String -> String -> String -> Either String String
 parseAltFromString s1 s2 =
-    parseAnyfromString ( string s1 <|> string s2 ) Nothing
+    parseAnyfromString (string s1 <|> string s2) Nothing
 
 parseNameFromString :: String -> Either String String
 parseNameFromString =
 -}
 
 addTestPrefixes :: N3Parser ()
-addTestPrefixes = updateState $ \st -> st { prefixUris = LookupMap prefixTable } -- should append to existing map
+addTestPrefixes = stUpdate $ \st -> st { prefixUris = LookupMap prefixTable } -- should append to existing map
 
 parsePrefixFromString :: String -> Either String Namespace
 parsePrefixFromString =
         p = do
           addTestPrefixes
           pref <- n3Name
-          st   <- getState
+          st   <- stGet
           return (getPrefixNs st pref)   -- map prefix to namespace
 
 parseAbsURIrefFromString :: String -> Either String String
 
 parseURIref2FromString :: String -> Either String ScopedName
 parseURIref2FromString = 
-    parseAnyfromString (addTestPrefixes >> n3symbol) Nothing
+    parseAnyfromString (addTestPrefixes *> n3symbol) Nothing
     -- parseAnyfromString uriRef2 Nothing
 
 ----------------------------------------------------------------------
 --  Syntax productions
 ----------------------------------------------------------------------
 
-{-
- TODO:
-    - this parser is a *lot* slower than the original one
-  
--}
-
 -- helper routines
 
 comma, semiColon , fullStop :: N3Parser ()
 
 -- a specialization of bracket/between 
 br :: String -> String -> N3Parser a -> N3Parser a
-br lsym rsym = between (symbol lsym) (symbol rsym)
+br lsym rsym = bracket (symbol lsym) (symbol rsym)
+
+-- to make porting from parsec to polyparse easier
+between :: Parser s lbr -> Parser s rbr -> Parser s a -> Parser s a
+between = bracket
 
 -- The @ character is optional if the keyword is in the
 -- keyword list
 --
 atSign :: String -> N3Parser ()
 atSign s = do
-  st <- getState
+  st <- stGet
   
-  let p = ignore $ char '@'
+  let p = ichar '@'
   
   if s `elem` getKeywordsList st
-    then PC.optional p
+    then ignore $ optional p
     else p
          
 atWord :: String -> N3Parser String
   -- apply to both cases even though should only really be necessary
   -- when the at sign is not given
   --
-  lexeme $ string s *> notFollowedBy (char ':')
+  lexeme $ string s *> notFollowedBy (== ':')
   return s
 
 showURI :: URI -> String
 -}
 operatorLabel :: ScopedName -> N3Parser RDFLabel
 operatorLabel snam@(ScopedName sns _) = do
-  st <- getState
+  st <- stGet
   let opmap = prefixUris st
       pkey = entryKey sns
       pval = entryVal sns
   case mapFindMaybe pkey opmap of
     Just val | val == pval -> return rval
              | otherwise   -> do
-               setState $ st { prefixUris = mapReplace opmap sns }
+               stUpdate $ \s -> s { prefixUris = mapReplace opmap sns }
                return rval
     
     _ -> do
-      setState $ st { prefixUris = mapAdd opmap sns }
+      stUpdate $ \s -> s { prefixUris = mapAdd opmap sns }
       return rval
         
 {-
 
 addStatement :: RDFLabel -> RDFLabel -> AddStatement
 addStatement s p o@(Lit _ (Just dtype)) | dtype `elem` [xsd_boolean, xsd_integer, xsd_decimal, xsd_double] = do 
-  st <- getState
+  ost <- stGet
   let stmt = arc s p o
-      oldp = prefixUris st
-      ogs = graphState st
+      oldp = prefixUris ost
+      ogs = graphState ost
       newp = mapReplaceOrAdd (snScope dtype) oldp
-  setState $ st { prefixUris = newp, graphState = addArc stmt ogs }
-addStatement s p o = updateState (updateGraph (addArc (arc s p o) ))
+  stUpdate $ \st -> st { prefixUris = newp, graphState = addArc stmt ogs }
+addStatement s p o = stUpdate (updateGraph (addArc (arc s p o) ))
 
 addStatementRev :: RDFLabel -> RDFLabel -> AddStatement
 addStatementRev o p s = addStatement s p o
 n3Name :: N3Parser String
 n3Name = (:) <$> n3Init <*> n3Body
   where
-    n3Init = oneOf initChar <?> "Initial character of a name"
-    n3Body = many (oneOf bodyChar) <?> "Body of the name"
+    n3Init = satisfy (`elem` initChar)
+    n3Body = T.unpack <$> manySatisfy (`elem` bodyChar)
 
 {-
 quickvariable ::=	\?[A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*
 
 -- TODO: is mapping to Var correct?
 quickVariable :: N3Parser RDFLabel
-quickVariable = char '?' *> (Var <$> n3Name) <?> "quickvariable"
+quickVariable = char '?' *> (Var <$> n3Name) 
 
 {-
 string ::=	("""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*""")|("[^"\\]*(?:\\.[^"\\]*)*")
 -}
 
 n3string :: N3Parser String
-n3string = tripleQuoted <|> singleQuoted <?> "string"
+n3string = tripleQuoted <|> singleQuoted 
 
 {-
 singleQuoted ::=  "[^"\\]*(?:\\.[^"\\]*)*"
 -- seen in the wild, so support them
 --
 ntHexDigit :: N3Parser Char
-ntHexDigit = oneOf $ ['0'..'9'] ++ ['A'..'F'] ++ ['a'..'f']
+ntHexDigit = satisfy isHexDigit
+
+digit :: N3Parser Char
+digit = satisfy isDigit
 
 hex4 :: N3Parser Char
 hex4 = do
-  digs <- count 4 ntHexDigit
+  digs <- exactly 4 ntHexDigit
   let dstr = "0x" ++ digs
       dchar = read dstr :: Int
   return $ chr dchar
         
 hex8 :: N3Parser Char
 hex8 = do
-  digs <- count 8 ntHexDigit
+  digs <- exactly 8 ntHexDigit
   let dstr = "0x" ++ digs
       dchar = read dstr :: Int
   if dchar <= 0x10FFFF
     then return $ chr dchar
-    else unexpected "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"
+    else fail "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"
 
 {-
 This is very similar to NTriples except that it also allows the escaping of '
 sQuot :: N3Parser Char
 sQuot = char '"'
 
+-- NOTE: changed from parsec between to polyparse bracket; is
+-- there a semantic difference?
 singleQuoted :: N3Parser String
-singleQuoted = between sQuot sQuot $ many n3Character
+singleQuoted = bracket sQuot sQuot $ many n3Character
     
 {-
 tripleQUoted ::=	"""[^"\\]*(?:(?:\\.|"(?!""))[^"\\]*)*"""
 tripleQuoted :: N3Parser String
 tripleQuoted = tQuot *> manyTill (n3Character <|> sQuot <|> char '\n') tQuot
   where
-    tQuot = try (count 3 sQuot)
+    -- tQuot = try (count 3 sQuot)
+    tQuot = exactly 3 sQuot
 
 getDefaultPrefix :: N3Parser Namespace
 getDefaultPrefix = do
-  s <- getState
+  s <- stGet
   return (getPrefixNs s "")
 
 addBase :: URI -> N3Parser ()
-addBase = updateState . setSUri "base" . getScopedNameURI'
+addBase = stUpdate . setSUri "base" . getScopedNameURI'
 
 addPrefix :: Maybe String -> URI -> N3Parser ()
-addPrefix p = updateState . setPrefix (fromMaybe "" p) . getScopedNameURI'
+addPrefix p = stUpdate . setPrefix (fromMaybe "" p) . getScopedNameURI'
 
 {-|
 Update the set of keywords that can be given without
 an \@ sign.
 -}
 updateKeywordsList :: [String] -> N3Parser ()
-updateKeywordsList = updateState . setKeywordsList
+updateKeywordsList = stUpdate . setKeywordsList
 
 {-
 document ::=		|	statements_optional EOF
 -}
 
 document :: N3Parser RDFGraph
-document = mkGr <$> (whiteSpace *> statementsOptional *> eof *> getState)
+document = mkGr <$> (whiteSpace *> statementsOptional *> eof *> stGet)
   where
     mkGr s = setNamespaces (prefixUris s) (graphState s)
 
 -- (if applicable) which should mean being able to get rid of try
 --
 declaration :: N3Parser ()
-declaration = 
+declaration = oneOf [
+  atWord "base" >> explicitURI >>= addBase,
+  atWord "keywords" >> bareNameCsl >>= updateKeywordsList,
+  atWord "prefix" *> getPrefix
+  ]
+
+  {-
   (try (atWord "base") >> explicitURI >>= addBase)
   <|>
   (try (atWord "keywords") >> bareNameCsl >>= updateKeywordsList)
   <|>
   (try (atWord "prefix") *> getPrefix)
-  <?> "declaration"
+  -}
   
 getPrefix :: N3Parser ()  
 getPrefix = do
       rb = char '>'
   
   -- TODO: do the whitespace definitions match?
-  ustr <- between lb (rb <?> "end of URI '>'") $ many (satisfy (/= '>'))
+  ustr <- between lb rb $ many (satisfy (/= '>'))
   let uclean = filter (not . isSpace) ustr
       
-  s <- getState
+  s <- stGet
   let base = getSUri s "base"
       
   case appendUris base uclean of 
 bareNameCsl = sepBy (lexeme bareName) comma
 
 bareName :: N3Parser String
-bareName = n3Name <?> "barename"
+bareName = n3Name 
 
 {-
 prefix ::=	([A-Z_a-z#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x02ff#x0370-#x037d#x037f-#x1fff#x200c-#x200d#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff][\-0-9A-Z_a-z#x00b7#x00c0-#x00d6#x00d8-#x00f6#x00f8-#x037d#x037f-#x1fff#x200c-#x200d#x203f-#x2040#x2070-#x218f#x2c00-#x2fef#x3001-#xd7ff#xf900-#xfdcf#xfdf0-#xfffd#x00010000-#x000effff]*)?:
 
 prefix :: N3Parser (Maybe String)
 prefix = optional (lexeme n3Name) <* char ':'
-         <?> "prefix name"
+         
 
 {-
 symbol ::=		|	explicituri
 n3symbol = 
   ((makeUriScopedName . showURI) <$> explicitURI)
   <|> qname
-  <?> "symbol"
 
 symbolCsl :: N3Parser [ScopedName]
 symbolCsl = sepBy (lexeme n3symbol) comma
 qname =
   (char ':' *> toSN getDefaultPrefix)
   <|> (n3Name >>= fullOrLocalQName)
-  <?> "QName"
     where
       toSN p = ScopedName <$> p <*> (n3Name <|> return "")
           
   
 findPrefix :: String -> N3Parser Namespace
 findPrefix pre = do
-  st <- getState
+  st <- stGet
   case mapFindMaybe pre (prefixUris st) of
     Just uri -> return $ Namespace pre uri
-    Nothing  -> unexpected $ "Prefix '" ++ pre ++ ":' not bound."
+    Nothing  -> failBad $ "Prefix '" ++ pre ++ ":' not bound."
   
 localQName :: String -> N3Parser ScopedName
 localQName name = do
-  st <- getState
+  st <- stGet
   if getAllowLocalNames st
-    then do
-      pre <- getDefaultPrefix
-      return $ ScopedName pre name
-    
+    then ScopedName <$> getDefaultPrefix <*> pure name
     else fail "Invalid 'bare' word" -- TODO: not ideal error message; can we handle this case differently?
 
 {-
 -}
 
 existential :: N3Parser ()
-existential = try (atWord "forSome") *> symbolCsl >> return ()
+-- existential = try (atWord "forSome") *> symbolCsl >> return ()
+existential = atWord "forSome" *> symbolCsl *> pure ()
 
 {-
 simpleStatement ::=		|	subject propertylist
   br "(" ")" pathList
   <|> br "[" "]" propertyListBNode
   <|> br "{" "}" formulaContent
-  <|> try boolean
+  -- <|> try boolean
+  <|> boolean
   <|> literal
   <|> numericLiteral
   <|> quickVariable
   <|> Blank <$> (string "_:" *> n3Name) -- TODO a hack that needs fixing
   <|> Res <$> n3symbol
-  <?> "pathitem"
   
 {-  
 we create a blank node for the list and return it, whilst
 -}
 pathList :: N3Parser RDFLabel
 pathList = do
-  cts <- many (lexeme expression) <?> "pathlist"
+  cts <- many (lexeme expression)
   eNode <- operatorLabel rdf_nil
   case cts of
     [] -> return eNode
 
 restoreState :: N3State -> N3Parser N3State
 restoreState origState = do
-  oldState <- getState
-  setState $ origState { nodeGen = nodeGen oldState }
+  oldState <- stGet
+  stUpdate $ \_ -> origState { nodeGen = nodeGen oldState }
   return oldState
 
 {-
 formulaContent :: N3Parser RDFLabel
 formulaContent = do
   bNode <- newBlankNode
-  pstate <- getState
-  let fstate = pstate { graphState = emptyRDFGraph, thisNode = bNode }
-  setState fstate
+  pstate <- stGet
+  stUpdate $ \st -> st { graphState = emptyRDFGraph, thisNode = bNode }
   statementList
   oldState <- restoreState pstate
-  updateState $ updateGraph $ setFormula (Formula bNode (graphState oldState))
+  stUpdate $ updateGraph $ setFormula (Formula bNode (graphState oldState))
   return bNode
   
 subgraph :: RDFLabel -> N3Parser RDFGraph
 subgraph this = do
-  pstate <- getState
-  let fstate = pstate { graphState = emptyRDFGraph, thisNode = this }
-  setState fstate       -- switch new state into parser
+  pstate <- stGet
+  stUpdate $ \st -> st { graphState = emptyRDFGraph, thisNode = this }
   statementsOptional    -- parse statements of formula
   oldState <- restoreState pstate  
   return $ graphState oldState
 
 boolean :: N3Parser RDFLabel
 boolean = mkTypedLit xsd_boolean <$> 
-          (try (atWord "false") <|> atWord "true")
+          (atWord "false" <|> atWord "true")
+          -- (try (atWord "false") <|> atWord "true")
            
 {-
 dtlang ::=		|	 "@"  langcode
 -}
 
 literal :: N3Parser RDFLabel
-literal = Lit <$> n3string <*> optionMaybe dtlang
+literal = Lit <$> n3string <*> optional dtlang
   
 dtlang :: N3Parser ScopedName
 dtlang = 
-  (char '@' *> langcode <?> "langcode")
-  <|> (try (string "^^") *> n3symbol)
-  <?> "dtlang"
+  (char '@' *> langcode)
+  <|> string "^^" *> n3symbol
+  -- <|> (try (string "^^") *> n3symbol)
 
 langcode :: N3Parser ScopedName
 langcode = do
-  h <- many1 (oneOf ['a'..'z']) <?> "start of langcode (a to z)"
-  mt <- optionMaybe ( (:) <$> char '-' <*> many1 (oneOf (['a'..'z'] ++ ['0'..'9']))) <?> "a to z or 0 to 9 (langcode after the hyphen)"
-  return $ langName $ h ++ fromMaybe "" mt
+  h <- many1Satisfy (`elem` ['a'..'z'])
+  mt <- optional ( T.append <$> (char '-' *> pure (T.singleton '-')) <*> many1Satisfy (`elem` ['a'..'z'] ++ ['0'..'9']))
+  return $ langName $ T.unpack $ T.append h (fromMaybe T.empty mt)
     
 {-
 decimal ::=	[-+]?[0-9]+(\.[0-9]+)?
 
 numericLiteral :: N3Parser RDFLabel
 numericLiteral =
-  -- try (mkTypedLit xsd_double <$> n3double)
-  try (d2s <$> n3double)
-  <|> try (mkTypedLit xsd_decimal <$> n3decimal)
+  -- -- try (mkTypedLit xsd_double <$> n3double)
+  -- try (d2s <$> n3double)
+  -- <|> try (mkTypedLit xsd_decimal <$> n3decimal)
+  d2s <$> n3double
+  <|> mkTypedLit xsd_decimal <$> n3decimal
   <|> mkTypedLit xsd_integer <$> n3integer
-  <?> "numericliteral"
 
 n3sign :: N3Parser Char
 n3sign = char '+' <|> char '-'
 
 n3integer :: N3Parser String
 n3integer = do
-  ms <- optionMaybe n3sign
+  ms <- optional n3sign
   ds <- many1 digit
   case ms of
     Just s -> return $ s : ds
 n3decimal = (++) <$> n3integer <*> ( (:) <$> char '.' <*> many1 digit )
            
 n3double :: N3Parser String  
-n3double = (++) <$> n3decimal <*> ( (:) <$> oneOf "eE" <*> n3integer )
+n3double = (++) <$> n3decimal <*> ( (:) <$> satisfy (`elem` "eE") <*> n3integer )
 
 -- convert a double, as returned by n3double, into its
 -- canonical XSD form
   -- we check reverse first so that <= is tried before looking for a URI via expression rule
   (,) addStatementRev <$> verbReverse
   <|> (,) addStatement <$> verbForward
-  <?> "verb"
 
 -- those verbs for which subject is on the right and object on the left
 verbReverse :: N3Parser RDFLabel
 verbReverse =
+  string "<=" *> operatorLabel log_implies
+  <|> between (atWord "is") (atWord "of") (lexeme expression)
+
+{-
   try (string "<=") *> operatorLabel log_implies
   <|> between (try (atWord "is")) (atWord "of") (lexeme expression)
+-}
 
 -- those verbs with subject on the left and object on the right
 verbForward :: N3Parser RDFLabel
 verbForward =  
-  (try (string "=>") *> operatorLabel log_implies)
+  -- (try (string "=>") *> operatorLabel log_implies)
+  (string "=>" *> operatorLabel log_implies)
   <|> (string "=" *> operatorLabel owl_sameAs)
-  <|> (try (atWord "a") *> operatorLabel rdf_type)
+  -- <|> (try (atWord "a") *> operatorLabel rdf_type)
+  <|> (atWord "a" *> operatorLabel rdf_type)
   <|> (atWord "has" *> lexeme expression)
   <|> lexeme expression
 
 -}
 universal :: N3Parser ()
 universal = 
-  try (atWord "forAll") *> 
-  unexpected "universal (@forAll) currently unsupported." 
+  -- try (atWord "forAll") *> 
+  atWord "forAll" *> 
+  failBad "universal (@forAll) currently unsupported." 
   -- will be something like: *> symbolCsl
 
 {-

Swish/RDF/NTParser.hs

 --  new 'RDFGraph' consisting of triples and namespace information parsed from
 --  the supplied NTriples input string, or an error indication.
 --
---  Uses the Parsec monadic parser library.
---
 -- REFERENCES:
 --
 -- 1 <http://www.w3.org/TR/rdf-testcases/#ntriples>
     , parseNT      
     , parsefromString
     
+      {-
     -- * Exports for parsers that embed NTriples in a bigger syntax
     , NTParser, NTState(..)
     , ntripleDoc
     , subject, predicate, object
     , uriref, urirefLbl
     , nodeID, literal, language
-
+      -}
+      
     )
 where
 
     , emptyRDFGraph
     )
 
-import Swish.RDF.GraphClass
-    ( arc )
+import Swish.RDF.GraphClass (arc)
 
-import Swish.Utils.Namespace
-    ( ScopedName(..)
-    , makeUriScopedName
-    )
+import Swish.Utils.Namespace (ScopedName(..), makeUriScopedName)
 
 import Swish.RDF.Vocabulary (langName)
 
+import Swish.RDF.RDFParser ( ParseResult
+    , ignore
+    , skipMany
+               , noneOf
+    , char
+      , string
+    , eoln
+    , fullStop
+    )
+  
+{-
 import Swish.RDF.RDFParser
     ( ParseResult, RDFParser
     , ignore
     , annotateParsecError
     )
+-}
 
 import Control.Applicative
 import Control.Monad (when)
 
 import Network.URI (parseURI)
 
+import qualified Data.Text.Lazy as T
 import Data.Char (chr) 
 import Data.Maybe (fromMaybe, isNothing)
 
-import Text.ParserCombinators.Parsec hiding (many, optional, (<|>))
+import Text.ParserCombinators.Poly.StateText
 
 ----------------------------------------------------------------------
 -- Define parser state and helper functions
         }
 
 --  Return function to update graph in NT parser state,
---  using the supplied function of a graph
---  (use returned function with Parsec updateState)
-updateGraph :: ( RDFGraph -> RDFGraph ) -> NTState -> NTState
+--  using the supplied function of a graph. This is for use
+--  with stUpdate.
+--
+updateGraph :: (RDFGraph -> RDFGraph) -> NTState -> NTState
 updateGraph f s = s { graphState = f (graphState s) }
 
 ----------------------------------------------------------------------
 --  accepts a string and returns a graph or error
 ----------------------------------------------------------------------
 
-type NTParser a = RDFParser NTState a
+type NTParser a = Parser NTState a
 
 -- | Parse a string.
 -- 
 parseNT ::
-  String -- ^ input in NTriples format.
+  T.Text -- ^ input in NTriples format.
   -> ParseResult
-parseNT = parsefromString ntripleDoc
--- parseNT = either Error Result . parsefromString ntripleDoc
+parseNT = parsefromText ntripleDoc
 
 {-
 -- useful for testing
 test :: String -> RDFGraph
-test = either error id . parsefromString ntripleDoc
+test = either error id . parseNT
 -}
 
 -- | Function to supply initial context and parse supplied term.
     NTParser a      -- ^ parser to apply
     -> String       -- ^ input to be parsed
     -> Either String a
-parsefromString parser input =
-        let
-            pstate = NTState
+parsefromString parser = parsefromText parser . T.pack
+
+-- | Function to supply initial context and parse supplied term.
+--
+parsefromText :: 
+    NTParser a      -- ^ parser to apply
+    -> T.Text       -- ^ input to be parsed
+    -> Either String a
+parsefromText parser input =
+        let istate = NTState
                     { graphState = emptyRDFGraph
                     }
-            result = runParser parser pstate "" input
-        in
-            case result of
-                Right res -> Right res
-                Left  err -> Left $ annotateParsecError 1 (lines input) err
-
+            (result, _, _) = runParser parser istate input
+        in result 
+           
 -- helper routines
 
-fullStop :: NTParser ()
-fullStop = ignore (char '.')
-
 {-
 lineFeed :: NTParser ()
 lineFeed = ignore (char '\r')
 -- Add statement to graph in NT parser state
 
 addStatement :: RDFLabel -> RDFLabel -> RDFLabel -> NTParser ()
-addStatement s p o = updateState (updateGraph (addArc (arc s p o) ))
+addStatement s p o = stUpdate (updateGraph (addArc (arc s p o) ))
 
 ----------------------------------------------------------------------
 --  Syntax productions
 -}
 
 ntripleDoc :: NTParser RDFGraph
-ntripleDoc = graphState <$> (sepBy line eoln *> optional eoln *> skipMany ws *> eof *> getState)
+ntripleDoc = graphState <$> (sepBy line eoln *> optional eoln *> skipWS *> eof *> stGet)
 
 line :: NTParser ()
-line = skipMany ws *> ignore (optional (comment <|> triple))
+line = skipWS *> ignore (optional (comment <|> triple))
 
 {-
 ws	::=	space | tab	
 Could use whiteSpace rule here, but that would permit
 constructs (e.g. comments) where we do not support them.
 -}
+
+isWS :: Char -> Bool
+isWS = (`elem` " \t")
+
+{-
 ws :: NTParser ()
-ws = ignore (char ' ' <|> tab) <?> "white space (' ' or tab)"
+-- ws = ignore (char ' ' <|> tab)
+ws = ignore $ satisfy isWS
+-}
+           
+skipWS :: NTParser ()
+skipWS = ignore $ manySatisfy isWS
+
+skip1WS :: NTParser ()
+skip1WS = ignore $ many1Satisfy isWS
 
 {-
 comment	::=	'#' ( character - ( cr | lf ) )*	
 -}
 
 comment :: NTParser ()
-comment = char '#' *> skipMany (noneOf "\r\n") <?> "comment line"
+comment = char '#' *> skipMany (noneOf "\r\n")
 
 {-
 eoln	::=	cr | lf | cr lf	
 -}
 
-eoln :: NTParser ()
--- eoln = ignore (newline <|> (lineFeed *> optional newline))
-eoln = ignore (try (string "\r\n") <|> string "\r" <|> string "\n")
-       <?> "new line"
-       
 {-
 name	::=	[A-Za-z][A-Za-z0-9]*	
 -}
 hChars = ['a'..'z'] ++ ['A'..'Z']
 bChars = hChars ++ ['0'..'9']
 
+-- cons is not particularly efficient
 name :: NTParser String
-name = (:) <$> oneOf hChars <*> many (oneOf bChars)
+-- name = (:) <$> satisfy (`elem` hChars) <*> manySatisfy (`elem` bChars)
+name = T.unpack <$> (T.cons <$> satisfy (`elem` hChars) <*> manySatisfy (`elem` bChars))
 
 {-
 triple	::=	subject ws+ predicate ws+ object ws* '.' ws*	
 -}
 
 triple :: NTParser ()
-triple = do
-  s <- subject
-  skipMany1 ws
-  p <- predicate
-  skipMany1 ws
-  o <- object
-  skipMany ws
-  fullStop
-  skipMany ws
-  addStatement s p o
+triple = 
+  {- trying to be fancy but addStatement is a Parser not a pure function
+  addStatement 
+  <$> (subject <* skip1WS)
+  <*> (predicate <* skip1WS)
+  <*> (object <* (skipWS *> fullStop *> skipWS))
+  -}
+  
+  do
+    s <- subject
+    skip1WS
+    p <- predicate
+    skip1WS
+    o <- object
+    skipWS
+    fullStop
+    skipWS
+    addStatement s p o
 
 {-
 subject	::=	uriref | nodeID	
 
 uriref :: NTParser ScopedName
 uriref = do
-  ustr <- char '<' *> manyTill character (char '>')
+  -- not ideal, as we want to reject invalid characters immediately rather than via parseURI
+  ustr <- T.unpack <$> (bracket (char '<') (char '>') $ many1Satisfy (/= '>'))
+  -- ustr <- bracket (char '<') (char '>') $ many1 character -- looks like need to exclude > from character
+  -- ustr <- char '<' *> manyTill character (char '>')
   when (isNothing (parseURI ustr)) $
-    fail ("Invalid URI: <" ++ ustr ++ ">")
+    failBad ("Invalid URI: <" ++ ustr ++ ">")
   return $ makeUriScopedName ustr
 
 urirefLbl :: NTParser RDFLabel
 -}
 
 nodeID :: NTParser RDFLabel
-nodeID = Blank <$> (string "_:" *> name) <?> "blank node (_:label)"
+nodeID = Blank <$> (string "_:" *> name)
 
 {-  
 literal	::=	langString | datatypeString	
 -}
 
 literal :: NTParser RDFLabel
-literal = Lit <$> between (char '"') (char '"') (many character) <*> optionMaybe dtlang
+literal = Lit <$> bracket (char '"') (char '"') (many character) <*> optional dtlang
 
 dtlang :: NTParser ScopedName
 dtlang = 
 
 language :: NTParser ScopedName
 language = do
-  h <- many1 (oneOf ['a'..'z'])
-  mt <- optionMaybe ( (:) <$> char '-' <*> many1 (oneOf (['a'..'z'] ++ ['0'..'9'])) )
-  return $ langName $ h ++ fromMaybe "" mt
+  h <- many1Satisfy (`elem` ['a'..'z'])
+  mt <- optional ( T.cons <$> char '-' <*> many1Satisfy (`elem` (['a'..'z'] ++ ['0'..'9'])) )
+  return $ langName $ T.unpack $ T.append h $ fromMaybe T.empty mt
 
 {-
 String handling: 
 asciiCharsNT = filter (`notElem` "\\\"") asciiChars
 
 ntHexDigit :: NTParser Char
-ntHexDigit = oneOf $ ['0'..'9'] ++ ['A'..'F']
+ntHexDigit = satisfy (`elem` ['0'..'9'] ++ ['A'..'F'])
 
 hex4 :: NTParser Char
 hex4 = do
-  digs <- count 4 ntHexDigit
+  digs <- exactly 4 ntHexDigit
   let dstr = "0x" ++ digs
       dchar = read dstr :: Int
   return $ chr dchar
         
 hex8 :: NTParser Char
 hex8 = do
-  digs <- count 8 ntHexDigit
+  digs <- exactly 8 ntHexDigit
   let dstr = "0x" ++ digs
       dchar = read dstr :: Int
   if dchar <= 0x10FFFF
     then return $ chr dchar
-    else unexpected "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"
+    else failBad "\\UHHHHHHHH format is limited to a maximum of \\U0010FFFF"
 
 protectedChar :: NTParser Char
 protectedChar =
 
 character :: NTParser Char
 character = (char '\\' *> protectedChar)
-      <|> (oneOf asciiCharsNT <?> "ASCII character")
+      <|> (satisfy (`elem` asciiCharsNT))
 
 --------------------------------------------------------------------------------
 --

Swish/RDF/RDFParser.hs

     , prefixTable, specialTable
 
     -- parser
-    , ParseResult, RDFParser
-    , n3Style, n3Lexer
+    , ParseResult
     , ignore
-    , annotateParsecError
+    , char
+    , ichar
+    , string
+    , symbol
+    , lexeme
+    , notFollowedBy
+    , whiteSpace
+    , skipMany
+    , skipMany1
+    , endBy
+    , sepEndBy
+    , sepEndBy1
+    , manyTill
+    , noneOf
+    , eoln
+    , fullStop
     , mkTypedLit
     )
 where
     , default_base
     )
 
-import Control.Applicative
-import Control.Monad (MonadPlus(..), ap)
+import qualified Data.Text.Lazy as T
+import Text.ParserCombinators.Poly.StateText
 
-import Text.ParserCombinators.Parsec (GenParser, ParseError, char, letter, alphaNum, errorPos, sourceLine, sourceColumn)
-import Text.ParserCombinators.Parsec.Error (errorMessages, showErrorMessages)
-import Text.ParserCombinators.Parsec.Language (emptyDef)
-import qualified Text.ParserCombinators.Parsec.Token as P
-
+import Data.Char (isSpace)
 import Data.Maybe (fromMaybe)
 
 -- Code
 
-{-|
-The language definition for N3-style formats.
--}
-
-n3Style :: P.LanguageDef st
-n3Style =
-        emptyDef
-            { P.commentStart   = ""
-            , P.commentEnd     = ""
-            , P.commentLine    = "#"
-            , P.nestedComments = True
-            , P.identStart     = letter <|> char '_'      -- oneOf "_"
-            , P.identLetter    = alphaNum <|> char '_'
-            , P.reservedNames  = []
-            , P.reservedOpNames= []
-            , P.caseSensitive  = True
-            }
-
-{-|
-The lexer for N3 style languages.
--}
-n3Lexer :: P.TokenParser st
-n3Lexer = P.makeTokenParser n3Style
-
 -- | Type for special name lookup table
 type SpecialMap = LookupMap (String,ScopedName)
 
     ("base",      fromMaybe default_base mbase ) 
   ]
 
-----------------------------------------------------------------------
---  Define top-level parser function:
---  accepts a string and returns a graph or error
-----------------------------------------------------------------------
+-- Parser routines, heavily based on Parsec
 
-type RDFParser a b = GenParser Char a b
-
--- Applicative/Alternative are defined for us in Parsec 3
-instance Applicative (GenParser a b) where
-  pure = return
-  (<*>) = ap
-  
-instance Alternative (GenParser a b) where
-  empty = mzero
-  (<|>) = mplus
-  
 type ParseResult = Either String RDFGraph
 
-ignore :: (Monad m) => m a -> m ()
-ignore p = p >> return ()
+ignore :: (Applicative f) => f a -> f ()
+ignore f = f *> pure ()
+
+char :: Char -> Parser s Char
+char c = satisfy (==c)
+
+ichar :: Char -> Parser s ()
+ichar = ignore . char
+
+-- TODO: is there a better way to do this?
+string :: String -> Parser s String
+string s = mapM char s
+  
+skipMany :: Parser s a -> Parser s ()
+skipMany = ignore . many
+  
+skipMany1 :: Parser s a -> Parser s ()
+skipMany1 = ignore . many1
+  
+endBy :: Parser s a -> Parser s b -> Parser s [a]
+endBy p sep = many (p <* sep)
+
+sepEndBy :: Parser s a -> Parser s b -> Parser s [a]
+sepEndBy p sep = sepEndBy1 p sep <|> pure []
+
+-- The trailing separator is optional: the final item may or may not be followed by @sep@.
+sepEndBy1 :: Parser s a -> Parser s b -> Parser s [a]
+sepEndBy1 p sep = do
+  x <- p
+  (sep *> ((x:) <$> sepEndBy p sep)) <|> return [x]
+  
+manyTill :: Parser s a -> Parser s b -> Parser s [a]
+manyTill p end = go
+  where
+    go = (end *> return [])
+         <|>
+         ((:) <$> p <*> go)
+
+
+noneOf :: String -> Parser s Char           
+noneOf istr = satisfy (`notElem` istr)
+           
+fullStop :: Parser s ()
+fullStop = ichar '.'
+
+eoln :: Parser s ()
+-- eoln = ignore (newline <|> (lineFeed *> optional newline))
+-- eoln = ignore (try (string "\r\n") <|> string "\r" <|> string "\n")
+eoln = ignore (oneOf [string "\r\n", string "\r", string "\n"])
+       
+notFollowedBy :: (Char -> Bool) -> Parser s ()
+notFollowedBy p = do
+  c <- next
+  if p c 
+    then fail $ "Unexpected character: " ++ show [c]
+    else reparse $ T.singleton c
+
+symbol :: String -> Parser s String
+symbol = lexeme . string
+
+lexeme :: Parser s a -> Parser s a
+lexeme p = p <* whiteSpace
+
+whiteSpace :: Parser s ()
+whiteSpace = skipMany (simpleSpace <|> oneLineComment)
+
+simpleSpace :: Parser s ()
+simpleSpace = ignore $ many1Satisfy isSpace
+
+oneLineComment :: Parser s ()
+oneLineComment = ichar '#' *> manySatisfy (/= '\n') *> pure ()
+
+{-
+
+Not sure we can get this with polyparse
 
 -- | Annotate a Parsec error with the local context - i.e. the actual text
 -- that caused the error and preceding/succeeding lines (if available)
 
     in unlines eHdr ++ eMsg
 
+-}
+
 -- | Create a typed literal.
 mkTypedLit ::
   ScopedName -- ^ the type

Swish/RDF/SwishCommands.hs

     , reportLine
     )
 
-import Swish.RDF.SwishScript
-    ( parseScriptFromString
-    )
+import Swish.RDF.SwishScript (parseScriptFromText)
 
 import Swish.RDF.GraphPartition
     ( GraphPartition(..)
 import qualified Swish.RDF.N3Formatter as N3F
 import qualified Swish.RDF.NTFormatter as NTF
 
-import Swish.RDF.N3Parser (parseN3) -- (parseN3fromString)
+import Swish.RDF.N3Parser (parseN3)
 import Swish.RDF.NTParser (parseNT)
 
 import Swish.RDF.GraphClass
 
 import System.IO
     ( Handle, openFile, IOMode(..)
-    , hPutStr, hPutStrLn, hClose, hGetContents
+    , hPutStr, hPutStrLn, hClose
     , hIsReadable, hIsWritable
     , stdin, stdout
     )
 import Control.Monad.State (modify, gets)
 import Control.Monad (liftM, when)
 
+import qualified Data.Text.Lazy as T
+import qualified Data.Text.Lazy.IO as IO
 import System.IO.Error
 
 import Data.Maybe (isJust, fromMaybe)
       
 swishParseScript ::
   Maybe String -- file name (or "stdin" if Nothing)
-  -> String  -- script contents
+  -> T.Text    -- script contents
   -> SwishStateIO [SwishStateIO ()]
 swishParseScript mfpath inp = do
   buri <- calculateBaseURI mfpath
-  case parseScriptFromString (Just buri) inp of
+  case parseScriptFromText (Just buri) inp of
     Left err -> do
       let inName = maybe "standard input" ("file " ++) mfpath
       swishError ("Script syntax error in " ++ inName ++ ": "++err) SwishDataInputError
 -- | Open a file (or stdin), read its contents, and process them.
 --
 swishReadFile :: 
-  (Maybe String -> String -> SwishStateIO a) -- ^ Convert filename and contents into desired value
+  (Maybe String -> T.Text -> SwishStateIO a) -- ^ Convert filename and contents into desired value
   -> a -- ^ the value to use if the file can not be read in
   -> Maybe String -- ^ the file name or @stdin@ if @Nothing@
   -> SwishStateIO a
 swishReadFile conv errVal fnam = 
   let reader (h,f,i) = do
         res <- conv fnam i
-        when f $ lift $ hClose h
+        when f $ lift $ hClose h -- given that we use IO.hGetContents not sure the close is needed
         return res
   
   in swishOpenFile fnam >>= maybe (return errVal) reader
 -- | Open and read file, returning its handle and content, or Nothing
 -- WARNING:  the handle must not be closed until input is fully evaluated
 --
-swishOpenFile :: Maybe String -> SwishStateIO (Maybe (Handle, Bool, String))
+swishOpenFile :: Maybe String -> SwishStateIO (Maybe (Handle, Bool, T.Text))
 swishOpenFile Nothing     = readFromHandle stdin Nothing
 swishOpenFile (Just fnam) = do
   o <- lift $ try $ openFile fnam ReadMode
       
     Right hnd -> readFromHandle hnd $ Just ("file: " ++ fnam)
 
-readFromHandle :: Handle -> Maybe String -> SwishStateIO (Maybe (Handle, Bool, String))
+readFromHandle :: Handle -> Maybe String -> SwishStateIO (Maybe (Handle, Bool, T.Text))
 readFromHandle hdl mlbl = do
   hrd <- lift $ hIsReadable hdl
   if hrd
     then do
-      fc <- lift $ hGetContents hdl
+      fc <- lift $ IO.hGetContents hdl
       return $ Just (hdl, isJust mlbl, fc)
   
     else do
 
 swishParse :: 
   Maybe String -- ^ filename (if not stdin)
-  -> String  -- ^ contents of file
+  -> T.Text    -- ^ contents of file
   -> SwishStateIO (Maybe RDFGraph)
 swishParse mfpath inp = do
   fmt <- gets format

Swish/RDF/SwishScript.hs

       
       -- * Parsing
       
-      parseScriptFromString 
+      parseScriptFromText 
     )
 where
 
     , merge, add
     )
 
+import Swish.RDF.RDFParser (whiteSpace, lexeme, symbol, eoln, manyTill)
+
 import Swish.RDF.N3Parser
-    ( parseAnyfromString
+    ( parseAnyfromText
     , parseN3      
     , N3Parser, N3State(..)
-    , whiteSpace, symbol, lexeme
-    , eof, identLetter
     , getPrefix
     , subgraph
     , n3symbol -- was uriRef2,
 import Swish.Utils.ListHelpers
     ( equiv, flist )
 
-import Text.ParserCombinators.Parsec
-    ( (<?>) 
-    -- , (<|>)
-    -- , many
-    , manyTill
-    , option, sepBy, between, try, notFollowedBy
-    , string, anyChar
-    , getState
-    )
-
-import Control.Applicative
-
-import Control.Monad.State
-    ( modify, gets, lift
-    )
+import qualified Data.Text.Lazy as T
+import qualified Data.Text.Lazy.IO as IO
+import Text.ParserCombinators.Poly.StateText
 
 import Control.Monad (unless, when, liftM)
+import Control.Monad.State (modify, gets, lift)
 
 import Data.List (isPrefixOf)
 
 -- 
 
 -- | Parser for Swish script processor
-parseScriptFromString :: 
+parseScriptFromText :: 
   Maybe QName -- ^ Default base for the script
-  -> String -- ^ Swish script
+  -> T.Text   -- ^ Swish script
   -> Either String [SwishStateIO ()]
-parseScriptFromString = parseAnyfromString script 
+parseScriptFromText = parseAnyfromText script 
 
 ----------------------------------------------------------------------
 --  Syntax productions
 ----------------------------------------------------------------------
 
+between :: Parser s lbr -> Parser s rbr -> Parser s a -> Parser s a
+between = bracket
+
 n3SymLex :: N3Parser ScopedName
 n3SymLex = lexeme n3symbol
 
   <|> checkProofCmd
   <|> fwdChain
   <|> bwdChain
-  <?> "script command"
 
 prefixLine :: N3Parser (SwishStateIO ())
 prefixLine = do
-  try $ isymbol "@prefix"
+  -- try $ isymbol "@prefix"
+  isymbol "@prefix"
   getPrefix
   whiteSpace
   isymbol "."
             ; ags <- graphOrList
             ; isymbol "=>"
             ; cg  <- graphExpr
-            ; vms <- option [] varModifiers
+            ; vms <- varModifiers <|> pure []
             ; return $ ssDefineRule rn ags cg vms
             }
 
             ; ags <- graphOrList
             ; isymbol "=>"
             ; cn  <- n3SymLex
-            ; s <- getState             :: N3Parser N3State
-            ; let prefs = prefixUris s  :: NamespaceMap
+            ; s <- stGet
+            ; let prefs = prefixUris s
             ; return $ ssFwdChain sn rn ags cn prefs
             }
 
             ; cg  <- graphExpr
             ; isymbol "<="
             ; an  <- n3SymLex
-            ; s <- getState             :: N3Parser N3State
-            ; let prefs = prefixUris s  :: NamespaceMap
+            ; s <- stGet
+            ; let prefs = prefixUris s
             ; return $ ssBwdChain sn rn cg an prefs
             }
 
 --  Syntax clause helpers
 ----------------------------------------------------------------------
 
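+-- TODO: the identLetter check has been lost; without it a command name also
+-- matches when it is only the prefix of a longer identifier. Is this a problem?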
 commandName :: String -> N3Parser ()
-commandName cmd = try (string cmd *> notFollowedBy identLetter *> whiteSpace)
-
--- taken from NTParser
-eoln :: N3Parser ()
--- eoln = ignore (newline <|> (lineFeed *> optional newline))
-eoln = (try (string "\r\n") <|> string "\r" <|> string "\n") >> return ()
-       <?> "new line"
+-- commandName cmd = try (string cmd *> notFollowedBy identLetter *> whiteSpace)
+commandName cmd = symbol cmd *> pure ()
 
 restOfLine :: N3Parser String
-restOfLine = manyTill anyChar eoln <* whiteSpace
+restOfLine = manyTill (satisfy (const True)) eoln <* whiteSpace
   
 br :: N3Parser a -> N3Parser a
 br = between (symbol "(") (symbol ")")
 nameOrList =
   (toList <$> n3SymLex)      
   <|> nameList
-  <?> "Name, or list of names"
   
 graphExpr :: N3Parser (SwishStateIO (Either String RDFGraph))
 graphExpr =
         do  { f <- formulaExpr
             ; return $ liftM (liftM formExpr) f
             }
-    <?>
-        "Graph expression, graph name or named graph definition"
 
 graphOnly :: N3Parser (SwishStateIO (Either String RDFGraph))
 graphOnly =
         do  { isymbol "{"
             ; b <- newBlankNode
-            ; g <- subgraph b       :: N3Parser RDFGraph
+            ; g <- subgraph b
             ; isymbol "}"
-            ; s <- getState
+            ; s <- stGet
             ; let gp = setNamespaces (prefixUris s) g
             ; return $ return (Right gp)
             }
 
 graphList :: N3Parser [SwishStateIO (Either String RDFGraph)]
 graphList = br (many graphExpr)
-    <?> "List of graphs"
 
 graphOrList :: N3Parser [SwishStateIO (Either String RDFGraph)]
 graphOrList =
   (toList <$> graphExpr)
   <|> graphList
-  <?> "Graph, or list of graphs"
 
 formulaExpr :: N3Parser (SwishStateIO (Either String RDFFormula))
 formulaExpr = 
   (n3SymLex >>= namedGraph)
-  <?> "Formula (name or named graph)"
 
 namedGraph :: ScopedName -> N3Parser (SwishStateIO (Either String RDFFormula))
 namedGraph n =
 
 formulaList :: N3Parser [SwishStateIO (Either String RDFFormula)]
 formulaList = between (symbol "(") (symbol ")") (many formulaExpr)
-    <?> "List of formulae (names or named graphs)"
 
 varModifiers :: N3Parser [(ScopedName,[RDFLabel])]
 varModifiers = symbol "|" *> varModList
 --  Temporary implementation:  just read local file WNH     
 --  (Add logic to separate filenames from URIs, and
 --  attempt HTTP GET, or similar.)
-getResourceData :: Maybe String -> SwishStateIO (Either String String)
+getResourceData :: Maybe String -> SwishStateIO (Either String T.Text)
 getResourceData muri =
     case muri of
         Nothing  -> fromStdin
         Just uri -> fromUri uri
     where
     fromStdin =
-        do  { dat <- lift getContents
+        do  { dat <- lift IO.getContents
             ; return $ Right dat
             }
     fromUri = fromFile
     fromFile uri | "file://" `isPrefixOf` uri = do
-      dat <- lift $ readFile $ drop 7 uri
+      dat <- lift $ IO.readFile $ drop 7 uri
       return $ Right dat
                  | otherwise = error $ "Unsupported file name for read: " ++ uri
                                
 Name:               swish
-Version:            0.3.2.1
+Version:            0.4.0.0
 Stability:          experimental
 License:            LGPL
 License-file:       LICENSE 
   .
   Changes:
   .
+  [Version 0.4.0.0] Now uses polyparse rather than parsec for parsing, with @Text@
+  rather than @String@ where appropriate.
+  .
   [Version 0.3.2.1] Marked a number of routines from the Swish.Utils modules
   as deprecated. Use foldl' rather than foldl.
   .
 Library
    Build-Depends:
       base >=3 && < 5,
+      text == 0.11.*,
       binary == 0.5.*,
       bytestring == 0.9.*,
       containers == 0.3.*,
       array == 0.3.*,
       parallel == 2.2.*,
-      parsec == 2.1.*,
+      polyparse == 1.6.*,
       random == 1.0.*,
       old-time == 1.0.*, old-locale == 1.0.*, time == 1.1.*,
       mtl >= 1 && < 3,

tests/N3ParserTest.hs

 module Main where
 
 import Swish.RDF.N3Parser
-    ( parseN3fromString, parseN3
+    ( parseN3fromString -- , parseN3
     , parseTextFromString, parseAltFromString
     , parseNameFromString, parsePrefixFromString
     , parseAbsURIrefFromString, parseLexURIrefFromString
     , parseURIref2FromString
     )
 
+import qualified Swish.RDF.N3Parser as N3P
+
 import Swish.RDF.RDFGraph
     ( RDFGraph, RDFLabel(..), NSGraph(..)
     , LookupFormula(..)
 import Test.HUnit (Test(TestCase,TestList), assertEqual, runTestTT)
 
 import Data.Monoid (Monoid(..))
-import Data.List (intercalate)
+
+import qualified Data.Text.Lazy as T
+
+-- temporary routine during text conversion
+
+parseN3 :: String -> Maybe QName -> N3P.ParseResult
+parseN3 inp mbase = N3P.parseN3 (T.pack inp) mbase
 
 ------------------------------------------------------------
 --  Generic item parsing test wrapper
     " base3:s3 base3:p3 \"<em>chat</em>\"^^rdf:XMLLiteral . \n "
 
 emsg16 :: String
+{- parsec error
 emsg16 = intercalate "\n" [
   "",
   "@prefix base1 : <http://id.ninebynine.org/wip/2003/test/graph1/node/> . base1:s1 base1:p1 base1:o1 .  **** ",
   "unexpected \"*\"",
   "expecting declaration, \"@\", pathitem or end of input"
   ]
-
+-}
+emsg16 = "Expected end of input (EOF)"
 
 simpleTestSuite :: Test
 simpleTestSuite = TestList
     " base1:s1 base2:p2 unknown3:o3 . "
 
 fail1 :: String
+{- parsec error
 fail1 = intercalate "\n" [
          "",
          "@prefix base3 : <http://id.ninebynine.org/wip/2003/test/graph3/node> . ",
          "",
          "unexpected Prefix 'unknown3:' not bound."
         ]
+-}
+fail1 = "When looking for a non-empty sequence with separators:\n\tPrefix 'unknown3:' not bound."
 
 failTestSuite :: Test
 failTestSuite = TestList
 
 module Main where
 
-import Swish.RDF.NTParser (parseNT)
+-- import Swish.RDF.NTParser (parseNT)
+import qualified Swish.RDF.NTParser as NTP
 import Swish.RDF.NTFormatter (formatGraphAsString)
 
 import Swish.RDF.RDFGraph
     ( Test(TestCase,TestList)
     , assertEqual, runTestTT )
 
+import qualified Data.Text.Lazy as T
+
+-- temporary routine during text conversion
+
+parseNT :: String -> NTP.ParseResult
+parseNT = NTP.parseNT . T.pack
+
 ------------------------------------------------------------
 --  Parser tests
 ------------------------------------------------------------
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.