Doug Burke avatar Doug Burke committed 9d6a4ed

LangLit now uses the new LanguageTag type ratehr than ScopedName to represent language tags

Comments (0)

Files changed (16)

 0.7.0.0:
 
   - RDF literals are now stored using the Lit, LangLit, or TypedLit
-    constructors (from RDFLabel) rather than using just Lit.
+    constructors (from RDFLabel) rather than using just Lit. Language
+    codes are now represented by Swish.RDF.Vocabulary.LanguageTag
+    rather than as a ScopedName.
 
   - Removed mkTypedLit from Swish.RDF.RDFParser; use
     Swish.RDF.RDFDatatype.makeDataTypedLiteral instead.
 - Should there be a type-level constaint that an RDF Arc can only have
   a literal in the object position?
 
-- Change LangLit to have a language-code datatype rather than use
-  a ScopedName.
-
 - can items be moved out of Swidh.RDF.RDFGraph (e.g. the RDFlabel
   re-exports)?
 

src/Swish/RDF/N3Formatter.hs

   )
 
 import Swish.RDF.Vocabulary (
-  langTag, 
+  fromLangTag, 
   rdfType,
   rdfNil,
   owlSameAs, logImplies
     | dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = return $ B.fromText lit
     | otherwise = return $ quoteText lit `mappend` "^^" `mappend` showScopedName dtype
 formatLabel _ (LangLit lit lcode) =
-    return $ quoteText lit `mappend` "@" `mappend` B.fromText (langTag lcode)
+    return $ quoteText lit `mappend` "@" `mappend` B.fromText (fromLangTag lcode)
 formatLabel _ (Lit lit) = return $ quoteText lit
 
 formatLabel _ lab = return $ B.fromString $ show lab

src/Swish/RDF/N3Parser.hs

 import Swish.Utils.QName (QName)
 
 import Swish.RDF.Vocabulary
-    ( langName
+    ( LanguageTag
+    , toLangTag
     , rdfType
     , rdfFirst, rdfRest, rdfNil
     , owlSameAs, logImplies
                Just (Right dtype) -> TypedLit lit dtype
                _                  -> Lit lit
   
-dtlang :: N3Parser (Either ScopedName ScopedName)
+dtlang :: N3Parser (Either LanguageTag ScopedName)
 dtlang = 
   (char '@' *> (Left <$> langcode))
   <|> string "^^" *> (Right <$> n3symbol)
 
-langcode :: N3Parser ScopedName
+-- Note that toLangTag may fail since it does some extra
+-- validation not done by the parser (mainly on the length of the
+-- primary and secondary tags).
+--
+-- NOTE: This parser does not accept multiple secondary tags which RFC3066
+-- does.
+--
+langcode :: N3Parser LanguageTag
 langcode = do
-  h <- many1Satisfy isaz
-  mt <- optional ( L.append <$> (char '-' *> pure (L.singleton '-')) <*> many1Satisfy isaz09)
-  return $ langName $ L.toStrict $ L.append h (fromMaybe L.empty mt)
+    h <- many1Satisfy isaz
+    mt <- optional (L.cons <$> char '-' <*> many1Satisfy isaz09)
+    let lbl = L.toStrict $ L.append h $ fromMaybe L.empty mt
+    case toLangTag lbl of
+        Just lt -> return lt
+        _ -> fail ("Invalid language tag: " ++ T.unpack lbl) -- should this be failBad?
     
 {-
 decimal ::=	[-+]?[0-9]+(\.[0-9]+)?

src/Swish/RDF/NTFormatter.hs

     ( Arc(..) )
 
 import Swish.Utils.Namespace (ScopedName, getQName)
-import Swish.RDF.Vocabulary (langTag)
+import Swish.RDF.Vocabulary (fromLangTag)
 
 import Swish.Utils.LookupMap
     ( LookupMap, emptyLookupMap
 formatLabel lab@(Blank _) = mapBlankNode lab
 formatLabel (Res sn) = return $ showScopedName sn
 formatLabel (Lit lit) = return $ quoteText lit
-formatLabel (LangLit lit lang) = return $ mconcat [quoteText lit, at, B.fromText (langTag lang)]
+formatLabel (LangLit lit lang) = return $ mconcat [quoteText lit, at, B.fromText (fromLangTag lang)]
 formatLabel (TypedLit lit dt)  = return $ mconcat [quoteText lit, carets, showScopedName dt]
 
 -- do not expect to get the following, but include

src/Swish/RDF/NTParser.hs

 
 import Swish.Utils.Namespace (ScopedName, makeURIScopedName)
 
-import Swish.RDF.Vocabulary (langName)
+import Swish.RDF.Vocabulary (LanguageTag, toLangTag)
 
 import Swish.RDF.RDFParser ( ParseResult
     , runParserWithError
 ntstring :: NTParser String
 ntstring = bracket (char '"') (char '"') (many character)
 
-dtlang :: NTParser (Either ScopedName ScopedName)
+dtlang :: NTParser (Either LanguageTag ScopedName)
 dtlang = 
     (char '@' *> (Left <$> language))
     <|> (string "^^" *> (Right <$> uriref))
 
-language :: NTParser ScopedName
+-- Note that toLangTag may fail since it does some extra
+-- validation not done by the parser (mainly on the length of the
+-- primary and secondary tags).
+--
+-- NOTE: This parser does not accept multiple secondary tags which RFC3066
+-- does.
+--
+language :: NTParser LanguageTag
 language = do
-  h <- many1Satisfy isaz
-  mt <- optional ( L.cons <$> char '-' <*> many1Satisfy (\c -> isaz c || is09 c) )
-  return $ langName $ L.toStrict $ L.append h $ fromMaybe L.empty mt
+    h <- many1Satisfy isaz
+    mt <- optional ( L.cons <$> char '-' <*> many1Satisfy (\c -> isaz c || is09 c) )
+    let lbl = L.toStrict $ L.append h $ fromMaybe L.empty mt
+    case toLangTag lbl of
+        Just lt -> return lt
+        _ -> fail ("Invalid language tag: " ++ T.unpack lbl) -- should this be failBad?
 
 {-
 String handling: 

src/Swish/RDF/RDFGraph.hs

 --      This might otherwise be handled by @Maybe (RDFLabel g)@.
 --
 -- Prior to version @0.7.0.0@, literals were represented by a
--- single constructor, @Lit@, with an optional argument.
+-- single constructor, @Lit@, with an optional argument. Language
+-- codes for literals was also stored as a 'ScopedName' rather than
+-- as a 'LanguageTag'.
 --
 data RDFLabel =
       Res ScopedName                    -- ^ resource
     | Lit T.Text                        -- ^ a plain literal
-    | LangLit T.Text ScopedName         -- ^ a literal with an associated language
+    | LangLit T.Text LanguageTag        -- ^ a literal with an associated language
     | TypedLit T.Text ScopedName        -- ^ a literal with an associated data type
     | Blank String                      -- ^ blank node
     | Var String                        -- ^ variable (not used in ordinary graphs)
     | NoNode                            -- ^ no node  (not used in ordinary graphs)
 
--- TODO: LangLit should use a language encoding type rather than a scoped name.
-
-langCompare :: ScopedName -> ScopedName -> Bool
-langCompare = (==) `on` (T.toLower . langTag)
-
 -- | Define equality of nodes possibly based on different graph types.
 --
 -- The equality of literals is taken from section 6.5.1 ("Literal
     Var v1   == Var v2   = v1 == v2
 
     Lit s1         == Lit s2         = s1 == s2
-    LangLit s1 t1  == LangLit s2 t2  = s1 == s2 && langCompare t1 t2
+    LangLit s1 l1  == LangLit s2 l2  = s1 == s2 && l1 == l2
     TypedLit s1 t1 == TypedLit s2 t2 = s1 == s2 && t1 == t2
 
     _  == _ = False
 instance Show RDFLabel where
     show (Res sn)           = show sn
     show (Lit st)           = quote1Str st
-    show (LangLit st lang)  = quote1Str st ++ "@"  ++ T.unpack (langTag lang)
+    show (LangLit st lang)  = quote1Str st ++ "@"  ++ T.unpack (fromLangTag lang)
     show (TypedLit st dtype) 
         | dtype `elem` [xsdBoolean, xsdDouble, xsdDecimal, xsdInteger] = T.unpack st
         | otherwise  = quote1Str st ++ "^^" ++ show dtype
 showCanon :: RDFLabel -> String
 showCanon (Res sn)           = "<"++show (getScopedNameURI sn)++">"
 showCanon (Lit st)           = show st
-showCanon (LangLit st lang)  = quote1Str st ++ "@"  ++ T.unpack (langTag lang)
+showCanon (LangLit st lang)  = quote1Str st ++ "@"  ++ T.unpack (fromLangTag lang)
 showCanon (TypedLit st dt)   = quote1Str st ++ "^^" ++ show (getScopedNameURI dt)
 showCanon s                  = show s
 

src/Swish/RDF/TurtleFormatter.hs

   , resRdfFirst, resRdfRest, resRdfNil
   )
 
-import Swish.RDF.Vocabulary (
-  langTag 
-  , rdfType
-  , rdfNil
-  , xsdBoolean, xsdDecimal, xsdInteger, xsdDouble 
-  )
+import Swish.RDF.Vocabulary ( fromLangTag 
+                            , rdfType
+                            , rdfNil
+                            , xsdBoolean, xsdDecimal, xsdInteger, xsdDouble 
+                            )
 
 import Swish.RDF.GraphClass (Arc(..))
 
 -- we just convert E to e for now.      
 --      
 formatLabel _ (Lit lit) = return $ quoteText lit
-formatLabel _ (LangLit lit lcode) = return $ quoteText lit `mappend` "@" `mappend` B.fromText (langTag lcode)
+formatLabel _ (LangLit lit lcode) = return $ quoteText lit `mappend` "@" `mappend` B.fromText (fromLangTag lcode)
 formatLabel _ (TypedLit lit dtype)
     | dtype == xsdDouble = return $ B.fromText $ T.toLower lit
     | dtype `elem` [xsdBoolean, xsdDecimal, xsdInteger] = return $ B.fromText lit

src/Swish/RDF/TurtleParser.hs

     )
 
 import Swish.RDF.Vocabulary
-    ( langName
+    ( LanguageTag
+    , toLangTag
     , rdfType
     , rdfFirst, rdfRest, rdfNil
     , xsdBoolean, xsdInteger, xsdDecimal, xsdDouble
 I am ignoring the BASE and PREFIX lines here as they don't make sense to me.
 -}
 
-_langTag :: TurtleParser ScopedName
+-- Note that toLangTag may fail since it does some extra
+-- validation not done by the parser (mainly on the length of the
+-- primary and secondary tags).
+--
+-- NOTE: This parser does not accept multiple secondary tags which RFC3066
+-- does.
+--
+_langTag :: TurtleParser LanguageTag
 _langTag = do
-  ichar '@'
-  h <- many1Satisfy isaZ
-  mt <- optional (L.cons <$> char '-' <*> many1Satisfy isaZ09)
-  return $ langName $ L.toStrict $ L.append h (fromMaybe L.empty mt)
+    ichar '@'
+    h <- many1Satisfy isaZ
+    mt <- optional (L.cons <$> char '-' <*> many1Satisfy isaZ09)
+    let lbl = L.toStrict $ L.append h $ fromMaybe L.empty mt
+    case toLangTag lbl of
+        Just lt -> return lt
+        _ -> fail ("Invalid language tag: " ++ T.unpack lbl) -- should this be failBad?
   
 {-
 [77s] <INTEGER> ::= [0-9]+ 

src/Swish/RDF/Vocabulary.hs

     , namespaceDAML
     , namespaceDefault
     , namespaceSwish 
-    , namespaceLang
+
     -- ** RDF rules                                     
     -- | The namespaces refer to RDF rules and axioms.                                     
     , scopeRDF
     , scopeRDFS
     , scopeRDFD
+
+    -- * Language tags
+    --
+    -- | Support for language tags that follow RFC 3066.
+    -- 
+    -- This replaces the use of @ScopedName@ and @langName@, @langTag@,
+    -- and @isLang@ in versions prior to @0.7.0.0@.
+    --
+    , LanguageTag
+    , toLangTag
+    , fromLangTag
     
     -- * Miscellaneous routines
-    , langName, langTag, isLang
     , swishName
-      
-    -- * Miscellaneous     
     , rdfdGeneralRestriction
     , rdfdOnProperties, rdfdConstraint, rdfdMaxCardinality
     , logImplies
 import Swish.RDF.Vocabulary.OWL
 import Swish.RDF.Vocabulary.XSD
 
-import Swish.Utils.Namespace (Namespace, makeNamespace, ScopedName, getScopeLocal, getScopeNamespace, makeNSScopedName)
+import Swish.Utils.Namespace (Namespace, makeNamespace, ScopedName, makeNSScopedName)
 
+import Data.Char (isDigit)
 import Data.Monoid (mappend, mconcat)
 import Data.Maybe (fromMaybe)
 import Network.URI (URI, parseURI)
 namespaceDefault :: Namespace
 namespaceDefault = toNSU "default" namespaceDefaultURI
 
--- | Maps @lang@ to @http:\/\/id.ninebynine.org\/2003\/Swish\/Lang\/@.
-namespaceLang :: Namespace
-namespaceLang    = toNSU "lang"   namespaceLangURI
-
-
 tU :: String -> URI
 tU = fromMaybe (error "Internal error processing namespace URI") . parseURI
 
 namespaceRDFDURI, 
   namespaceLOGURI,
   namespaceSwishURI, 
-  namespaceLangURI, namespaceDefaultURI :: URI
+  namespaceDefaultURI :: URI
 namespaceRDFDURI  = tU "http://id.ninebynine.org/2003/rdfext/rdfd#"
 namespaceLOGURI   = tU "http://www.w3.org/2000/10/swap/log#"
 namespaceSwishURI = tU "http://id.ninebynine.org/2003/Swish/"
-namespaceLangURI  = tU "http://id.ninebynine.org/2003/Swish/Lang/" -- To be replaced by urn:ietf:params:lang?  
 namespaceDefaultURI = tU "http://id.ninebynine.org/default/"
 
 -- | Convert a local name to a scoped name in the @swish@ namespace (`namespaceSwish`).
 --  Fortunately, they do not currently need to appear in Notation3 as
 --  distinct labels (but future developments may change that).
 
--- | Convert the label to a scoped name in the @lang@ namespace (`namespaceLang`).
-langName :: 
-  T.Text  -- ^ The lower-case version of this label is used.
-  -> ScopedName
-langName = makeNSScopedName namespaceLang . T.toLower
+-- | Represent the language tag for a literal string, following
+-- RFC 3066 <http://www.ietf.org/rfc/rfc3066.txt>.
+--
+-- Use 'toLangTag' to create a tag and 'fromLangTag' to
+-- convert back. The only guarantee is that
+--
+-- > (fromLangTag . toLangTag) lt == T.toLower lt
+--
+data LanguageTag = 
+    LanguageTag 
+    T.Text       -- ^ full value
+    T.Text       -- ^ primary tag
+    [T.Text]     -- ^ sub tags
 
--- | Get the name of the language tag (note that the result is
--- only guaranteed to be semantically valid if 'isLang' returns @True@
--- but that there is no enforcement of this requirement).
-langTag :: ScopedName -> T.Text
-langTag = getScopeLocal
+instance Show LanguageTag where
+    show = T.unpack . fromLangTag
 
--- | Is the scoped name in the `namespaceLang` namespace?
-isLang :: ScopedName -> Bool
-isLang sname = getScopeNamespace sname == namespaceLang
+-- | The equality test matches on the full definition, so
+-- @en-GB@ does not match @en@.
+instance Eq LanguageTag where
+    LanguageTag f1 _ _ == LanguageTag f2 _ _ = f1 == f2
+
+-- | Create a 'LanguageTag' element from the label.
+-- 
+-- Valid tags follow the ABNF from RCF 3066, which is
+--
+--    Language-Tag = Primary-subtag *( "-" Subtag )
+--    Primary-subtag = 1*8ALPHA
+--    Subtag = 1*8(ALPHA / DIGIT)
+--
+-- There are no checks that the primary or secondary sub tag
+-- values are defined in any standard, such as ISO 639,
+-- or obey any other syntactical restriction than given above.
+-- 
+toLangTag :: T.Text -> Maybe LanguageTag
+toLangTag lbl = 
+    let tag = T.toLower lbl
+        toks = T.split (=='-') tag
+    in if all (\s -> let l = T.length s in l > 0 && l < 9) toks
+       then let primtag : subtags = toks
+                isVChar c = c >= 'a' && c <= 'z'
+            in if T.all isVChar primtag && all (T.all (\c -> isVChar c || isDigit c)) subtags
+               then Just $ LanguageTag tag primtag subtags
+               else Nothing
+       else Nothing
+
+-- | Convert a language tag back into text form.
+fromLangTag :: LanguageTag -> T.Text
+fromLangTag (LanguageTag f _ _) = f
+
+-- TODO: use Language Range (section 2.5 of RFC 3066) to support
+-- language comparison
 
 ------------------------------------------------------------
 --  Define namespaces for RDF rules, axioms, etc
   Changes in version @0.7.0.0@:
   .
   * RDF literals are now stored using the @Lit@, @LangLit@, or @TypedLit@ constructors
-  (@RDFLabel@) rather than using just @Lit@.
+  (@RDFLabel@) rather than using just @Lit@. Language codes are now represented
+  by @Swish.RDF.Vocabulary.LanguageTag@ rather than as a @ScopedName@.
   .
   * Removed @mkTypedLit@ from @Swish.RDF.RDFParser@; use
   @Swish.RDF.RDFDatatype.makeDataTypedLiteral@ instead.

tests/N3FormatterTest.hs

 
 import Swish.RDF.GraphClass (Arc, arc)
 
-import Swish.RDF.Vocabulary (langName, namespaceRDF, namespaceXSD)
+import Swish.RDF.Vocabulary (toLangTag, namespaceRDF, namespaceXSD)
 
 import Network.URI (URI, parseURI)
 
 l14 = toL l14txt
 
 lfr, lfoobar :: RDFLabel
-lfr = LangLit "chat et chien" (langName "fr")
+lfr = LangLit "chat et chien" (fromJust $ toLangTag "fr")
 lfoobar = TypedLit "foo bar" (makeNSScopedName base1 "o1")
   
 f1, f2 :: RDFLabel

tests/N3ParserTest.hs

 
 import Swish.RDF.Vocabulary
     ( namespaceRDF
-    , langName
+    , toLangTag
     , rdfXMLLiteral
     , xsdBoolean 
     , xsdInteger
 import Network.URI (URI, nullURI, parseURIReference)
 
 import Data.Monoid (Monoid(..))
-import Data.Maybe (fromMaybe)
+import Data.Maybe (fromMaybe, fromJust)
 import Data.List (intercalate)
 
 import qualified Data.Text as T
 l3 = Lit "l3--\r\"'\\--\x0020\&--\x00A0\&--"
 
 lfr, lxml, lfrxml :: RDFLabel
-lfr    = LangLit "chat"           $ langName "fr"
+lfr    = LangLit "chat"           $ fromJust $ toLangTag "fr"
 lxml   = TypedLit "<br/>"         rdfXMLLiteral
 lfrxml = TypedLit "<em>chat</em>" rdfXMLLiteral
 
 
 -- import Swish.Utils.Namespace (makeURIScopedName)
 
-import Swish.RDF.Vocabulary (langName, rdfXMLLiteral)
+import Swish.RDF.Vocabulary (toLangTag, rdfXMLLiteral)
 
 import Swish.RDF.GraphClass (arc)
 
 
 import qualified Data.Text.Lazy as T
 
+import Data.Maybe (fromJust)
 import TestHelpers (runTestSuite)
 
 ------------------------------------------------------------
 l4 = Lit "l4 \\"
 
 lfr, lgben, lxml1, lxml2 :: RDFLabel
-lfr    = LangLit "chat"           $ langName "fr"
-lgben  = LangLit "football"       $ langName "en-gb"
+lfr    = LangLit "chat"           $ fromJust $ toLangTag "fr"
+lgben  = LangLit "football"       $ fromJust $ toLangTag "en-gb"
 lxml1  = TypedLit "<br/>"         rdfXMLLiteral
 lxml2  = TypedLit "<em>chat</em>" rdfXMLLiteral
 

tests/RDFGraphTest.hs

 
 import Swish.RDF.Vocabulary
   ( namespaceRDF
-  , langName 
+  , LanguageTag
+  , toLangTag 
   , rdfXMLLiteral
   , xsdBoolean
   , xsdInteger
 --  Test language tag comparisons
 ------------------------------------------------------------
 
-type Lang = Maybe ScopedName
+type Lang = Maybe LanguageTag
+
+toLT :: T.Text -> LanguageTag
+toLT = fromJust . toLangTag
 
 lt0, lt1, lt2, lt3, lt4, lt5, lt6,
   lt7, lt8 :: Lang
 lt0 = Nothing
-lt1 = Just (langName "en")
-lt2 = Just (langName "EN")
-lt3 = Just (langName "fr")
-lt4 = Just (langName "FR")
-lt5 = Just (langName "en-us")
-lt6 = Just (langName "en-US")
-lt7 = Just (langName "EN-us")
-lt8 = Just (langName "EN-US")
+lt1 = Just (toLT "en")
+lt2 = Just (toLT "EN")
+lt3 = Just (toLT "fr")
+lt4 = Just (toLT "FR")
+lt5 = Just (toLT "en-us")
+lt6 = Just (toLT "en-US")
+lt7 = Just (toLT "EN-us")
+lt8 = Just (toLT "EN-US")
 
 langlist :: [(String, Lang)]
 langlist =
 l1, l2, l2gb, l3, l4, l5, l6, l7, l8,
   l9, l10, l11, l12 :: RDFLabel
 l1   = "l1" -- use IsString instance
-l2   = LangLit "l2"  (langName "en")
-l2gb = LangLit "l2"  (langName "en-gb")
-l3   = LangLit "l2"  (langName "fr")
+l2   = LangLit "l2"  (toLT "en")
+l2gb = LangLit "l2"  (toLT "en-gb")
+l3   = LangLit "l2"  (toLT "fr")
 l4   = TypedLit "l4"  qb1t1    
 l5   = TypedLit "l4"  qb1t1           
 l6   = TypedLit "l4"  qb1t1           

tests/RDFQueryTest.hs

     )
 
 import Swish.Utils.Namespace (getNamespaceURI, ScopedName, makeScopedName)
-import Swish.RDF.Vocabulary (namespaceRDF, langName, swishName, rdfType, rdfXMLLiteral)
+import Swish.RDF.Vocabulary (namespaceRDF, toLangTag, swishName, rdfType, rdfXMLLiteral)
 import Swish.RDF.N3Parser (parseN3)
 
 import Test.HUnit ( Test(TestList) )
 
 l_1, l_2, l_3, l_4, l_5 :: RDFLabel
 l_1   = Lit "l1"
-l_2   = LangLit "l2" $ langName "fr"
+l_2   = LangLit "l2" $ fromJust $ toLangTag "fr"
 l_3   = TypedLit "l3" q_dattyp
 l_4   = TypedLit "l4" q_dattyp
 l_5   = TypedLit "l5" rdfXMLLiteral
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.