Commits

Bryan O'Sullivan committed 44bb247

Record leading comments from Unicode data files

This lets us easily see which version of Unicode we're up to date with.

Comments (0)

Files changed (4)

 module Arsec
     (
-      comment
+      Comment
+    , comment
     , semi
     , showC
     , unichar
 import Text.ParserCombinators.Parsec.Error
 import Text.ParserCombinators.Parsec.Prim hiding ((<|>), many)
 
+type Comment = String
+
 unichar :: Parser Char
 unichar = chr . fst . head . readHex <$> many1 hexDigit
 
 semi :: Parser ()
 semi = char ';' *> spaces *> pure ()
 
-comment :: Parser String
+comment :: Parser Comment
 comment = (char '#' *> manyTill anyToken (char '\n')) <|> string "\n"
 
 showC :: Char -> String

scripts/CaseFolding.hs

 
 module CaseFolding
     (
-      Fold(..)
+      CaseFolding(..)
+    , Fold(..)
     , parseCF
     , mapCF
     ) where
     , name :: String
     } deriving (Eq, Ord, Show)
 
-entries :: Parser [Fold]
-entries = many comment *> many (entry <* many comment)
+data CaseFolding = CF { cfComments :: [Comment], cfFolding :: [Fold] }
+                 deriving (Show)
+
+entries :: Parser CaseFolding
+entries = CF <$> many comment <*> many (entry <* many comment)
   where
     entry = Fold <$> unichar <* semi
                  <*> oneOf "CFST" <* semi
                  <*> unichars
                  <*> (string "# " *> manyTill anyToken (char '\n'))
 
-parseCF :: FilePath -> IO (Either ParseError [Fold])
+parseCF :: FilePath -> IO (Either ParseError CaseFolding)
 parseCF name = parse entries name <$> readFile name
 
-mapCF :: [Fold] -> [String]
-mapCF ms = typ ++ (map nice . filter p $ ms) ++ [last]
+mapCF :: CaseFolding -> [String]
+mapCF (CF _ ms) = typ ++ (map nice . filter p $ ms) ++ [last]
   where
     typ = ["foldMapping :: forall s. Char -> s -> Step (CC s) Char"
            ,"{-# INLINE foldMapping #-}"]

scripts/CaseMapping.hs

            Left err -> print err >> return undefined
            Right ms -> return ms
   h <- openFile oname WriteMode
-  mapM_ (hPutStrLn h) ["{-# LANGUAGE Rank2Types #-}"
+  let comments = map ("--" ++) $
+                 take 2 (cfComments cfs) ++ take 2 (scComments scs)
+  mapM_ (hPutStrLn h) $
+                      ["{-# LANGUAGE Rank2Types #-}"
                       ,"-- AUTOMATICALLY GENERATED - DO NOT EDIT"
-                      ,"-- Generated by scripts/SpecialCasing.hs"
+                      ,"-- Generated by scripts/SpecialCasing.hs"] ++
+                      comments ++
+                      [""
                       ,"module Data.Text.Fusion.CaseMapping where"
                       ,"import Data.Char"
                       ,"import Data.Text.Fusion.Internal"

scripts/SpecialCasing.hs

 
 module SpecialCasing
     (
-      Case(..)
+      SpecialCasing(..)
+    , Case(..)
     , parseSC
     , mapSC
     ) where
 
 import Arsec
 
+data SpecialCasing = SC { scComments :: [Comment], scCasing :: [Case] }
+                   deriving (Show)
+
 data Case = Case {
       code :: Char
     , lower :: [Char]
     , name :: String
     } deriving (Eq, Ord, Show)
 
-entries :: Parser [Case]
-entries = many comment *> many (entry <* many comment)
+entries :: Parser SpecialCasing
+entries = SC <$> many comment <*> many (entry <* many comment)
   where
     entry = Case <$> unichar <* semi
                  <*> unichars
                  <*> manyTill anyToken (string "# ")
                  <*> manyTill anyToken (char '\n')
 
-parseSC :: FilePath -> IO (Either ParseError [Case])
+parseSC :: FilePath -> IO (Either ParseError SpecialCasing)
 parseSC name = parse entries name <$> readFile name
 
-mapSC :: String -> (Case -> String) -> (Char -> Char) -> [Case] -> [String]
-mapSC which access twiddle ms = typ ++ (map nice . filter p $ ms) ++ [last]
+mapSC :: String -> (Case -> String) -> (Char -> Char) -> SpecialCasing
+         -> [String]
+mapSC which access twiddle (SC _ ms) =
+    typ ++ (map nice . filter p $ ms) ++ [last]
   where
     typ = [which ++ "Mapping :: forall s. Char -> s -> Step (CC s) Char"
            ,"{-# INLINE " ++ which ++ "Mapping #-}"]