Source

text / scripts / CaseFolding.hs

The default branch has multiple heads

-- This script processes the following source file:
--
--   http://unicode.org/Public/UNIDATA/CaseFolding.txt

module CaseFolding
    (
      Fold(..)
    , parseCF
    , mapCF
    ) where

import Arsec

-- | One data record of @CaseFolding.txt@.
--
-- Each data line of that file has the shape
-- @\<code\>; \<status\>; \<mapping\>; # \<name\>@.
data Fold = Fold
    { code    :: Char
      -- ^ The code point being folded.
    , status  :: Char
      -- ^ Single-letter status of the mapping.  The Unicode data file
      -- documents these as @C@ (common), @F@ (full), @S@ (simple) and
      -- @T@ (Turkic-specific).
    , mapping :: [Char]
      -- ^ The code points that 'code' folds to, in order.
    , name    :: String
      -- ^ Descriptive text taken from the trailing @#@ comment of the line.
    }
    deriving (Eq, Ord, Show)

-- | Parser for the whole data file: any leading input matched by
-- 'comment' is skipped, then zero or more records are read, each one
-- optionally followed by further 'comment' matches.
entries :: Parser [Fold]
entries = skipComments *> many (record <* skipComments)
  where
    -- Zero or more consecutive matches of the 'comment' parser; the
    -- results are always discarded by the callers above.
    skipComments = many comment

    -- A single record: @code; status; mapping # name\\n@.
    record = Fold <$> (unichar <* semi)
                  <*> (oneOf "CFST" <* semi)
                  <*> unichars
                  <*> description

    -- Everything after the literal @"# "@ up to (and consuming, but not
    -- including) the terminating newline.
    description = string "# " *> manyTill anyToken (char '\n')

-- | Read the file at the given path and run the 'entries' parser over
-- its contents.  The path is also passed to 'parse' as the source name.
--
-- Returns 'Left' with the parse error on failure, otherwise 'Right'
-- with the parsed records.
parseCF :: FilePath -> IO (Either ParseError [Fold])
parseCF path = do
    contents <- readFile path
    return (parse entries path contents)

-- | Render the parsed records as the source text of a Haskell function
-- named @foldMapping@.
--
-- The result consists of, in order:
--
-- * the type signature and an INLINE pragma for @foldMapping@;
--
-- * one element per selected record, holding a comment line with the
--   record's 'name' followed by a newline and the matching equation;
--
-- * a final catch-all equation that falls back to @toLower@.
--
-- A record is selected when its 'status' is @C@ or @F@ and its
-- 'mapping' is not exactly the single character @toLower (code f)@;
-- such trivial mappings are left to the catch-all equation.
mapCF :: [Fold] -> [String]
mapCF folds = concat [header, [render f | f <- folds, wanted f], [fallback]]
  where
    header =
        [ "foldMapping :: forall s. Char -> s -> Step (CC s) Char"
        , "{-# INLINE foldMapping #-}"
        ]

    fallback = "foldMapping c s = Yield (toLower c) (CC s '\\0' '\\0')"

    wanted f = status f `elem` "CF" && mapping f /= [toLower (code f)]

    render f = concat
        [ "-- ", name f, "\n"
        , "foldMapping ", showC (code f), " s = Yield ", x
        , " (CC s ", y, " ", z, ")"
        ]
      where
        -- The mapping is padded with NULs, so the first three elements
        -- always exist; anything beyond the third is ignored.
        (x : y : z : _) = map showC (mapping f ++ repeat '\0')
Tip: Filter by directory path e.g. /media app.js to search for public/media/app.js.
Tip: Use camelCasing e.g. ProjME to search for ProjectModifiedEvent.java.
Tip: Filter by extension type e.g. /repo .js to search for all .js files in the /repo directory.
Tip: Separate your search with spaces e.g. /ssh pom.xml to search for src/ssh/pom.xml.
Tip: Use ↑ and ↓ arrow keys to navigate and return to view the file.
Tip: You can also navigate files with Ctrl+j (next) and Ctrl+k (previous) and view the file with Ctrl+o.
Tip: You can also navigate files with Alt+j (next) and Alt+k (previous) and view the file with Alt+o.