diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index c6c7fc0..e48ce89 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -59,8 +59,16 @@ jobs: restore-keys: | ${{ runner.os }}-cabal-cache-${{ env.CURR_MONTH }}-${{ matrix.ghc_version }}- - - name: Build + Test - run: cabal test + name: Build + run: cabal build + - + name: Install dotslash + run: > + curl -fsSL https://github.com/facebook/dotslash/releases/latest/download/dotslash-ubuntu-22.04.$(uname -m).tar.gz + | tar xzf - -C /usr/local/bin/ + - + name: Test + run: cabal exec cabal test lint: runs-on: ubuntu-latest diff --git a/.gitignore b/.gitignore index 2d8bfce..c9c6abe 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ .DS_Store dist-newstyle/ +cabal.project.local* diff --git a/CHANGELOG.md b/CHANGELOG.md index fd05e1e..d3cd63f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,6 @@ ## Unreleased +* Implement KDL v2 parser * Implement `KDL.render`, which is format-preserving * Improve rendering parse errors * Include filepath in error messages when `decodeFileWith` fails diff --git a/kdl-hs.cabal b/kdl-hs.cabal index 72cf410..8162355 100644 --- a/kdl-hs.cabal +++ b/kdl-hs.cabal @@ -15,6 +15,8 @@ build-type: Simple extra-source-files: README.md CHANGELOG.md + test/KDL/__snapshots__/DecoderSpec.snap.md + test/KDL/__snapshots__/ParserSpec.snap.md source-repository head type: git @@ -34,14 +36,9 @@ library KDL.Decoder.Monad KDL.Decoder.Schema KDL.Parser + KDL.Parser.Internal KDL.Render KDL.Types - other-modules: - KDL.Parser.Hustle - KDL.Parser.Hustle.Formatter - KDL.Parser.Hustle.Internal - KDL.Parser.Hustle.Parser - KDL.Parser.Hustle.Types build-depends: base < 5 , containers @@ -53,10 +50,25 @@ library default-language: GHC2021 ghc-options: -Wall -Wcompat +executable kdl-hs-test-decoder + main-is: test/kdl-hs-test-decoder.hs + build-depends: + base < 5 + , aeson + , bytestring + , containers + , kdl-hs + , scientific + , text + default-language: GHC2021 + ghc-options: -Wall -Wcompat + test-suite kdl-tests type: exitcode-stdio-1.0 ghc-options: -F -pgmF=skeletest-preprocessor - build-tool-depends: skeletest:skeletest-preprocessor + build-tool-depends: + , skeletest:skeletest-preprocessor + , kdl-hs:kdl-hs-test-decoder hs-source-dirs: test main-is: Main.hs other-modules: @@ -65,12 +77,17 @@ test-suite kdl-tests KDL.Decoder.ArrowSpec KDL.Decoder.MonadSpec KDL.ParserSpec + KDL.TestUtils.AST KDL.TestUtils.Error build-depends: base , containers + , directory , filepath , kdl-hs + , pretty-show + , process + , scientific , skeletest , temporary , text diff --git a/scripts/kdl-test b/scripts/kdl-test new file mode 100755 index 0000000..1790873 --- /dev/null +++ b/scripts/kdl-test @@ -0,0 +1,54 @@ +#!/usr/bin/env dotslash +{ + "name": "kdl-test-0.2.0", + "platforms": { + "linux-x86_64": { + "size": 583741, + "hash": "sha256", + "digest": "1627ea50594c7c322ed5c7fb0d0063b2ac20a7a9460acc9363fa1777c92364c3", + "format": "tar.gz", + "path": "kdl-test", + "providers": [ + { + "url": "https://github.com/brandonchinn178/kdl-test/releases/download/v0.2.0/kdl-test-0.2.0-linux-x86_64.tar.gz" + } + ] + }, + "linux-aarch64": { + "size": 568646, + "hash": "sha256", + "digest": "f3ecbdb2225b6abc0d69a451be76eca4d095d835ee2e7830a880b4bfa051e6ee", + "format": "tar.gz", + "path": "kdl-test", + "providers": [ + { + "url": "https://github.com/brandonchinn178/kdl-test/releases/download/v0.2.0/kdl-test-0.2.0-linux-arm64.tar.gz" + } + ] + }, + "macos-x86_64": { + "size": 555773, + "hash": "sha256", + "digest": "2d66b5409889992554dbd9bf04c2817552b2c0416a2793204e2e4eea06bc4da4", + "format": "tar.gz", + "path": "kdl-test", + "providers": [ + { + "url": "https://github.com/brandonchinn178/kdl-test/releases/download/v0.2.0/kdl-test-0.2.0-darwin-x86_64.tar.gz" + } + ] + }, + "macos-aarch64": { + "size": 529715, + "hash": "sha256", + "digest": "abfcade1d1ce02810dc155b95ef0720e9c679d62cf50c7ab5400741909978eb2", + "format": "tar.gz", + "path": "kdl-test", + "providers": [ + { + "url": "https://github.com/brandonchinn178/kdl-test/releases/download/v0.2.0/kdl-test-0.2.0-darwin-arm64.tar.gz" + } + ] + } + } +} diff --git a/src/KDL/Decoder/Internal/Error.hs b/src/KDL/Decoder/Internal/Error.hs index 8755691..8b68cf6 100644 --- a/src/KDL/Decoder/Internal/Error.hs +++ b/src/KDL/Decoder/Internal/Error.hs @@ -67,22 +67,22 @@ data BaseDecodeError renderDecodeError :: DecodeError -> Text renderDecodeError decodeError = Text.intercalate "\n" - . addPath decodeError.filepath - . map renderCtxErrors + . concatMap renderCtxErrors . groupCtxErrors $ decodeError.errors where -- Group errors with the same contexts together groupCtxErrors es = Map.toAscList $ Map.fromListWith (<>) [(ctx, [e]) | (ctx, e) <- es] - addPath = \case - Nothing -> id - Just fp -> let msg = "Failed to decode " <> Text.pack fp <> ":" in (msg :) + addPath = + case decodeError.filepath of + Nothing -> id + Just fp -> let msg = "Failed to decode " <> Text.pack fp <> ":" in (msg :) renderCtxErrors = \case -- Special case parse errors, which shouldn't have a context - (_, [DecodeError_ParseError msg]) -> msg - (ctx, errs) -> Text.intercalate "\n" $ ("At: " <> renderCtxItems ctx) : renderErrors errs + (_, [DecodeError_ParseError msg]) -> [msg] + (ctx, errs) -> addPath $ ("At: " <> renderCtxItems ctx) : renderErrors errs renderCtxItems items | null items = "" diff --git a/src/KDL/Parser.hs b/src/KDL/Parser.hs index 85556b8..b830e0d 100644 --- a/src/KDL/Parser.hs +++ b/src/KDL/Parser.hs @@ -1,91 +1,28 @@ {-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} +{-| +Implement the v2 parser specified at: https://kdl.dev/spec/#name-full-grammar +-} module KDL.Parser ( parse, parseFile, ) where -import Data.Map qualified as Map +import Data.Bifunctor (first) import Data.Text (Text) import Data.Text qualified as Text import Data.Text.IO qualified as Text -import KDL.Parser.Hustle qualified as Hustle -import KDL.Types ( - Ann (..), - Document, - Entry (..), - Identifier (..), - Node (..), - NodeList (..), - Value (..), - ValueData (..), - ValueFormat (..), - ) +import KDL.Parser.Internal (p_document) +import KDL.Types (Document) +import Text.Megaparsec qualified as Megaparsec --- TODO: Implement our own parser that implements the v2.0.0 spec + preserves formatting and comments parse :: Text -> Either Text Document -parse input = - case Hustle.parse Hustle.document "" input of - Left e -> Left . Text.strip . Text.pack . Hustle.errorBundlePretty $ e - Right (Hustle.Document nodes) -> Right $ fromNodes nodes - where - fromNodes nodes = - NodeList - { nodes = map fromNode nodes - , format = Nothing - } +parse = parse' "" - fromAnn identifier = - Ann - { identifier = fromIdentifier identifier - , format = Nothing - } - - fromNode Hustle.Node{..} = - Node - { ann = fromAnn <$> nodeAnn - , name = fromIdentifier nodeName - , entries = map fromArgEntry nodeArgs <> map fromPropEntry (Map.toList nodeProps) - , children = Just $ fromNodes nodeChildren - , format = Nothing - } - - fromArgEntry v = - Entry - { name = Nothing - , value = fromValue v - , format = Nothing - } - - fromPropEntry (name, v) = - Entry - { name = Just $ fromIdentifier name - , value = fromValue v - , format = Nothing - } - - fromValue Hustle.Value{..} = - Value - { ann = fromAnn <$> valueAnn - , data_ = - case valueExp of - Hustle.StringValue s -> Text s - Hustle.IntegerValue x -> Number (fromInteger x) - Hustle.SciValue x -> Number x - Hustle.BooleanValue x -> Bool x - Hustle.NullValue -> Null - , format = - case valueExp of - Hustle.IntegerValue x -> Just ValueFormat{repr = Text.pack $ show x} - _ -> Nothing - } - - fromIdentifier (Hustle.Identifier s) = - Identifier - { value = s - , format = Nothing - } +parse' :: FilePath -> Text -> Either Text Document +parse' fp input = + first (Text.strip . Text.pack . Megaparsec.errorBundlePretty) $ + Megaparsec.parse p_document fp input parseFile :: FilePath -> IO (Either Text Document) -parseFile = fmap parse . Text.readFile +parseFile fp = parse' fp <$> Text.readFile fp diff --git a/src/KDL/Parser/Hustle.hs b/src/KDL/Parser/Hustle.hs deleted file mode 100644 index 45f9b89..0000000 --- a/src/KDL/Parser/Hustle.hs +++ /dev/null @@ -1,28 +0,0 @@ -{- FOURMOLU_DISABLE -} - -{- | Vendered from https://github.com/fuzzypixelz/hustle -} -module KDL.Parser.Hustle - ( Parser - , Document(..) - , Node(..) - , Value(..) - , ValueType(..) - , Identifier(..) - , pretty - , document - , parse - , errorBundlePretty - ) where - -import KDL.Parser.Hustle.Formatter ( Pretty(pretty) ) -import KDL.Parser.Hustle.Parser ( document ) -import KDL.Parser.Hustle.Types ( Document(..) - , Identifier(..) - , Node(..) - , Parser - , Value(..) - , ValueType(..) - ) -import Text.Megaparsec ( errorBundlePretty - , parse - ) diff --git a/src/KDL/Parser/Hustle/Formatter.hs b/src/KDL/Parser/Hustle/Formatter.hs deleted file mode 100644 index e4d386d..0000000 --- a/src/KDL/Parser/Hustle/Formatter.hs +++ /dev/null @@ -1,83 +0,0 @@ -{- FOURMOLU_DISABLE -} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE FlexibleInstances #-} -{-# OPTIONS_GHC -Wno-orphans #-} - -{- | Vendered from https://github.com/fuzzypixelz/hustle -} -module KDL.Parser.Hustle.Formatter - ( Pretty(pretty) - ) where - -import Data.Map ( Map ) -import qualified Data.Map.Strict as Map -import Data.Maybe ( catMaybes ) -import Data.Scientific ( Scientific ) -import qualified Data.Text as T -import KDL.Parser.Hustle.Internal ( escChar - , match - ) -import KDL.Parser.Hustle.Parser ( identifier ) -import KDL.Parser.Hustle.Types -import Prettyprinter ( Pretty(pretty) - , braces - , dquotes - , enclose - , hsep - , nest - , parens - , viaShow - , vsep - ) - -instance Pretty Scientific where - pretty = viaShow - -instance Pretty Identifier where - pretty (Identifier i) = - if match identifier i then pretty i else dquotes (pretty i) - -instance Pretty Value where - pretty v = vann <> vexp - where - vann = case valueAnn v of - Nothing -> "" - Just a -> parens (pretty a) - vexp = case valueExp v of - StringValue s -> dquotes . pretty $ T.concatMap escChar s - IntegerValue i -> pretty i - SciValue s -> pretty s - BooleanValue b -> if b then "true" else "false" - NullValue -> "null" - -instance Pretty (Map Identifier Value) where - pretty ps = hsep . Map.elems $ Map.mapWithKey prop ps - where prop i v = pretty i <> "=" <> pretty v - -instance Pretty Node where - pretty n = hsep . catMaybes $ [nname, nargs, nprops, nchildren] - where - nann = case nodeAnn n of - Nothing -> "" - Just a -> parens (pretty a) - nname = Just $ nann <> pretty (nodeName n) - nargs = case nodeArgs n of - [] -> Nothing - nas -> Just . hsep . map pretty $ nas - nprops | nodeProps n == Map.empty = Nothing - | otherwise = Just (pretty (nodeProps n)) - nchildren = case nodeChildren n of - [] -> Nothing - ncs -> - Just - . nest 4 - . braces - . enclose "\n" (nest (-4) "\n") - . vsep - . map pretty - $ ncs - -instance Pretty Document where - pretty d = vsep (map pretty (docNodes d)) <> "\n" - -instance Show Document where - show d = show (pretty d) diff --git a/src/KDL/Parser/Hustle/Internal.hs b/src/KDL/Parser/Hustle/Internal.hs deleted file mode 100644 index fc2bad9..0000000 --- a/src/KDL/Parser/Hustle/Internal.hs +++ /dev/null @@ -1,96 +0,0 @@ -{- FOURMOLU_DISABLE -} -{-# LANGUAGE OverloadedStrings #-} - -{- | Vendered from https://github.com/fuzzypixelz/hustle -} -module KDL.Parser.Hustle.Internal where - -import Control.Monad ( void ) -import Data.Char ( digitToInt - , isDigit - ) -import Data.Either ( isRight ) -import Data.Scientific ( Scientific ) -import qualified Data.Scientific as Sci -import Data.Text ( Text ) -import qualified Data.Text as T -import KDL.Parser.Hustle.Types ( Parser ) -import Text.Megaparsec ( (<|>) - , MonadParsec - ( eof - , takeWhileP - , try - ) - , option - , runParser - , satisfy - ) -import Text.Megaparsec.Char ( char - , char' - , digitChar - , newline - ) -import qualified Text.Megaparsec.Char.Lexer as L - -signed :: Num a => Parser a -> Parser a -signed p = option id sign <*> p - where sign = (id <$ char '+') <|> (negate <$ char '-') - -lineComment :: Parser () -lineComment = L.skipLineComment "//" >> (void newline <|> eof) - -blockComment :: Parser () -blockComment = L.skipBlockCommentNested "/*" "*/" - -isBinDigit :: Char -> Bool -isBinDigit c = c `elem` ['0', '1'] - -data SP = SP Integer Int - -number :: Integer -> (Char -> Bool) -> Parser Integer -number b isNumDigit = mkNum . T.filter (/= '_') <$> digits - where - mkNum = T.foldl' step 0 - step a c = a * b + fromIntegral (digitToInt c) - digits = T.cons <$> satisfy isNumDigit <*> takeWhileP - (Just "digit") - (\c -> isNumDigit c || c == '_') - -decimal_ :: Parser Integer -decimal_ = number 10 isDigit - -scientific_ :: Parser Scientific -scientific_ = do - c' <- decimal_ - SP c e' <- dotDecimal_ c' - e <- option e' (try $ exponent_ e') - return (Sci.scientific c e) - -dotDecimal_ :: Integer -> Parser SP -dotDecimal_ c' = do - void (char '.') - let digits = T.cons <$> digitChar <*> takeWhileP - (Just "digit") - (\c -> isDigit c || c == '_') - let mkNum = T.foldl' step (SP c' 0) - step (SP a e') c = SP (a * 10 + fromIntegral (digitToInt c)) (e' - 1) - mkNum . T.filter (/= '_') <$> digits - -exponent_ :: Int -> Parser Int -exponent_ e' = do - void (char' 'e') - (+ e') <$> L.signed (return ()) (fromIntegral <$> decimal_) - -match :: Parser a -> Text -> Bool -match p t = isRight $ runParser (p >> eof) "" t - -escChar :: Char -> Text -escChar c = case c of - '\x08' -> "\\b" - '\x09' -> "\\t" - '\x0A' -> "\\n" - '\x0C' -> "\\f" - '\x0D' -> "\\r" - '\x22' -> "\\\"" - '\x2F' -> "\\/" - '\x5C' -> "\\\\" - _ -> T.singleton c diff --git a/src/KDL/Parser/Hustle/Parser.hs b/src/KDL/Parser/Hustle/Parser.hs deleted file mode 100644 index aa6e62f..0000000 --- a/src/KDL/Parser/Hustle/Parser.hs +++ /dev/null @@ -1,310 +0,0 @@ -{- FOURMOLU_DISABLE -} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE RecordWildCards #-} - -{- | Vendered from https://github.com/fuzzypixelz/hustle -} -module KDL.Parser.Hustle.Parser where - -import KDL.Parser.Hustle.Internal -import KDL.Parser.Hustle.Types - -import Control.Monad ( void ) -import Data.Char ( chr - , isHexDigit - , isOctDigit - , isDigit - , isSpace - ) -import qualified Data.Map.Strict as Map -import Data.Maybe ( catMaybes - , fromMaybe - , mapMaybe - , maybeToList - ) -import Data.Scientific ( Scientific ) -import Data.Text ( Text ) -import qualified Data.Text as T -import Text.Megaparsec ( () - , (<|>) - , MonadParsec(eof, label, try) - , anySingle - , between - , choice - , many - , manyTill - , noneOf - , optional - , satisfy - , some - ) -import Text.Megaparsec.Char ( char - , char' - , crlf - , newline - , string - ) - --- WHITESPACE - -escline :: Parser () -escline = - char '\\' >> many ws >> (try linebreak <|> lineComment) "Escape Line" - -linespace :: Parser () -linespace = try linebreak <|> try ws <|> try lineComment "Line Space" - -linebreak :: Parser () -linebreak = label "Newline" $ do - choice $ void crlf : map - void - [ char '\r' "carriage return" - , char '\n' "newline" - , char '\x85' "next line" - , char '\f' "form feed" - , char '\x2028' "line seperator" - , char '\x2029' "paragraph seperator" - ] - -ws :: Parser () -ws = bom <|> hspacechar <|> blockComment "Whitespace" - -bom :: Parser () -bom = void (char '\xFEFF') "BOM" - -hspacechar :: Parser () -hspacechar = label "Unicode Space" $ do - choice $ map - void - [ char '\x0009' "character tabulation" - , char '\x0020' "space" - , char '\x00A0' "bo-break space" - , char '\x1680' "ogham space mark" - , char '\x2000' "en quad" - , char '\x2001' "em quad" - , char '\x2002' "en space" - , char '\x2003' "em space" - , char '\x2004' "three-per-em space" - , char '\x2005' "four-per-em space" - , char '\x2006' "six-per-em space" - , char '\x2007' "figure space" - , char '\x2008' "punctuation space" - , char '\x2009' "thin space" - , char '\x200A' "hair space" - , char '\x202F' "narrow no-break space" - , char '\x205F' "medium mathmatical space" - , char '\x3000' "ideographic space" - ] - --- STRINGS - -anystring :: Parser Text -anystring = try unquotedstring <|> try rawstring <|> quotedstring - -unquotedstring :: Parser Text -unquotedstring = label "Unquoted String" $ do - c0 <- satisfy $ \c -> isValidChar c && not (isDigit c) && c /= '"' - rest <- many $ satisfy isValidChar - -- TODO: Forbid true, false, null, inf, -inf, nan, or "looks like a number" - pure $ T.pack (c0 : rest) - where - isValidChar c = not (isSpace c) && c `notElem` ("[]{}()\\/#\";=" :: [Char]) - -quotedstring :: Parser Text -quotedstring = label "Quoted String" $ do - T.concat <$> (char '"' *> manyTill character (char '"')) - -rawstring :: Parser Text -rawstring = label "Raw String" $ do - void (char 'r') - hs <- T.pack <$> many (char '#') - void (char '"') - s <- manyTill anySingle (string (T.cons '"' hs)) - return (T.pack s) - -character :: Parser Text -character = void (char '\\') *> escape <|> nonescape - -{- - As per the Haskell 2010 Language Report, - (https://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-200002.6) - The '\/' Solidus escape isn't defined, so if we try to parse it directly the - compiler throws a lexical error. Hence this terribleness. --} -escape :: Parser Text -escape = - do - e <- choice - [ '\x08' <$ char 'b' - , '\x09' <$ char 't' - , '\x0A' <$ char 'n' - , '\x0C' <$ char 'f' - , '\x0D' <$ char 'r' - , '\x22' <$ char '\"' - , '\x2F' <$ char '/' - , '\x5C' <$ char '\\' - ] - return (T.singleton e) - <|> uescape - -uescape :: Parser Text -uescape = do - void (string "u{") - u <- fromInteger <$> number 16 isHexDigit - if u >= 0x10ffff - then fail "Exceeded Unicode code point limit." - else do - void (char '}') - let c = chr u - return (T.singleton c) - -nonescape :: Parser Text -nonescape = do - c <- noneOf ("\\\"" :: [Char]) - return (T.singleton c) - --- NUMBERS - -binary :: Parser Integer -binary = signed $ char '0' >> char' 'b' >> number 2 isBinDigit - -octal :: Parser Integer -octal = signed $ char '0' >> char 'o' >> number 8 isOctDigit - -hexadecimal :: Parser Integer -hexadecimal = signed $ char '0' >> char 'x' >> number 16 isHexDigit - -integer :: Parser Integer -integer = signed decimal_ - -scientific :: Parser Scientific -scientific = signed scientific_ - --- CONTENT - -name :: Parser Identifier -name = Identifier <$> (try anystring <|> identifier) - -identifier :: Parser Text -identifier = label "Identifier" $ do - i <- satisfy iichar - is <- many (satisfy ichar) - let result = T.pack (i : is) - case result of - "true" -> fail "keyword true in identifier" - "false" -> fail "keyword false in identifier" - "null" -> fail "keyword null in identifier" - _ -> return result - where - ichar c = - (c > '\x20') - && (c <= '\x10FFFF') - && (c `notElem` ("\\/(){}<>;[]=,\"" :: [Char])) - && not (match linespace (T.singleton c)) - iichar c = ichar c && c `notElem` ['0' .. '9'] - -nullvalue :: Parser Text -nullvalue = string "#null" - -bool :: Parser Bool -bool = True <$ string "#true" <|> False <$ string "#false" - -property :: Parser (Identifier, Value) -property = label "Property" $ do - propKey <- name - void (char '=') - propValue <- value - return (propKey, propValue) - -value :: Parser Value -value = label "Value" $ do - valueAnn <- optional typeAnnotation - valueExp <- choice - [ IntegerValue <$> try binary "Binary" - , IntegerValue <$> try octal "Octal" - , IntegerValue <$> try hexadecimal "Hexadecimal" - , SciValue <$> try scientific "Decimal" - , IntegerValue <$> try integer "Integer" - , BooleanValue <$> try bool "Boolean" - , NullValue <$ try nullvalue "Null" - , StringValue <$> anystring "String" - ] - return Value { .. } - -typeAnnotation :: Parser Identifier -typeAnnotation = label "Type Annotation" $ do - void (char '(') - i <- name - void (char ')') - return i - --- NODES - -nodes :: Parser [Node] -nodes = between (many linespace) (many linespace) (fromMaybe [] <$> body) - where - body = optional $ do - n <- maybeToList <$> node - ns <- fromMaybe [] <$> optional nodes - return (n ++ ns) - -node :: Parser (Maybe Node) -node = label "Node" $ do - discard <- optional comment - nodeAnn <- optional typeAnnotation - nodeName <- name - nodeContent <- catMaybes <$> content - nodeChildren <- fromMaybe [] <$> optional children - _ <- many nodespace - _ <- terminator - let nodeArgs = mapMaybe isArg nodeContent - nodeProps = Map.fromList $ mapMaybe isProp nodeContent - case discard of - Just _ -> return Nothing - Nothing -> return $ Just Node { .. } - where - isArg c = case c of - NodeValue v -> Just v - _ -> Nothing - isProp c = case c of - NodeProperty p -> Just p - _ -> Nothing - -content :: Parser [Maybe Content] -content = many . try $ do - void (some nodespace) - discard <- optional $ comment <* many nodespace - c <- choice [NodeProperty <$> try property, NodeValue <$> try value] - case discard of - Just _ -> return Nothing - Nothing -> return $ Just c - -children :: Parser [Node] -children = label "Node Child" . try $ do - void (many nodespace) - discard <- optional comment - void (char '{') - ns <- nodes - void (char '}') - void (many ws) - case discard of - Just _ -> return [] - Nothing -> return ns - -comment :: Parser () -comment = label "/-Comment" . try $ do - void (string "/-") - void $ many nodespace - -nodespace :: Parser () -nodespace = label "Node Space" $ do - try (many ws *> escline <* many ws) <|> try (void $ some ws) - -terminator :: Parser () -terminator = label "Node Terminator" $ do - choice [try (void (char ';')), try (void newline), try lineComment, eof] - -document :: Parser Document -document = do - docNodes <- nodes - void eof - return Document { .. } diff --git a/src/KDL/Parser/Hustle/Types.hs b/src/KDL/Parser/Hustle/Types.hs deleted file mode 100644 index 08786b6..0000000 --- a/src/KDL/Parser/Hustle/Types.hs +++ /dev/null @@ -1,73 +0,0 @@ -{- FOURMOLU_DISABLE -} -{- | Vendered from https://github.com/fuzzypixelz/hustle -} -module KDL.Parser.Hustle.Types - ( Parser - , Document(..) - , Content(..) - , Node(..) - , Value(..) - , ValueType(..) - , Identifier(..) - ) where - -import Data.Map ( Map ) -import Data.Scientific ( Scientific ) -import Data.Text ( Text ) -import Data.Void ( Void ) -import Text.Megaparsec ( Parsec ) - -{- - String has exactly one use, - and that’s showing Hello World in tutorials. - -- Albert Einstein --} -type Parser = Parsec Void Text - -newtype Document = Document - { docNodes :: [Node] - } - deriving (Eq) - -{- - This data type serves as an abstraction over Values - and Properties of a Node, in order simplify the the node - Parser, i.e group the two types together to - consume any number of them in any order. --} -data Content - = NodeValue { getValue :: Value } - | NodeProperty { getProp :: (Identifier, Value) } - deriving (Eq) - -data Node = Node - { nodeAnn :: Maybe Identifier - , nodeName :: Identifier - , nodeArgs :: [Value] - , nodeProps :: Map Identifier Value - , nodeChildren :: [Node] - } - deriving (Show, Eq) - -newtype Identifier = Identifier Text - deriving (Show, Eq) - -data Value = Value - { valueAnn :: Maybe Identifier - , valueExp :: ValueType - } - deriving (Show, Eq) - -data ValueType - = StringValue Text - | IntegerValue Integer - | SciValue Scientific - | BooleanValue Bool - | NullValue - deriving (Show, Eq) - -{- - This allows for querying properties in alphabetical order - upon printing out, the rest is handled automatically by Map. --} -instance Ord Identifier where - Identifier t1 `compare` Identifier t2 = t1 `compare` t2 diff --git a/src/KDL/Parser/Internal.hs b/src/KDL/Parser/Internal.hs new file mode 100644 index 0000000..ae67949 --- /dev/null +++ b/src/KDL/Parser/Internal.hs @@ -0,0 +1,1191 @@ +{-# LANGUAGE CPP #-} +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE RecordWildCards #-} +{-# LANGUAGE TypeFamilies #-} +{-# LANGUAGE NoFieldSelectors #-} + +{-| +Implement the v2 parser specified at: https://kdl.dev/spec/#name-full-grammar +-} +module KDL.Parser.Internal ( + Parser, + + -- * (1) Compatibility + p_bom, + p_version, + + -- * (3.1) Document + p_document, + p_nodes, + p_line_space, + p_node_space, + + -- * (3.2) Node + p_node, + p_base_node, + p_node_prop_or_arg, + p_node_terminator, + + -- * (3.3) Line Continuation + p_escline, + + -- * (3.4) Property + p_prop, + + -- * (3.5) Argument + p_value'Entry, + + -- * (3.6) Children Block + p_node_children, + + -- * (3.7) Value + p_value, + p_keyword, + + -- * (3.8) Type Annotation + p_type, + + -- * (3.9) String + p_string'Identifier, + p_string, + + -- * (3.10) Identifier String + p_identifier_string, + isValidUnquotedString, + p_unambiguous_ident, + p_signed_ident, + disallowed_keyword_identifiers, + p_dotted_ident, + p_identifier_char, + + -- * (3.11) Quoted String + p_quoted_string, + p_single_line_string_body, + p_string_character, + p_hex_unicode, + p_ws_escape, + + -- * (3.12) Multi-line String + p_multi_line_string_body, + + -- * (3.13) Raw String + p_raw_string, + p_raw_string_quotes, + p_single_line_raw_string_body, + p_single_line_raw_string_char, + p_multi_line_raw_string_body, + + -- * (3.14) Number + p_number, + p_hex, + p_hex_digit, + p_octal, + p_binary, + p_decimal, + p_integer, + p_digits, + p_exponent, + p_sign, + p_keyword_number, + + -- * (3.15) Boolean + p_boolean, + + -- * (3.17) Whitespace + p_ws, + p_unicode_space, + p_single_line_comment, + p_multi_line_comment, + p_slashdash, + + -- * (3.18) Newline + p_newline, + + -- * (3.19) Disallowed Literal Code Points + is_disallowed_literal_code_points, + + -- * Unicode + p_unicode, + is_unicode_scalar_value, +) where + +import Control.Monad (guard, void, (>=>)) +import Control.Monad.Trans.State.Strict (StateT, evalStateT) +import Control.Monad.Trans.State.Strict qualified as State +import Data.Bifunctor (bimap) +import Data.Bits (toIntegralSized) +import Data.Char ( + chr, + digitToInt, + isDigit, + isHexDigit, + isOctDigit, + isSpace, + ord, + ) +import Data.Either (isRight) +import Data.Foldable (foldlM, traverse_) +import Data.Foldable qualified as Seq (toList) +import Data.Maybe (catMaybes, fromMaybe, isJust) +import Data.Scientific (Scientific) +import Data.Scientific qualified as Scientific +import Data.Sequence qualified as Seq +import Data.Set (Set) +import Data.Set qualified as Set +import Data.Text (Text) +import Data.Text qualified as Text +import Data.Void (Void) +import KDL.Types ( + Ann (..), + AnnFormat (..), + Document, + Entry (..), + EntryFormat (..), + Identifier (..), + IdentifierFormat (..), + Node (..), + NodeFormat (..), + NodeList (..), + NodeListFormat (..), + Value (..), + ValueData (..), + ValueFormat (..), + ) +import KDL.Types qualified as AnnFormat (AnnFormat (..)) +import KDL.Types qualified as EntryFormat (EntryFormat (..)) +import KDL.Types qualified as NodeFormat (NodeFormat (..)) +import KDL.Types qualified as NodeListFormat (NodeListFormat (..)) +import Text.Megaparsec +import Text.Megaparsec.Char + +#if !MIN_VERSION_base(4,20,0) +import Data.Foldable (foldl') +#endif + +type Parser = Parsec Void Text + +{----- (1) Compatibility -----} + +-- | ref: (1) +-- bom := '\u{FEFF}' +p_bom :: Parser () +p_bom = label "BOM" $ do + void $ string "\xFEFF" + +-- | ref: (1) +-- version := +-- '/-' unicode-space* 'kdl-version' unicode-space+ ('1' | '2') +-- unicode-space* newline +p_version :: Parser () +p_version = label "version" $ do + _ <- string "/-" + _ <- many p_unicode_space + _ <- string "kdl-version" + _ <- some p_unicode_space + _ <- oneOf ['1', '2'] + _ <- many p_unicode_space + _ <- p_newline + pure () + +{----- (3.1) Document -----} + +-- | ref: (3.1) +-- document := bom? version? nodes +p_document :: Parser Document +p_document = do + bom <- hidden . option "" . withSource_ $ p_bom + version <- hidden . option "" . withSource_ $ try p_version + nodes <- p_nodes + hidden eof + pure . prependLeading bom . prependLeading version $ nodes + +-- | ref: (3.1) +-- nodes := (line-space* node)* line-space* +p_nodes :: Parser NodeList +p_nodes = label "nodes" $ do + -- The grammar is left-associative, but we do right-associative to get + -- correct backtracking semantics + good parse errors + initialWS <- withSource_ $ many p_line_space + results <- fmap concat . manyWhile $ do + (sdash, parseNode) <- p_node + (result, continue) <- + resolveSlashdash sdash parseNode >>= \case + Left src -> pure (Left src, True) + Right (node, hasTerminator) -> pure (Right node, hasTerminator) + trailing <- withSource_ $ many p_line_space + pure ([result, Left trailing], continue) + let (nodes, leftoverWS) = mergeLeadingWS initialWS results + -- If there are no nodes, all the whitespace should be considered leading + -- whitespace. Otherwise, the leftover whitespace (e.g. slashdashed nodes + -- at the end) are trailing whitespace. + let (leading, trailing) = + if null nodes + then (leftoverWS, "") + else ("", leftoverWS) + pure NodeList{format = Just NodeListFormat{..}, ..} + where + manyWhile :: Parser (a, Bool) -> Parser [a] + manyWhile p = + optional p >>= \case + Nothing -> pure [] + Just (a, continue) -> fmap (a :) . option [] $ if continue then manyWhile p else empty + +-- | ref: (3.1) + (3.17) +-- // Whitespace where newlines are allowed. +-- line-space := node-space | newline | single-line-comment +p_line_space :: Parser () +p_line_space = hidden $ do + void . choice $ + [ p_node_space + , void p_newline + , void p_single_line_comment + ] + +-- | ref: (3.1) + (3.17) +-- // Whitespace within nodes, +-- // where newline-ish things must be esclined. +-- node-space := ws* escline ws* | ws+ +p_node_space :: Parser () +p_node_space = hidden $ do + void . choice . map try $ + [ many p_ws *> p_escline *> many p_ws + , some p_ws + ] + +{----- (3.2) Node -----} + +-- | ref: (3.2) +-- node := base-node node-terminator +-- final-node := base-node node-terminator? +p_node :: Parser (SlashdashResult, Parser (Node, Bool)) +p_node = label "node" $ do + (sdash, parseNode) <- p_base_node + pure . (sdash,) $ do + node <- parseNode + mTerminator <- optional p_node_terminator + pure (mapFormat (maybe id setTerminator mTerminator) node, isJust mTerminator) + where + setTerminator terminator format = + format + { NodeFormat.beforeTerminator = format.trailing + , NodeFormat.terminator = terminator + , NodeFormat.trailing = "" + } + +-- | ref: (3.2) +-- base-node := slashdash? type? node-space* string +-- (node-space* (node-space | slashdash) node-prop-or-arg)* +-- // slashdashed node-children must always be after props and args. +-- (node-space* slashdash node-children)* +-- (node-space* node-children)? +-- (node-space* slashdash node-children)* +-- node-space* +p_base_node :: Parser (SlashdashResult, Parser Node) +p_base_node = label "base node" $ do + sdash <- option NoSlashdash p_slashdash + pure . (sdash,) $ do + -- node ann + initialAnn <- optional p_type + postAnnWS <- withSource_ $ many p_node_space + let (ann, leading) = + case initialAnn of + Just a -> (Just $ appendTrailing postAnnWS a, "") + Nothing -> (Nothing, postAnnWS) + + -- node name + name <- p_string'Identifier + + -- node entries + (entries, postEntriesWS) <- mergeLeadingWS "" <$> p_entries + + -- slashdashed node-children #1 + slashdashedChildren1 <- p_slashdashed_children + + -- node children + let unzipMaybe = maybe (Nothing, Nothing) (\(a, b) -> (Just a, Just b)) + (preChildrenWS, children) <- fmap unzipMaybe . optional $ do + -- Make sure a children block is coming up before we commit to consuming + -- the whitespace as pre-children whitespace + _ <- lookAhead $ try $ many p_node_space *> label "children block" (char '{') + (,) + <$> withSource_ (many p_node_space) + <*> p_node_children + let beforeChildren = postEntriesWS <> slashdashedChildren1 <> fromMaybe "" preChildrenWS + + -- slashdashed node-children #2 + slashdashedChildren2 <- p_slashdashed_children + + -- trailing space + postChildrenWS <- withSource_ $ many p_node_space + let trailing = slashdashedChildren2 <> postChildrenWS + + -- set by caller + let beforeTerminator = "" + terminator = "" + + pure Node{format = Just NodeFormat{..}, ..} + where + -- (node-space* (node-space | slashdash) node-prop-or-arg)* + p_entries = fmap concat . many . label "node prop or arg" $ do + -- make sure this is not the pre-children whitespace or post-node whitespace + -- so that we can commit to this being node-prop-or-arg whitespace + notFollowedBy $ do + _ <- many p_node_space + _ <- optional p_slashdash + choice + [ void (char '{') + , void (char '}') + , void p_node_terminator + ] + leading <- withSource_ $ many p_node_space + -- if there was no node-space, it _must_ be a slashdash + sdash <- (if Text.null leading then id else option NoSlashdash) p_slashdash + entry <- resolveSlashdash sdash p_node_prop_or_arg + pure [Left leading, entry] + + -- (node-space* slashdash node-children)* + p_slashdashed_children = + withSource_ . many . try $ do + _ <- many p_node_space + _ <- p_slashdash + _ <- p_node_children + pure () + +-- | ref: (3.2) +-- node-prop-or-arg := prop | value +p_node_prop_or_arg :: Parser Entry +p_node_prop_or_arg = label "node entry" $ do + try p_prop <|> p_value'Entry + +-- | ref: (3.2) +-- node-terminator := single-line-comment | newline | ';' | eof +p_node_terminator :: Parser Text +p_node_terminator = label "end of node" $ do + choice + [ p_single_line_comment + , p_newline + , string ";" + , p_eof + ] + +{----- (3.3) Line Continuation -----} + +-- | ref: (3.3) +-- escline := '\\' ws* (single-line-comment | newline | eof) +p_escline :: Parser () +p_escline = label "escline" $ do + _ <- string "\\" + _ <- many p_ws + _ <- p_single_line_comment <|> p_newline <|> p_eof + pure () + +{----- (3.4) Property -----} + +-- | ref: (3.4) +-- prop := string node-space* '=' node-space* value +p_prop :: Parser Entry +p_prop = label "property" $ do + name <- p_string'Identifier + afterKey <- withSource_ $ many p_node_space + _ <- char '=' + afterEq <- withSource_ $ many p_node_space + (value, leading) <- p_value + let format = + EntryFormat + { leading + , afterKey + , afterEq + , trailing = "" + } + pure Entry{name = Just name, value, format = Just format} + +{----- (3.5) Argument -----} + +-- | ref: (3.5) +p_value'Entry :: Parser Entry +p_value'Entry = label "argument" $ do + (value, leading) <- p_value + let format = + EntryFormat + { leading + , afterKey = "" + , afterEq = "" + , trailing = "" + } + pure Entry{name = Nothing, value, format = Just format} + +{----- (3.6) Children Block -----} + +-- | ref: (3.6) +-- node-children := '{' nodes final-node? '}' +p_node_children :: Parser NodeList +p_node_children = label "children block" $ do + between (char '{') (char '}') $ do + p_nodes -- 'final-node?' logic is in p_nodes / p_node + +{----- (3.7) Value -----} + +-- | ref: (3.7) +-- value := type? node-space* (string | number | keyword) +p_value :: Parser (Value, Text) +p_value = label "value" $ do + initialAnn <- optional p_type + postAnnWS <- withSource_ $ many p_node_space + let (ann, leading) = + case initialAnn of + Just a -> (Just $ appendTrailing postAnnWS a, "") + Nothing -> (Nothing, postAnnWS) + + (data_, repr) <- + withSource . choice . map try $ + [ Text <$> p_string + , p_number + , p_keyword + ] + + pure (Value{ann, data_, format = Just ValueFormat{..}}, leading) + +-- | ref: (3.7) +p_keyword :: Parser ValueData +p_keyword = label "value keyword" $ do + choice . map try $ + [ Bool <$> p_boolean + , Null <$ string "#null" + ] + +{----- (3.8) Type Annotation -----} + +-- | ref: (3.8) +-- type := '(' node-space* string node-space* ')' +p_type :: Parser Ann +p_type = label "type annotation" $ do + between (char '(') (char ')') $ do + beforeId <- withSource_ $ many p_node_space + identifier <- p_string'Identifier + afterId <- withSource_ $ many p_node_space + let format = Just AnnFormat{leading = "", trailing = "", ..} + pure Ann{..} + +{----- (3.9) String -----} + +-- | ref: (3.9) +p_string'Identifier :: Parser Identifier +p_string'Identifier = do + (value, repr) <- withSource p_string + pure Identifier{value, format = Just IdentifierFormat{repr}} + +-- | ref: (3.9) +-- string := identifier-string | quoted-string | raw-string ¶ +p_string :: Parser Text +p_string = label "string" $ do + choice + [ p_identifier_string + , p_quoted_string + , p_raw_string + ] + +{----- (3.10) Identifier String -----} + +-- | ref: (3.10) +-- identifier-string := unambiguous-ident | signed-ident | dotted-ident +p_identifier_string :: Parser Text +p_identifier_string = label "unquoted string" $ do + choice . map try $ + [ p_unambiguous_ident + , p_signed_ident + , p_dotted_ident + ] + +isValidUnquotedString :: Text -> Bool +isValidUnquotedString = isRight . parse (p_identifier_string <* eof) "" + +-- | ref: (3.10) +-- unambiguous-ident := +-- ((identifier-char - digit - sign - '.') identifier-char*) +-- - disallowed-keyword-identifiers +p_unambiguous_ident :: Parser Text +p_unambiguous_ident = label "unquoted string" $ do + c <- p_identifier_char + guard $ not $ isDigit c || c `elem` ['-', '+', '.'] + cs <- many p_identifier_char + let s = Text.pack (c : cs) + guard $ s `Set.notMember` disallowed_keyword_identifiers + pure s + +-- | ref: (3.10) +-- signed-ident := +-- sign ((identifier-char - digit - '.') identifier-char*)? +p_signed_ident :: Parser Text +p_signed_ident = do + c0 <- p_sign + cs <- option "" $ do + c1 <- p_identifier_char + guard $ not $ isDigit c1 || c1 == '.' + cs <- many p_identifier_char + pure (c1 : cs) + pure $ Text.pack (c0 : cs) + +-- | ref: (3.10) +-- disallowed-keyword-identifiers := +-- 'true' | 'false' | 'null' | 'inf' | '-inf' | 'nan' +disallowed_keyword_identifiers :: Set Text +disallowed_keyword_identifiers = + Set.fromList + [ "true" + , "false" + , "null" + , "inf" + , "-inf" + , "nan" + ] + +-- | ref: (3.10) +-- dotted-ident := +-- sign? '.' ((identifier-char - digit) identifier-char*)? +p_dotted_ident :: Parser Text +p_dotted_ident = do + c0 <- option "" $ Text.singleton <$> p_sign + c1 <- single '.' + cs <- option "" $ do + c2 <- p_identifier_char + guard $ not $ isDigit c2 + cs <- many p_identifier_char + pure (c2 : cs) + pure $ c0 <> Text.pack (c1 : cs) + +-- | ref: (3.10.2) +-- identifier-char := +-- unicode - unicode-space - newline - [\\/(){};\[\]"#=] +-- - disallowed-literal-code-points +p_identifier_char :: Parser Char +p_identifier_char = do + satisfy $ \c -> Text.singleton c `Set.notMember` invalid && (not . is_disallowed_literal_code_points) c + where + invalid = + Set.unions + [ Set.map Text.singleton chars_unicode_space + , chars_newline + , Set.map Text.singleton $ Set.fromList "\\/(){};[]\"#=" + ] + +{----- (3.11) Quoted String -----} + +-- | ref: (3.11) + (3.12) +-- quoted-string := +-- '"' single-line-string-body '"' | +-- '"""' newline +-- (multi-line-string-body newline)? +-- (unicode-space | ws-escape)* '"""' +p_quoted_string :: Parser Text +p_quoted_string = label "quoted string" $ do + choice + [ between (string quotes3) (string quotes3) $ do + parseMultilineString + p_multi_line_string_body + ( repeat0 . choice $ + [ Text.singleton <$> p_unicode_space + , "" <$ p_ws_escape + ] + ) + , between (string quotes1) (string quotes1) $ do + p_single_line_string_body + ] + +-- | ref: (3.11) +-- single-line-string-body := (string-character - newline)* +p_single_line_string_body :: Parser Text +p_single_line_string_body = label "quoted string line" $ do + repeat0 $ p_string_character (Just p_newline) + +-- | ref: (3.11) +-- string-character := +-- '\\' (["\\bfnrts] | +-- 'u{' hex-unicode '}') | +-- ws-escape | +-- [^\\"] - disallowed-literal-code-points +p_string_character :: Maybe (Parser invalid) -> Parser Text +p_string_character invalid = label "string character" $ do + choice . map try $ + [ do + _ <- char '\\' + fmap Text.singleton . choice $ + [ char '"' + , char '\\' + , char 'b' *> pure '\b' + , char 'f' *> pure '\f' + , char 'n' *> pure '\n' + , char 'r' *> pure '\r' + , char 't' *> pure '\t' + , char 's' *> pure ' ' + , between (string "u{") (string "}") $ do + chr <$> p_hex_unicode + ] + , "" <$ p_ws_escape + , do + traverse_ notFollowedBy invalid + c <- satisfy (`notElem` ['\\', '"']) + guard $ not $ is_disallowed_literal_code_points c + pure $ Text.singleton c + ] + +-- | ref: (3.11.1) +-- hex-unicode := hex-digit{1, 6} - surrogate - above-max-scalar +-- surrogate := [0]{0, 2} [dD] [8-9a-fA-F] hex-digit{2} +-- // U+D800-DFFF: D 8 00 +-- // D F FF +-- above-max-scalar = [2-9a-fA-F] hex-digit{5} | +-- [1] [1-9a-fA-F] hex-digit{4} +p_hex_unicode :: Parser Int +p_hex_unicode = label "hex unicode" $ do + digits <- countBetween 1 6 p_hex_digit + guard $ (not . null) digits + let x = undigits 16 digits + guard $ is_unicode_scalar_value x + pure x + +-- | ref: (3.11.1.1) +-- ws-escape := '\\' (unicode-space | newline)+ +p_ws_escape :: Parser () +p_ws_escape = label "escaped Whitespace" $ do + char '\\' *> void (some $ void p_unicode_space <|> void p_newline) + +{----- (3.12) Multi-line String -----} + +-- | ref: (3.12) +-- multi-line-string-body := (('"' | '""')? string-character)* +-- +-- Requires some changes to the grammar: https://github.com/kdl-org/kdl/pull/552 +p_multi_line_string_body :: Parser end -> Parser MultilineChars +p_multi_line_string_body end = label "quoted multiline string body" $ do + manyTill (withSource validChar) (try . lookAhead $ end) + where + validChar = do + choice + [ string "\"\"" <* notFollowedBy (char '"') + , string "\"" <* notFollowedBy (char '"') + , p_string_character Nothing + ] + +-- | Characters in a multiline string. +-- +-- In simple cases, the input is effectively [(Char, Text)], containing +-- each character in the multiline string and its raw representation. This +-- distinguishes what the user actually wrote vs its semantic value; e.g. +-- '\s' is represented as (" ", "\s"). +-- +-- It needs to be (Text, Text) instead of (Char, Text) because some character +-- sequences are semantically an empty string (e.g. p_ws_escape) and a few can +-- return multiple characters (e.g. p_newline). +type MultilineChars = [(Text, Text)] + +type MultilineProcessorM a = StateT MultilineProcessorState Parser a +data MultilineProcessorState = MultilineProcessorState + { wsPrefix :: Text + , lineStartOffset :: Int + } + +-- | Parse a multiline string with the grammar: +-- +-- '"""' newline +-- ( newline)? +-- * '"""' +parseMultilineString :: + (forall end. Parser end -> Parser MultilineChars) -> + Parser Text -> + Parser Text +parseMultilineString parseBody parseEndSpace = do + _ <- p_newline -- Drop the first newline + State{stateOffset = startOffset} <- getParserState + mBody <- optional . try $ do + body <- parseBody (p_newline *> parseEndSpace *> string quotes3) + _ <- p_newline -- Drop the last newline + pure body + end <- parseEndSpace + case mBody of + Nothing -> pure "" + Just body -> do + let state = + MultilineProcessorState + { wsPrefix = end + , lineStartOffset = startOffset + } + body' <- flip evalStateT state $ mapLinesM processLine body + pure $ foldMap fst body' + where + processLine = rmPrefix . collapseWsOnlyLines + + collapseWsOnlyLines line = + let srcs = foldMap snd line + in if Text.all isSpace srcs + then [("", srcs)] + else line + + rmPrefix line0 = do + let go pre = \case + -- Consumed the full prefix, return the final line + line | Text.null pre -> pure line + -- The prefix starts with the source text; consume and continue matching the rest of the prefix + (_, src) : rest | Just pre' <- Text.stripPrefix src pre -> go pre' rest + -- Prefix did not match, return the initial line unchanged + _ -> do + offset <- State.gets (.lineStartOffset) + parseError . FancyError offset . Set.singleton $ + ErrorFail "Line does not have the correct indentation" + pre0 <- State.gets (.wsPrefix) + -- If the line is completely empty (e.g. after collapseWsOnlyLines), + -- there's no prefix to strip. + if all (Text.null . fst) line0 + then pure line0 + else go pre0 line0 + + mapLinesM :: + (MultilineChars -> MultilineProcessorM MultilineChars) -> + MultilineChars -> + MultilineProcessorM MultilineChars + mapLinesM f = + let resolveLine (buf, acc) = (acc <>) . Seq.fromList <$> f (Seq.toList buf) + go (buf, acc) (c, src) + -- If we're at a newline, `buf` contains the chars before the newline. + -- Apply the function and reset the buffer. The newline should be added + -- directly to the accumulator, since the newline isn't part of the line. + -- Per (3.12.1), newline characters are normalized to LF. + | c `Set.member` chars_newline = do + acc' <- resolveLine (buf, acc) + State.modify $ \s -> + let lineLen = sum . fmap Text.length $ fmap snd buf Seq.|> src + in s{lineStartOffset = s.lineStartOffset + lineLen} + pure (Seq.empty, acc' Seq.|> ("\n", src)) + -- Otherwise, append to the buffer and continue + | otherwise = do + pure (buf Seq.|> (c, src), acc) + in foldlM go (Seq.empty, Seq.empty) + -- Resolve line one last time to apply the function on the last line + >=> resolveLine + >=> (pure . Seq.toList) + +{----- (3.13) Raw String -----} + +-- | ref: (3.13) +-- raw-string := '#' raw-string-quotes '#' | '#' raw-string '#' +p_raw_string :: Parser Text +p_raw_string = label "raw string" $ do + -- For efficiency, we'll implement this slightly differently than the grammar + -- verbatim. Find all the hashes up front and use that to pass the closing + -- delimiter to the inner parsers. + delim <- withSource_ . some $ string "#" + s <- p_raw_string_quotes (string delim) + _ <- string delim + pure s + +-- | ref: (3.13) +-- raw-string-quotes := +-- '"' single-line-raw-string-body '"' | +-- '"""' newline +-- (multi-line-raw-string-body newline)? +-- unicode-space* '"""' +p_raw_string_quotes :: Parser end -> Parser Text +p_raw_string_quotes end = label "raw string quotes" $ do + choice + [ between (string quotes3) (string quotes3) $ do + parseMultilineString + p_multi_line_raw_string_body + (repeat0 $ Text.singleton <$> p_unicode_space) + , between (string quotes1) (string quotes1) . withSource_ $ do + p_single_line_raw_string_body (string quotes1 *> end) + ] + +-- | ref: (3.13) +-- single-line-raw-string-body := +-- '' | +-- (single-line-raw-string-char - '"') +-- single-line-raw-string-char*? | +-- '"' (single-line-raw-string-char - '"') +-- single-line-raw-string-char*? +p_single_line_raw_string_body :: Parser end -> Parser () +p_single_line_raw_string_body end = label "raw string single line" $ do + void . optional $ do + _ <- optional $ char '"' + c <- p_single_line_raw_string_char + guard $ c /= '"' + manyTill p_single_line_raw_string_char (try $ lookAhead end) + +-- | ref: (3.13) +-- single-line-raw-string-char := +-- unicode - newline - disallowed-literal-code-points +p_single_line_raw_string_char :: Parser Char +p_single_line_raw_string_char = label "raw string character" $ do + c <- p_unicode + guard $ Text.singleton c `Set.notMember` chars_newline + guard $ not $ is_disallowed_literal_code_points c + pure c + +-- | ref: (3.13) +-- multi-line-raw-string-body := +-- (unicode - disallowed-literal-code-points)*? +p_multi_line_raw_string_body :: Parser end -> Parser MultilineChars +p_multi_line_raw_string_body end = label "raw string multiline body" $ do + manyTill validChar (try . lookAhead $ end) + where + validChar = do + c <- p_unicode + guard $ not $ is_disallowed_literal_code_points c + let s = Text.singleton c + pure (s, s) + +{----- (3.14) Number -----} + +-- | ref: (3.14) +-- number := keyword-number | hex | octal | binary | decimal +p_number :: Parser ValueData +p_number = label "number" $ do + choice . map try $ + [ p_keyword_number + , Number <$> p_hex + , Number <$> p_octal + , Number <$> p_binary + , Number <$> p_decimal + ] + +-- | ref: (3.14) +-- hex := sign? '0x' hex-digit (hex-digit | '_')* +p_hex :: Parser Scientific +p_hex = label "hex" $ parseNumWith "0x" 16 isHexDigit + +-- | ref: (3.14) +-- hex-digit := [0-9a-fA-F] +p_hex_digit :: Parser Int +p_hex_digit = label "hex digit" $ do + digitToInt <$> satisfy isHexDigit + +-- | ref: (3.14) +-- octal := sign? '0o' [0-7] [0-7_]* +p_octal :: Parser Scientific +p_octal = label "octal" $ parseNumWith "0o" 8 isOctDigit + +-- | ref: (3.14) +-- binary := sign? '0b' ('0' | '1') ('0' | '1' | '_')* +p_binary :: Parser Scientific +p_binary = label "binary" $ parseNumWith "0b" 2 isBinDigit + where + isBinDigit c = c == '0' || c == '1' + +parseNumWith :: Text -> Integer -> (Char -> Bool) -> Parser Scientific +parseNumWith prefix base isValid = do + signed <- parseSigned + _ <- string prefix + (x, _) <- p_digits base isValid + pure . signed . fromInteger $ x + +-- | ref: (3.14) +-- decimal := sign? integer ('.' integer)? exponent? +p_decimal :: Parser Scientific +p_decimal = label "decimal number" $ do + signed <- parseSigned + (i, _) <- p_integer + (f, fdigits) <- option (0, 0) $ label "decimal point" (char '.') *> p_integer + e <- option 0 p_exponent + pure . signed $ Scientific.scientific (i * (10 ^ fdigits) + f) (e - fdigits) + +-- | ref: (3.14) +-- integer := digit (digit | '_')* +p_integer :: Parser (Integer, Int) +p_integer = label "integer" $ p_digits 10 isDigit + +p_digits :: Integer -> (Char -> Bool) -> Parser (Integer, Int) +p_digits base isValid = do + d <- p_digit + ds <- + fmap catMaybes . many . choice $ + [ Just <$> p_digit + , Nothing <$ hidden (char '_') + ] + let digits = map toInteger (d : ds) + pure (undigits base digits, length digits) + where + p_digit = digitToInt <$> satisfy isValid + +-- | ref: (3.14) +-- exponent := ('e' | 'E') sign? integer +p_exponent :: Parser Int +p_exponent = label "exponent" $ do + _ <- oneOf ['e', 'E'] + signed <- parseSigned + (x, _) <- p_integer + case toIntegralSized x of + Just x' -> pure $ signed x' + Nothing -> fail $ "Exponent is too large: " <> show x + +-- | ref: (3.14) +-- sign := '+' | '-' +p_sign :: Parser Char +p_sign = label "sign" $ do + oneOf ['-', '+'] + +parseSigned :: (Num a) => Parser (a -> a) +parseSigned = toSign <$> optional p_sign + where + toSign = \case + Just '-' -> negate + _ -> id + +-- | ref: (3.14.1) +-- keyword-number := '#inf' | '#-inf' | '#nan' +p_keyword_number :: Parser ValueData +p_keyword_number = do + choice + [ Inf <$ string "#inf" + , NegInf <$ string "#-inf" + , NaN <$ string "#nan" + ] + +{----- (3.15) Boolean -----} + +-- | ref: (3.15) +-- boolean := '#true' | '#false' +p_boolean :: Parser Bool +p_boolean = label "boolean" $ do + choice + [ True <$ string "#true" + , False <$ string "#false" + ] + +{----- (3.17) Whitespace -----} + +-- | ref: (3.17) +-- ws := unicode-space | multi-line-comment +p_ws :: Parser Text +p_ws = label "whitespace" $ do + (Text.singleton <$> p_unicode_space) <|> p_multi_line_comment + +-- | ref: (3.17) +-- unicode-space := See Table +-- (All White_Space unicode characters which are not `newline`) +p_unicode_space :: Parser Char +p_unicode_space = label "unicode space" $ do + choice + [ char c l + | (c, l) <- chars_unicode_space' + ] + +-- | ref: (3.17) +chars_unicode_space :: Set Char +chars_unicode_space = Set.fromList $ map fst chars_unicode_space' + +-- | ref: (3.17) +chars_unicode_space' :: [(Char, String)] +chars_unicode_space' = + [ ('\x0009', "character tabulation") + , ('\x0020', "space") + , ('\x00A0', "no-break space") + , ('\x1680', "ogham space mark") + , ('\x2000', "en quad") + , ('\x2001', "em quad") + , ('\x2002', "en space") + , ('\x2003', "em space") + , ('\x2004', "three-per-em space") + , ('\x2005', "four-per-em space") + , ('\x2006', "six-per-em space") + , ('\x2007', "figure space") + , ('\x2008', "punctuation space") + , ('\x2009', "thin space") + , ('\x200A', "hair space") + , ('\x202F', "narrow no-break space") + , ('\x205F', "medium mathmatical space") + , ('\x3000', "ideographic space") + ] + +-- | ref: (3.17.1) +-- single-line-comment := '//' ^newline* (newline | eof) +p_single_line_comment :: Parser Text +p_single_line_comment = label "single-line comment" $ do + withSource_ $ do + _ <- string "//" + _ <- takeWhileP Nothing (/= '\n') + _ <- p_newline <|> p_eof + pure () + +-- | ref: (3.17.2) +-- multi-line-comment := '/*' commented-block +p_multi_line_comment :: Parser Text +p_multi_line_comment = label "multi-line comment" . withSource_ $ do + string "/*" *> p_commented_block + +-- | ref: (3.17.2) +-- commented-block := +-- '*/' | (multi-line-comment | '*' | '/' | [^*/]+) commented-block +p_commented_block :: Parser () +p_commented_block = label "commented block" $ do + choice + [ void $ string "*/" + , do + choice + [ void p_multi_line_comment + , void $ char '*' + , void $ char '/' + , void $ takeWhileP Nothing (`notElem` ['*', '/']) + ] + p_commented_block + ] + +data SlashdashResult = Slashdash Text | NoSlashdash + +resolveSlashdash :: SlashdashResult -> Parser a -> Parser (Either Text a) +resolveSlashdash = \case + Slashdash sdash -> fmap (Left . (sdash <>)) . withSource_ + NoSlashdash -> fmap Right + +-- | ref: (3.17.3) +-- slashdash := '/-' line-space* +p_slashdash :: Parser SlashdashResult +p_slashdash = hidden $ do + sdash <- withSource_ $ string "/-" *> many p_line_space + pure $ Slashdash sdash + +{----- (3.18) Newline -----} + +-- | ref: (3.18) +-- newline := See Table (All Newline White_Space) +p_newline :: Parser Text +p_newline = label "newline" $ do + choice [string s l | (s, l) <- chars_newline'] + +-- | ref: (3.18) +chars_newline :: Set Text +chars_newline = Set.fromList $ map fst chars_newline' + +-- | ref: (3.18) +chars_newline' :: [(Text, String)] +chars_newline' = + [ ("\x000D\x000A", "CRLF") + , ("\x000D", "CR") + , ("\x000A", "LF") + , ("\x0085", "NEL") + , ("\x000B", "VT") + , ("\x000C", "FF") + , ("\x2028", "LS") + , ("\x2029", "PS") + ] + +{----- (3.19) Disallowed Literal Code Points -----} + +-- | ref: (3.19) +-- disallowed-literal-code-points := +-- See Table (Disallowed Literal Code Points) +is_disallowed_literal_code_points :: Char -> Bool +is_disallowed_literal_code_points c = + or + [ -- The codepoints U+0000-0008 or the codepoints U+000E-001F (various control characters). + or + [ '\x0000' <= c && c <= '\x0008' + , '\x000E' <= c && c <= '\x001F' + ] + , c == '\x007F' -- U+007F (the Delete control character). + , (not . is_unicode_scalar_value . ord) c -- Any codepoint that is not a Unicode Scalar Value (U+D800-DFFF). + , -- U+200E-200F, U+202A-202E, and U+2066-2069, the unicode "direction control" characters + or + [ '\x200E' <= c && c <= '\x200F' + , '\x202A' <= c && c <= '\x202E' + , '\x2066' <= c && c <= '\x2069' + ] + , -- U+FEFF, aka Zero-width Non-breaking Space (ZWNBSP)/Byte Order Mark (BOM), except as the first code point in a document. + c == '\xFEFF' + ] + +{----- Unicode -----} + +-- | unicode := Any Unicode Scalar Value +p_unicode :: Parser Char +p_unicode = label "unicode scalar value" $ do + satisfy (is_unicode_scalar_value . ord) + +-- | https://unicode.org/glossary/#unicode_scalar_value +is_unicode_scalar_value :: Int -> Bool +is_unicode_scalar_value x = + (0 <= x && x <= 0xD7FF) || (0xE000 <= x && x <= 0x10FFFF) + +{----- Utilities -----} + +withSource :: Parser a -> Parser (a, Text) +withSource p = do + s <- getParserState + a <- p + s' <- getParserState + let n = stateOffset s' - stateOffset s + pure (a, Text.take n (stateInput s)) + +withSource_ :: Parser a -> Parser Text +withSource_ = fmap snd . withSource + +repeat0 :: (Monoid a) => Parser a -> Parser a +repeat0 = fmap mconcat . many + +quotes1 :: Text +quotes1 = "\"" + +quotes3 :: Text +quotes3 = "\"\"\"" + +-- | Return a list whose length is in the range [lo, hi], inclusive. +countBetween :: Int -> Int -> Parser a -> Parser [a] +countBetween lo hi m = go 0 + where + go n + | n < lo = (:) <$> m <*> go (n + 1) + | n < hi = ((:) <$> m <*> go (n + 1)) <|> pure [] + | otherwise = pure [] + +p_eof :: Parser Text +p_eof = hidden $ "" <$ eof + +undigits :: (Num a) => a -> [a] -> a +undigits base = foldl' (\acc x -> acc * base + x) 0 + +-- | Group all 'Left' values and put it in the leading whitespace of the next +-- element. Return any leftover whitespace (i.e. the list ends with 'Left' +-- values). +mergeLeadingWS :: (HasWsFormat a) => Text -> [Either Text a] -> ([a], Text) +mergeLeadingWS initialLeading = + bimap Seq.toList toText . foldl' go (Seq.empty, Seq.singleton initialLeading) + where + toText = Text.concat . Seq.toList + go (nodes, buf) = \case + Left t -> (nodes, buf Seq.|> t) + Right node -> (nodes Seq.|> prependLeading (toText buf) node, Seq.empty) + +class HasFormat a where + type KdlFormat a + mapFormat :: (KdlFormat a -> KdlFormat a) -> a -> a +instance HasFormat NodeList where + type KdlFormat NodeList = NodeListFormat + mapFormat f NodeList{..} = NodeList{format = f <$> format, ..} +instance HasFormat Node where + type KdlFormat Node = NodeFormat + mapFormat f Node{..} = Node{format = f <$> format, ..} +instance HasFormat Ann where + type KdlFormat Ann = AnnFormat + mapFormat f Ann{..} = Ann{format = f <$> format, ..} +instance HasFormat Entry where + type KdlFormat Entry = EntryFormat + mapFormat f Entry{..} = Entry{format = f <$> format, ..} + +class (HasFormat a) => HasWsFormat a where + mapLeading :: (Text -> Text) -> a -> a + mapTrailing :: (Text -> Text) -> a -> a + prependLeading :: Text -> a -> a + prependLeading s = mapLeading (s <>) + appendTrailing :: Text -> a -> a + appendTrailing s = mapTrailing (<> s) +instance HasWsFormat NodeList where + mapLeading f = mapFormat $ \format -> format{NodeListFormat.leading = f format.leading} + mapTrailing f = mapFormat $ \format -> format{NodeListFormat.trailing = f format.trailing} +instance HasWsFormat Node where + mapLeading f = mapFormat $ \format -> format{NodeFormat.leading = f format.leading} + mapTrailing f = mapFormat $ \format -> format{NodeFormat.trailing = f format.trailing} +instance HasWsFormat Ann where + mapLeading f = mapFormat $ \format -> format{AnnFormat.leading = f format.leading} + mapTrailing f = mapFormat $ \format -> format{AnnFormat.trailing = f format.trailing} +instance HasWsFormat Entry where + mapLeading f = mapFormat $ \format -> format{EntryFormat.leading = f format.leading} + mapTrailing f = mapFormat $ \format -> format{EntryFormat.trailing = f format.trailing} diff --git a/src/KDL/Render.hs b/src/KDL/Render.hs index 785b156..9ecb77d 100644 --- a/src/KDL/Render.hs +++ b/src/KDL/Render.hs @@ -54,11 +54,11 @@ renderNode Node{..} = , maybe "" renderAnn ann , renderIdentifier name , foldMap renderEntry entries - , maybe "" (.before_children) format + , maybe "" (.beforeChildren) format , case children of Nothing -> "" Just nodes -> "{" <> renderNodeList nodes <> "}" - , maybe "" (.before_terminator) format + , maybe "" (.beforeTerminator) format , maybe "" (.terminator) format , maybe "" (.trailing) format ] @@ -72,9 +72,9 @@ renderEntry Entry{..} = Just nameId -> Text.concat [ renderIdentifier nameId - , maybe "" (.after_key) format + , maybe "" (.afterKey) format , "=" - , maybe "" (.after_eq) format + , maybe "" (.afterEq) format , renderValue value ] , maybe "" (.trailing) format @@ -85,9 +85,9 @@ renderAnn Ann{..} = Text.concat [ maybe "" (.leading) format , "(" - , maybe "" (.before_id) format + , maybe "" (.beforeId) format , renderIdentifier identifier - , maybe "" (.after_id) format + , maybe "" (.afterId) format , ")" , maybe "" (.trailing) format ] @@ -104,6 +104,9 @@ renderValueData = \case Text s -> renderString s Number x -> (Text.pack . show) x Bool b -> if b then "#true" else "#false" + Inf -> "#inf" + NegInf -> "#-inf" + NaN -> "#nan" Null -> "#null" where renderString s = if isPlainIdent s then s else "\"" <> Text.concatMap escapeChar s <> "\"" diff --git a/src/KDL/Types.hs b/src/KDL/Types.hs index 8ded49c..03aa3a7 100644 --- a/src/KDL/Types.hs +++ b/src/KDL/Types.hs @@ -99,9 +99,9 @@ data NodeList = NodeList data NodeListFormat = NodeListFormat { leading :: Text - -- ^ Whitespace and comments preceding the document's first node. + -- ^ Whitespace and comments preceding the first node. , trailing :: Text - -- ^ Whitespace and comments following the document's last node. + -- ^ Whitespace and comments following the last node. } deriving (Show, Eq) @@ -204,9 +204,9 @@ data Ann = Ann data AnnFormat = AnnFormat { leading :: Text -- ^ Whitespace and comments preceding the annotation itself. - , before_id :: Text + , beforeId :: Text -- ^ Whitespace and comments between the opening `(` and the identifier. - , after_id :: Text + , afterId :: Text -- ^ Whitespace and comments between the identifier and the closing `)`. , trailing :: Text -- ^ Whitespace and comments following the annotation itself. @@ -233,9 +233,9 @@ data Node = Node data NodeFormat = NodeFormat { leading :: Text -- ^ Whitespace and comments preceding the node itself. - , before_children :: Text + , beforeChildren :: Text -- ^ Whitespace and comments preceding the node's children block. - , before_terminator :: Text + , beforeTerminator :: Text -- ^ Whitespace and comments right before the node's terminator. , terminator :: Text -- ^ The terminator for the node. @@ -295,9 +295,9 @@ data Entry = Entry data EntryFormat = EntryFormat { leading :: Text -- ^ Whitespace and comments preceding the entry itself. - , after_key :: Text + , afterKey :: Text -- ^ Whitespace and comments between an entry's key name and its equals sign. - , after_eq :: Text + , afterEq :: Text -- ^ Whitespace and comments between an entry's equals sign and its value. , trailing :: Text -- ^ Whitespace and comments following the entry itself. @@ -341,6 +341,9 @@ data ValueData = Text Text | Number Scientific | Bool Bool + | Inf + | NegInf + | NaN | Null deriving (Show, Eq) diff --git a/test/KDL/Decoder/ArrowSpec.hs b/test/KDL/Decoder/ArrowSpec.hs index 6e96235..afcea9b 100644 --- a/test/KDL/Decoder/ArrowSpec.hs +++ b/test/KDL/Decoder/ArrowSpec.hs @@ -13,6 +13,7 @@ import Data.Text (Text) import Data.Text qualified as Text import Data.Typeable (typeRep) import KDL.Arrow qualified as KDL +import KDL.TestUtils.AST (scrubFormat) import KDL.TestUtils.Error (decodeErrorMsg) import KDL.Types ( Entry (..), @@ -38,7 +39,7 @@ apiSpec = do it "decodes a node" $ do let config = "foo 1.0" decoder = KDL.document $ proc () -> do - KDL.node "foo" -< () + scrubFormat <$> KDL.node "foo" -< () expected = Node { ann = Nothing @@ -50,7 +51,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -58,14 +59,14 @@ apiSpec = do it "decodes multiple nodes" $ do let config = "foo; foo" decoder = KDL.document $ proc () -> do - KDL.many $ KDL.node "foo" -< () + fmap (map scrubFormat) . KDL.many $ KDL.node "foo" -< () expected = [fooNode, fooNode] fooNode = Node { ann = Nothing , name = Identifier{value = "foo", format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -73,8 +74,8 @@ apiSpec = do it "decodes nodes in any order" $ do let config = "foo; bar" decoder = KDL.document $ proc () -> do - bar <- KDL.node "bar" -< () - foo <- KDL.node "foo" -< () + bar <- scrubFormat <$> KDL.node "bar" -< () + foo <- scrubFormat <$> KDL.node "foo" -< () returnA -< (bar, foo) expected = (node "bar", node "foo") node name = @@ -82,7 +83,7 @@ apiSpec = do { ann = Nothing , name = Identifier{value = name, format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -90,8 +91,8 @@ apiSpec = do it "fails when not enough nodes" $ do let config = "foo" decoder = KDL.document $ proc () -> do - foo1 <- KDL.node @Node "foo" -< () - foo2 <- KDL.node @Node "foo" -< () + foo1 <- scrubFormat <$> KDL.node @Node "foo" -< () + foo2 <- scrubFormat <$> KDL.node @Node "foo" -< () returnA -< (foo1, foo2) KDL.decodeWith decoder config `shouldSatisfy` decodeErrorMsg @@ -102,7 +103,7 @@ apiSpec = do -- Most behaviors tested with `node` describe "nodeWith" $ do it "decodes a node" $ do - let config = "foo 1.0 { hello \"world\"; }" + let config = "foo 1.0 { hello world; }" decodeFoo = proc () -> do arg <- KDL.arg @Int -< () child <- KDL.children $ KDL.argAt @Text "hello" -< () @@ -164,7 +165,7 @@ apiSpec = do let config = "foo 1.0; foo 2.0; bar" decoder = KDL.document $ proc () -> do _ <- KDL.node @Node "foo" -< () - KDL.remainingNodes -< () + fmap (map scrubFormat) <$> KDL.remainingNodes -< () expected = Map.fromList [ ("foo", [fooNode2]) @@ -181,7 +182,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } barNode = @@ -189,7 +190,7 @@ apiSpec = do { ann = Nothing , name = Identifier{value = "bar", format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -211,7 +212,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right expected it "fails when node fails to parse" $ do - let config = "foo 1; bar 1; bar \"hello\"" + let config = "foo 1; bar 1; bar hello" decodeNode = proc () -> do KDL.arg @Int -< () decoder = KDL.document $ proc () -> do @@ -262,7 +263,7 @@ apiSpec = do describe "argAt" $ do it "gets argument at a node" $ do - let config = "foo \"bar\"; hello \"world\"" + let config = "foo bar; hello world" decoder = KDL.document $ proc () -> do hello <- KDL.argAt @Text "hello" -< () foo <- KDL.argAt @Text "foo" -< () @@ -363,7 +364,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right [] it "fails if any arg fails to parse" $ do - let config = "foo 1 \"asdf\"" + let config = "foo 1 asdf" decoder = KDL.document $ proc () -> do KDL.argsAt @Int "foo" -< () KDL.decodeWith decoder config @@ -456,7 +457,7 @@ apiSpec = do ] it "fails if any child fails to parse" $ do - let config = "foo { - 1; - \"asdf\"; }" + let config = "foo { - 1; - asdf; }" decoder = KDL.document $ proc () -> do KDL.dashChildrenAt @Int "foo" -< () KDL.decodeWith decoder config @@ -513,14 +514,17 @@ apiSpec = do it "gets dash nodes at a node" $ do let config = "foo { - { bar; }; - { baz; }; }" decoder = KDL.document $ proc () -> do - KDL.dashNodesAt "foo" -< () + map scrubFormat <$> KDL.dashNodesAt "foo" -< () expected = [node "-" [node "bar" []], node "-" [node "baz" []]] node name children = Node { ann = Nothing , name = Identifier{value = name, format = Nothing} , entries = [] - , children = Just NodeList{nodes = children, format = Nothing} + , children = + if null children + then Nothing + else Just NodeList{nodes = children, format = Nothing} , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -550,7 +554,7 @@ apiSpec = do -- Most behaviors tested with `dashNodesAt` describe "dashNodesAtWith" $ do it "gets dash nodes at a node" $ do - let config = "foo { - 1 { bar \"hello\"; }; - 2 { bar \"world\"; }; }" + let config = "foo { - 1 { bar hello; }; - 2 { bar world; }; }" decodeChild = proc () -> do arg <- KDL.arg @Int -< () child <- KDL.children $ KDL.nodeWith "bar" $ KDL.arg @Text -< () @@ -560,7 +564,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right [(1, "hello"), (2, "world")] it "fails if any child fails to parse" $ do - let config = "foo { - { bar 1; }; - { bar \"test\"; }; }" + let config = "foo { - { bar 1; }; - { bar test; }; }" decoder = KDL.document $ proc () -> do KDL.dashNodesAtWith "foo" $ KDL.children $ KDL.argAt @Int "bar" -< () KDL.decodeWith decoder config @@ -579,7 +583,7 @@ apiSpec = do describe "arg" $ do it "decodes an argument" $ do - let config = "foo 1 \"bar\"" + let config = "foo 1 bar" decoder = proc () -> do arg1 <- KDL.arg @Int -< () arg2 <- KDL.arg @Text -< () @@ -603,7 +607,7 @@ apiSpec = do ] it "fails if argument fails to parse" $ do - let config = "foo \"test\"" + let config = "foo test" decoder = proc () -> do KDL.arg @Int -< () decodeNode "foo" decoder config @@ -625,7 +629,7 @@ apiSpec = do -- Most behaviors tested with `arg` describe "argWith" $ do it "decodes an argument" $ do - let config = "foo \"bar\"" + let config = "foo bar" decoder = proc () -> do KDL.argWith KDL.text -< () decodeNode "foo" decoder config `shouldBe` Right "bar" @@ -668,7 +672,7 @@ apiSpec = do describe "prop" $ do it "decodes a prop" $ do - let config = "foo test1=1 test2=\"hello\"" + let config = "foo test1=1 test2=hello" decoder = proc () -> do prop1 <- KDL.prop @Text "test2" -< () prop2 <- KDL.prop @Int "test1" -< () @@ -842,7 +846,7 @@ apiSpec = do it "decodes children" $ do let config = "foo { bar test; }" decoder = proc () -> do - KDL.children $ KDL.node @Node "bar" -< () + fmap scrubFormat . KDL.children $ KDL.node @Node "bar" -< () expected = Node { ann = Nothing @@ -854,7 +858,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } decodeNode "foo" decoder config `shouldBe` Right expected @@ -882,7 +886,7 @@ apiSpec = do it "decodes any value" $ do let config = "foo 1.0 asdf #true" decoder = KDL.document $ proc () -> do - KDL.argsAtWith "foo" KDL.any -< () + map scrubFormat <$> KDL.argsAtWith "foo" KDL.any -< () val data_ = Value { ann = Nothing diff --git a/test/KDL/Decoder/MonadSpec.hs b/test/KDL/Decoder/MonadSpec.hs index c635dcc..b4b44c9 100644 --- a/test/KDL/Decoder/MonadSpec.hs +++ b/test/KDL/Decoder/MonadSpec.hs @@ -8,6 +8,7 @@ import Data.Map qualified as Map import Data.Text (Text) import Data.Text qualified as Text import KDL qualified +import KDL.TestUtils.AST (scrubFormat) import KDL.TestUtils.Error (decodeErrorMsg) import KDL.Types ( Entry (..), @@ -32,7 +33,7 @@ apiSpec = do it "decodes a node" $ do let config = "foo 1.0" decoder = KDL.document $ do - KDL.node "foo" + scrubFormat <$> KDL.node "foo" expected = Node { ann = Nothing @@ -44,7 +45,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -52,14 +53,14 @@ apiSpec = do it "decodes multiple nodes" $ do let config = "foo; foo" decoder = KDL.document $ do - KDL.many $ KDL.node "foo" + fmap (map scrubFormat) . KDL.many $ KDL.node "foo" expected = [fooNode, fooNode] fooNode = Node { ann = Nothing , name = Identifier{value = "foo", format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -67,8 +68,8 @@ apiSpec = do it "decodes nodes in any order" $ do let config = "foo; bar" decoder = KDL.document $ do - bar <- KDL.node "bar" - foo <- KDL.node "foo" + bar <- scrubFormat <$> KDL.node "bar" + foo <- scrubFormat <$> KDL.node "foo" pure (bar, foo) expected = (node "bar", node "foo") node name = @@ -76,7 +77,7 @@ apiSpec = do { ann = Nothing , name = Identifier{value = name, format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -84,8 +85,8 @@ apiSpec = do it "fails when not enough nodes" $ do let config = "foo" decoder = KDL.document $ do - foo1 <- KDL.node @Node "foo" - foo2 <- KDL.node @Node "foo" + foo1 <- scrubFormat <$> KDL.node @Node "foo" + foo2 <- scrubFormat <$> KDL.node @Node "foo" pure (foo1, foo2) KDL.decodeWith decoder config `shouldSatisfy` decodeErrorMsg @@ -96,7 +97,7 @@ apiSpec = do -- Most behaviors tested with `node` describe "nodeWith" $ do it "decodes a node" $ do - let config = "foo 1.0 { hello \"world\"; }" + let config = "foo 1.0 { hello world; }" decodeFoo = do arg <- KDL.arg @Int child <- KDL.children $ KDL.argAt @Text "hello" @@ -158,7 +159,7 @@ apiSpec = do let config = "foo 1.0; foo 2.0; bar" decoder = KDL.document $ do _ <- KDL.node @Node "foo" - KDL.remainingNodes + fmap (map scrubFormat) <$> KDL.remainingNodes expected = Map.fromList [ ("foo", [fooNode2]) @@ -175,7 +176,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } barNode = @@ -183,7 +184,7 @@ apiSpec = do { ann = Nothing , name = Identifier{value = "bar", format = Nothing} , entries = [] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -205,7 +206,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right expected it "fails when node fails to parse" $ do - let config = "foo 1; bar 1; bar \"hello\"" + let config = "foo 1; bar 1; bar hello" decodeNode = do KDL.arg @Int decoder = KDL.document $ do @@ -256,7 +257,7 @@ apiSpec = do describe "argAt" $ do it "gets argument at a node" $ do - let config = "foo \"bar\"; hello \"world\"" + let config = "foo bar; hello world" decoder = KDL.document $ do hello <- KDL.argAt @Text "hello" foo <- KDL.argAt @Text "foo" @@ -357,7 +358,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right [] it "fails if any arg fails to parse" $ do - let config = "foo 1 \"asdf\"" + let config = "foo 1 asdf" decoder = KDL.document $ do KDL.argsAt @Int "foo" KDL.decodeWith decoder config @@ -450,7 +451,7 @@ apiSpec = do ] it "fails if any child fails to parse" $ do - let config = "foo { - 1; - \"asdf\"; }" + let config = "foo { - 1; - asdf; }" decoder = KDL.document $ do KDL.dashChildrenAt @Int "foo" KDL.decodeWith decoder config @@ -507,14 +508,17 @@ apiSpec = do it "gets dash nodes at a node" $ do let config = "foo { - { bar; }; - { baz; }; }" decoder = KDL.document $ do - KDL.dashNodesAt "foo" + map scrubFormat <$> KDL.dashNodesAt "foo" expected = [node "-" [node "bar" []], node "-" [node "baz" []]] node name children = Node { ann = Nothing , name = Identifier{value = name, format = Nothing} , entries = [] - , children = Just NodeList{nodes = children, format = Nothing} + , children = + if null children + then Nothing + else Just NodeList{nodes = children, format = Nothing} , format = Nothing } KDL.decodeWith decoder config `shouldBe` Right expected @@ -544,7 +548,7 @@ apiSpec = do -- Most behaviors tested with `dashNodesAt` describe "dashNodesAtWith" $ do it "gets dash nodes at a node" $ do - let config = "foo { - 1 { bar \"hello\"; }; - 2 { bar \"world\"; }; }" + let config = "foo { - 1 { bar hello; }; - 2 { bar world; }; }" decodeChild = do arg <- KDL.arg @Int child <- KDL.children $ KDL.nodeWith "bar" $ KDL.arg @Text @@ -554,7 +558,7 @@ apiSpec = do KDL.decodeWith decoder config `shouldBe` Right [(1, "hello"), (2, "world")] it "fails if any child fails to parse" $ do - let config = "foo { - { bar 1; }; - { bar \"test\"; }; }" + let config = "foo { - { bar 1; }; - { bar test; }; }" decoder = KDL.document $ do KDL.dashNodesAtWith "foo" $ KDL.children $ KDL.argAt @Int "bar" KDL.decodeWith decoder config @@ -573,7 +577,7 @@ apiSpec = do describe "arg" $ do it "decodes an argument" $ do - let config = "foo 1 \"bar\"" + let config = "foo 1 bar" decoder = do arg1 <- KDL.arg @Int arg2 <- KDL.arg @Text @@ -597,7 +601,7 @@ apiSpec = do ] it "fails if argument fails to parse" $ do - let config = "foo \"test\"" + let config = "foo test" decoder = do KDL.arg @Int decodeNode "foo" decoder config @@ -619,7 +623,7 @@ apiSpec = do -- Most behaviors tested with `arg` describe "argWith" $ do it "decodes an argument" $ do - let config = "foo \"bar\"" + let config = "foo bar" decoder = do KDL.argWith KDL.text decodeNode "foo" decoder config `shouldBe` Right "bar" @@ -662,7 +666,7 @@ apiSpec = do describe "prop" $ do it "decodes a prop" $ do - let config = "foo test1=1 test2=\"hello\"" + let config = "foo test1=1 test2=hello" decoder = do prop1 <- KDL.prop @Text "test2" prop2 <- KDL.prop @Int "test1" @@ -836,7 +840,7 @@ apiSpec = do it "decodes children" $ do let config = "foo { bar test; }" decoder = do - KDL.children $ KDL.node @Node "bar" + fmap scrubFormat . KDL.children $ KDL.node @Node "bar" expected = Node { ann = Nothing @@ -848,7 +852,7 @@ apiSpec = do , format = Nothing } ] - , children = Just NodeList{nodes = [], format = Nothing} + , children = Nothing , format = Nothing } decodeNode "foo" decoder config `shouldBe` Right expected @@ -876,7 +880,7 @@ apiSpec = do it "decodes any value" $ do let config = "foo 1.0 asdf #true" decoder = KDL.document $ do - KDL.argsAtWith "foo" KDL.any + map scrubFormat <$> KDL.argsAtWith "foo" KDL.any val data_ = Value { ann = Nothing diff --git a/test/KDL/DecoderSpec.hs b/test/KDL/DecoderSpec.hs index 2c6af77..9a26b30 100644 --- a/test/KDL/DecoderSpec.hs +++ b/test/KDL/DecoderSpec.hs @@ -6,27 +6,26 @@ import Control.Monad (when) import Data.Text (Text) import Data.Text qualified as Text import KDL qualified -import KDL.TestUtils.Error (decodeErrorMsg) import KDL.Types (Node) import Skeletest import Skeletest.Predicate qualified as P import System.FilePath (()) +decodeErrorMsgSnapshot :: Maybe FilePath -> Predicate IO (Either KDL.DecodeError a) +decodeErrorMsgSnapshot mfile = P.left (KDL.renderDecodeError P.>>> sanitize P.>>> P.matchesSnapshot) + where + sanitize = + case mfile of + Nothing -> id + Just file -> Text.replace (Text.pack file) "test_config.kdl" + spec :: Spec spec = do describe "decodeWith" $ do it "fails with helpful error if parsing fails" $ do - let config = "foo hello= 123" + let config = "foo 123=123" decoder = KDL.document $ KDL.node @Node "foo" - KDL.decodeWith decoder config - `shouldSatisfy` decodeErrorMsg - [ "1:10:" - , " |" - , "1 | foo hello= 123" - , " | ^^" - , "unexpected \"= \"" - , "expecting Node Child, Node Space, or Node Terminator" - ] + KDL.decodeWith decoder config `shouldSatisfy` decodeErrorMsgSnapshot Nothing it "fails with user-defined error" $ do let config = "foo -1" @@ -36,11 +35,7 @@ spec = do when (x < 0) $ do KDL.failM $ "Got negative number: " <> (Text.pack . show) x pure x - KDL.decodeWith decoder config - `shouldSatisfy` decodeErrorMsg - [ "At: foo #0 > arg #0" - , " Got negative number: -1.0" - ] + KDL.decodeWith decoder config `shouldSatisfy` decodeErrorMsgSnapshot Nothing it "shows context in deeply nested error" $ do let config = "foo; foo { bar { baz; baz; baz; baz a=1; }; }" @@ -50,27 +45,14 @@ spec = do . (KDL.many . KDL.nodeWith "bar" . KDL.children) . (KDL.many . KDL.nodeWith "baz") $ KDL.optional (KDL.prop @Text "a") - KDL.decodeWith decoder config - `shouldSatisfy` decodeErrorMsg - [ "At: foo #1 > bar #0 > baz #3 > prop a" - , " Expected text, got: 1" - ] + KDL.decodeWith decoder config `shouldSatisfy` decodeErrorMsgSnapshot Nothing describe "decodeFileWith" $ do it "fails with helpful error if parsing fails" $ do FixtureKdlFile file <- getFixture - writeFile file "foo hello= 123" + writeFile file "foo 123=123" let decoder = KDL.document $ KDL.node @Node "foo" - KDL.decodeFileWith decoder file - `shouldSatisfy` (P.returns . decodeErrorMsg) - [ "Failed to decode " <> Text.pack file <> ":" - , "1:10:" - , " |" - , "1 | foo hello= 123" - , " | ^^" - , "unexpected \"= \"" - , "expecting Node Child, Node Space, or Node Terminator" - ] + KDL.decodeFileWith decoder file `shouldSatisfy` P.returns (decodeErrorMsgSnapshot (Just file)) it "fails with user-defined error" $ do FixtureKdlFile file <- getFixture @@ -81,12 +63,7 @@ spec = do when (x < 0) $ do KDL.failM $ "Got negative number: " <> (Text.pack . show) x pure x - KDL.decodeFileWith decoder file - `shouldSatisfy` (P.returns . decodeErrorMsg) - [ "Failed to decode " <> Text.pack file <> ":" - , "At: foo #0 > arg #0" - , " Got negative number: -1.0" - ] + KDL.decodeFileWith decoder file `shouldSatisfy` P.returns (decodeErrorMsgSnapshot (Just file)) it "shows context in deeply nested error" $ do FixtureKdlFile file <- getFixture @@ -97,12 +74,7 @@ spec = do . (KDL.many . KDL.nodeWith "bar" . KDL.children) . (KDL.many . KDL.nodeWith "baz") $ KDL.optional (KDL.prop @Text "a") - KDL.decodeFileWith decoder file - `shouldSatisfy` (P.returns . decodeErrorMsg) - [ "Failed to decode " <> Text.pack file <> ":" - , "At: foo #1 > bar #0 > baz #3 > prop a" - , " Expected text, got: 1" - ] + KDL.decodeFileWith decoder file `shouldSatisfy` P.returns (decodeErrorMsgSnapshot (Just file)) newtype FixtureKdlFile = FixtureKdlFile FilePath diff --git a/test/KDL/ParserSpec.hs b/test/KDL/ParserSpec.hs index 849a2fc..5f9031c 100644 --- a/test/KDL/ParserSpec.hs +++ b/test/KDL/ParserSpec.hs @@ -3,111 +3,53 @@ module KDL.ParserSpec (spec) where -import Data.Text (Text) -import Data.Text qualified as Text -import KDL.Parser -import KDL.Types ( - Entry (..), - Identifier (..), - Node (..), - NodeList (..), - Value (..), - ValueData (..), - ) +import Control.Monad (forM_) +import Data.Text.IO qualified as Text +import KDL qualified import Skeletest import Skeletest.Predicate qualified as P +import System.Directory (findExecutable, listDirectory) +import System.FilePath (takeExtension, ()) import System.IO.Temp (withSystemTempDirectory) +import System.Process (callProcess) spec :: Spec spec = do + let exampleConfig = "foo hello=world 1.0 { bar; }" + describe "parse" $ do it "parses a KDL document" $ do - let expected = - newNodeList - [ newNode - "foo" - [ newArg $ Number 1.0 - , newProp "hello" $ Text "world" - ] - ( Just - [ newNode "bar" [] (Just []) - ] - ) - ] - parse "foo hello=world 1.0 { bar; }" `shouldBe` Right expected + KDL.parse exampleConfig `shouldSatisfy` P.right P.matchesSnapshot + + describe "error messages" $ do + let test msg input = do + it msg $ KDL.parse input `shouldSatisfy` P.left P.matchesSnapshot - it "returns a textual error on parse failure" $ do - let msg = - Text.intercalate "\n" $ - [ "1:10:" - , " |" - , "1 | foo hello= 123" - , " | ^^" - , "unexpected \"= \"" - , "expecting Node Child, Node Space, or Node Terminator" - ] - parse "foo hello= 123" `shouldBe` Left msg + -- TODO: add more + test "Unquoted numeric prop name" "foo 123=123" + -- Most behavior tested in `parse` tests describe "parseFile" $ do it "parses a KDL document from a filepath" $ do withSystemTempDirectory "" $ \tmpdir -> do let file = tmpdir ++ "/test.kdl" - writeFile file "foo hello=world 1.0 { bar; }" - let expected = - newNodeList - [ newNode - "foo" - [ newArg $ Number 1.0 - , newProp "hello" $ Text "world" - ] - ( Just - [ newNode "bar" [] (Just []) - ] - ) - ] - parseFile file `shouldSatisfy` P.returns (P.right (P.eq expected)) - -{----- Helpers -----} - -newNodeList :: [Node] -> NodeList -newNodeList nodes = - NodeList - { nodes = nodes - , format = Nothing - } - -newNode :: Text -> [Entry] -> Maybe [Node] -> Node -newNode name entries children = - Node - { ann = Nothing - , name = newIdentifier name - , entries = entries - , children = newNodeList <$> children - , format = Nothing - } - -newArg :: ValueData -> Entry -newArg = newEntry Nothing - -newProp :: Text -> ValueData -> Entry -newProp = newEntry . Just - -newEntry :: Maybe Text -> ValueData -> Entry -newEntry mName data_ = - Entry - { name = newIdentifier <$> mName - , value = - Value - { ann = Nothing - , data_ = data_ - , format = Nothing - } - , format = Nothing - } - -newIdentifier :: Text -> Identifier -newIdentifier value = - Identifier - { value = value - , format = Nothing - } + Text.writeFile file exampleConfig + Right actual <- KDL.parseFile file + Right expected <- pure $ KDL.parse exampleConfig + -- tested in `parse` + actual `shouldBe` expected + + describe "kdl-test examples" $ do + it "decodes correctly" $ do + decoder <- findExecutable "kdl-hs-test-decoder" >>= maybe (error "Could not find kdl-hs-test-decoder") pure + callProcess "scripts/kdl-test" ["run", "--decoder", decoder] + + it "roundtrips successfully" $ do + FixtureTmpDir tmpdir <- getFixture + let dir = tmpdir "kdl-examples" + callProcess "scripts/kdl-test" ["extract", "--dir", dir] + files <- filter ((== ".kdl") . takeExtension) <$> listDirectory (dir "valid") + forM_ files $ \file -> do + context file $ do + content <- Text.readFile (dir "valid" file) + (fmap KDL.render . KDL.parse) content `shouldBe` Right content diff --git a/test/KDL/TestUtils/AST.hs b/test/KDL/TestUtils/AST.hs new file mode 100644 index 0000000..c785ff2 --- /dev/null +++ b/test/KDL/TestUtils/AST.hs @@ -0,0 +1,51 @@ +{-# LANGUAGE RecordWildCards #-} + +module KDL.TestUtils.AST ( + scrubFormat, +) where + +import KDL.Types + +class ScrubFormat a where + scrubFormat :: a -> a +instance ScrubFormat NodeList where + scrubFormat NodeList{..} = + NodeList + { nodes = map scrubFormat nodes + , format = Nothing + } +instance ScrubFormat Node where + scrubFormat Node{..} = + Node + { ann = scrubFormat <$> ann + , name = scrubFormat name + , entries = map scrubFormat entries + , children = scrubFormat <$> children + , format = Nothing + } +instance ScrubFormat Entry where + scrubFormat Entry{..} = + Entry + { name = scrubFormat <$> name + , value = scrubFormat value + , format = Nothing + } +instance ScrubFormat Value where + scrubFormat Value{..} = + Value + { ann = scrubFormat <$> ann + , data_ = data_ + , format = Nothing + } +instance ScrubFormat Ann where + scrubFormat Ann{..} = + Ann + { identifier = scrubFormat identifier + , format = Nothing + } +instance ScrubFormat Identifier where + scrubFormat Identifier{..} = + Identifier + { value = value + , format = Nothing + } diff --git a/test/KDL/__snapshots__/DecoderSpec.snap.md b/test/KDL/__snapshots__/DecoderSpec.snap.md new file mode 100644 index 0000000..0650b30 --- /dev/null +++ b/test/KDL/__snapshots__/DecoderSpec.snap.md @@ -0,0 +1,53 @@ +# test/KDL/DecoderSpec.hs + +## decodeFileWith / fails with helpful error if parsing fails + +``` +test_config.kdl:1:8: + | +1 | foo 123=123 + | ^ +unexpected '=' +expecting children block, decimal point, end of node, exponent, or node prop or arg +``` + +## decodeFileWith / fails with user-defined error + +``` +Failed to decode test_config.kdl: +At: foo #0 > arg #0 + Got negative number: -1.0 +``` + +## decodeFileWith / shows context in deeply nested error + +``` +Failed to decode test_config.kdl: +At: foo #1 > bar #0 > baz #3 > prop a + Expected text, got: 1 +``` + +## decodeWith / fails with helpful error if parsing fails + +``` +1:8: + | +1 | foo 123=123 + | ^ +unexpected '=' +expecting children block, decimal point, end of node, exponent, or node prop or arg +``` + +## decodeWith / fails with user-defined error + +``` +At: foo #0 > arg #0 + Got negative number: -1.0 +``` + +## decodeWith / shows context in deeply nested error + +``` +At: foo #1 > bar #0 > baz #3 > prop a + Expected text, got: 1 +``` diff --git a/test/KDL/__snapshots__/ParserSpec.snap.md b/test/KDL/__snapshots__/ParserSpec.snap.md new file mode 100644 index 0000000..ee5097e --- /dev/null +++ b/test/KDL/__snapshots__/ParserSpec.snap.md @@ -0,0 +1,94 @@ +# test/KDL/ParserSpec.hs + +## parse / error messages / Unquoted numeric prop name + +``` +1:8: + | +1 | foo 123=123 + | ^ +unexpected '=' +expecting children block, decimal point, end of node, exponent, or node prop or arg +``` + +## parse / parses a KDL document + +```haskell +NodeList + { nodes = + [ Node + { ann = Nothing + , name = + Identifier + { value = "foo" , format = Just IdentifierFormat { repr = "foo" } } + , entries = + [ Entry + { name = + Just + Identifier + { value = "hello" + , format = Just IdentifierFormat { repr = "hello" } + } + , value = + Value + { ann = Nothing + , data_ = Text "world" + , format = Just ValueFormat { repr = "world" } + } + , format = + Just + EntryFormat + { leading = " " , afterKey = "" , afterEq = "" , trailing = "" } + } + , Entry + { name = Nothing + , value = + Value + { ann = Nothing + , data_ = Number 1.0 + , format = Just ValueFormat { repr = "1.0" } + } + , format = + Just + EntryFormat + { leading = " " , afterKey = "" , afterEq = "" , trailing = "" } + } + ] + , children = + Just + NodeList + { nodes = + [ Node + { ann = Nothing + , name = + Identifier + { value = "bar" , format = Just IdentifierFormat { repr = "bar" } } + , entries = [] + , children = Nothing + , format = + Just + NodeFormat + { leading = " " + , beforeChildren = "" + , beforeTerminator = "" + , terminator = ";" + , trailing = "" + } + } + ] + , format = Just NodeListFormat { leading = "" , trailing = " " } + } + , format = + Just + NodeFormat + { leading = "" + , beforeChildren = " " + , beforeTerminator = "" + , terminator = "" + , trailing = "" + } + } + ] + , format = Just NodeListFormat { leading = "" , trailing = "" } + } +``` diff --git a/test/Main.hs b/test/Main.hs index 237dc43..8609306 100644 --- a/test/Main.hs +++ b/test/Main.hs @@ -1 +1,23 @@ +{-# LANGUAGE AllowAmbiguousTypes #-} +{-# LANGUAGE OverloadedStrings #-} + +import Data.Text qualified as Text +import Data.Typeable (Typeable) +import KDL qualified import Skeletest.Main +import Text.Show.Pretty (ppShow) + +snapshotRenderers :: [SnapshotRenderer] +snapshotRenderers = + [ hsRender @KDL.NodeList + , hsRender @KDL.Node + , hsRender @KDL.Entry + , hsRender @KDL.Value + ] + where + hsRender :: forall a. (Show a, Typeable a) => SnapshotRenderer + hsRender = + SnapshotRenderer + { render = Text.pack . ppShow @a + , snapshotLang = Just "haskell" + } diff --git a/test/kdl-hs-test-decoder.hs b/test/kdl-hs-test-decoder.hs new file mode 100644 index 0000000..cd8782d --- /dev/null +++ b/test/kdl-hs-test-decoder.hs @@ -0,0 +1,74 @@ +{-# LANGUAGE LambdaCase #-} +{-# LANGUAGE OverloadedRecordDot #-} +{-# LANGUAGE OverloadedStrings #-} + +import Data.Aeson ((.=)) +import Data.Aeson qualified as Aeson +import Data.ByteString.Lazy.Char8 qualified as ByteStringL +import Data.Either (partitionEithers) +import Data.Map qualified as Map +import Data.Scientific qualified as Scientific +import Data.Text qualified as Text +import Data.Text.IO qualified as Text +import KDL qualified +import System.Exit (exitFailure) +import System.IO (stderr) + +main :: IO () +main = do + input <- Text.getContents + doc <- + case KDL.parse input of + Right doc -> pure doc + Left e -> do + Text.hPutStrLn stderr e + exitFailure + ByteStringL.putStrLn . Aeson.encode $ encodeNodeList doc + +encodeNodeList :: KDL.NodeList -> Aeson.Value +encodeNodeList nodes = Aeson.toJSON . map encodeNode $ nodes.nodes + +encodeNode :: KDL.Node -> Aeson.Value +encodeNode node = + Aeson.object + [ "type" .= (encodeAnn <$> node.ann) + , "name" .= encodeIdentifier node.name + , "args" .= args + , "props" .= Map.fromList props + , "children" .= maybe (Aeson.Array mempty) encodeNodeList node.children + ] + where + (args, props) = + partitionEithers + [ case entry.name of + Nothing -> Left val + Just name -> Right (name.value, val) + | entry <- node.entries + , let val = encodeEntry entry + ] + +encodeEntry :: KDL.Entry -> Aeson.Value +encodeEntry entry = + Aeson.object + [ "type" .= (encodeAnn <$> entry.value.ann) + , "value" .= encodeValueData entry.value.data_ + ] + +encodeValueData :: KDL.ValueData -> Aeson.Value +encodeValueData = \case + KDL.Text s -> val "string" (Text.unpack s) + KDL.Number x -> val "number" (Scientific.formatScientific Scientific.Fixed Nothing x) + KDL.Bool x -> val "boolean" (if x then "true" else "false") + KDL.Inf -> val "number" "inf" + KDL.NegInf -> val "number" "-inf" + KDL.NaN -> val "number" "nan" + KDL.Null -> Aeson.object ["type" .= Text.pack "null"] + where + val :: String -> String -> Aeson.Value + val ty v = Aeson.object ["type" .= ty, "value" .= v] + +encodeAnn :: KDL.Ann -> Aeson.Value +encodeAnn = encodeIdentifier . (.identifier) + +encodeIdentifier :: KDL.Identifier -> Aeson.Value +encodeIdentifier = Aeson.toJSON . (.value)