From c8e97a9e6ebfefec0db5bc51bd095b3d10dfd078 Mon Sep 17 00:00:00 2001 From: Matthew Hall Date: Fri, 17 Feb 2023 23:45:26 +1300 Subject: Lexing and parsing overhaul --- DSL/BaseParsers.hs | 19 +++++-- DSL/Parsing.hs | 142 ++++++++++++++++++++++++++--------------------------- DSL/Types.hs | 16 +++--- 3 files changed, 95 insertions(+), 82 deletions(-) (limited to 'DSL') diff --git a/DSL/BaseParsers.hs b/DSL/BaseParsers.hs index 403ca42..b4b5234 100644 --- a/DSL/BaseParsers.hs +++ b/DSL/BaseParsers.hs @@ -69,6 +69,13 @@ infixl 3 `alt` alt :: Parser t a -> Parser t a -> Parser t a alt (Parser p) (Parser q) = Parser $ \ inp -> p inp <|> q inp +chain :: Parser s [t] -> Parser t a -> Parser s a +chain p q = do + ts <- p + case parse q ts of + Nothing -> flunk + Just (_, x) -> result x + ----------------------------------------------------------------------------- phrase :: Parser t a -> Parser t a @@ -77,8 +84,14 @@ phrase = (<* eof) recover :: a -> Parser t a -> Parser t a recover x p = p <|> pure x -optional :: Parser t a -> Parser t (Maybe a) -optional = recover Nothing . fmap Just +optionalMaybe :: Parser t a -> Parser t (Maybe a) +optionalMaybe = recover Nothing . fmap Just + +optionalEither :: e -> Parser t a -> Parser t (Either e a) +optionalEither e = recover (Left e) . fmap Right + +optional :: Parser t a -> Parser t () +optional p = () <$ optionalMaybe p assert :: (a -> Bool) -> Parser t a -> Parser t a assert f p = do @@ -108,7 +121,7 @@ atLeast :: Int -> Parser t a -> Parser t [a] atLeast i p = exactly i p <> mult p atMost :: Int -> Parser t a -> Parser t [a] -atMost i = fmap catMaybes . exactly i . optional +atMost i = fmap catMaybes . exactly i . optionalMaybe between :: Int -> Int -> Parser t a -> Parser t [a] between x y p = exactly (min x y) p <> atMost (abs $ x - y) p diff --git a/DSL/Parsing.hs b/DSL/Parsing.hs index 0b07679..ef7da9e 100644 --- a/DSL/Parsing.hs +++ b/DSL/Parsing.hs @@ -5,91 +5,94 @@ import Data.Char (isDigit, isSpace) import DSL.Types import DSL.BaseParsers -buildDSLLexer :: (Parser Char [Char]) -> ([Char] -> TokenData) -> DSLLexer -buildDSLLexer p f = do +buildDSLLexer :: Parser Char [Char] -> TokenTag -> DSLLexer +buildDSLLexer p t = do str <- p - return DSLToken { tStr=str - , tData=f str - } + return Token { tStr=str, tTag=t } -keywordL :: String -> TokenData -> DSLLexer -keywordL s d = buildDSLLexer (list s) (const d) +fromStringL :: String -> TokenTag -> DSLLexer +fromStringL s t = buildDSLLexer (list s) t -intrinsicL :: String -> Intrinsic -> DSLLexer -intrinsicL s i = keywordL s $ T_INTRINSIC i +fromTableL :: [(String, TokenTag)] -> DSLLexer +fromTableL table = firstOf $ map (uncurry fromStringL) table wsL :: DSLLexer -wsL = buildDSLLexer (mult1 $ satisfy isSpace) (const T_WHITESPACE) +wsL = buildDSLLexer (mult1 $ satisfy isSpace) T_WHITESPACE boolLiteralL :: DSLLexer boolLiteralL = t `alt` f where - t = buildDSLLexer (list "true") (const $ T_BOOL_LITERAL True) - f = buildDSLLexer (list "false") (const $ T_BOOL_LITERAL False) + t = buildDSLLexer (list "true") T_BOOL_LITERAL + f = buildDSLLexer (list "false") T_BOOL_LITERAL intLiteralL :: DSLLexer -intLiteralL = buildDSLLexer go (T_INT_LITERAL . read) +intLiteralL = buildDSLLexer go T_INT_LITERAL where go = do - sign <- optional $ token '-' + sign <- optionalMaybe $ token '-' digits <- mult1 $ satisfy isDigit result $ maybe digits (:digits) sign -mainLexer :: Parser Char [DSLToken] -mainLexer = phrase $ mult1 $ firstOf subLexers - where - subLexers = [wsL] - ++ keywords - ++ literals - ++ intrinsics - keywords = map (uncurry keywordL) [ ("IF", T_IF) - , ("ELSE", T_ELSE) - , ("WHILE", T_WHILE) - , ("DO", T_DO) - , ("END", T_END) - ] - literals = [ boolLiteralL - , intLiteralL - ] - intrinsics = map (uncurry intrinsicL) [ (".", I_DUMP) - , ("DROP", I_DROP) - , ("SWAP", I_SWAP) - , ("DUP", I_DUP) - , ("OVER", I_OVER) - , ("ROT", I_ROT) - , ("+", I_PLUS) - , ("-", I_MINUS) - , ("*", I_TIMES) - , ("/%", I_DIVMOD) - , ("!", I_NOT) - , ("&&", I_AND) - , ("||", I_OR) - , ("^", I_XOR) - , ("==", I_EQUAL) - , ("<", I_LESSTHAN) - , (">", I_GREATERTHAN) - ] - -stripWhitespace :: [DSLToken] -> [DSLToken] -stripWhitespace = filter $ not . (==T_WHITESPACE) . tData +keywordL :: DSLLexer +keywordL = fromTableL [ ("IF", T_IF) + , ("ELSE", T_ELSE) + , ("WHILE", T_WHILE) + , ("DO", T_DO) + , ("END", T_END) + ] + +literalL :: DSLLexer +literalL = firstOf [ boolLiteralL + , intLiteralL + ] + +intrinsicL :: DSLLexer +intrinsicL = fromTableL [ (".", T_INTRINSIC I_DUMP) + , ("DROP", T_INTRINSIC I_DROP) + , ("SWAP", T_INTRINSIC I_SWAP) + , ("DUP", T_INTRINSIC I_DUP) + , ("OVER", T_INTRINSIC I_OVER) + , ("ROT", T_INTRINSIC I_ROT) + , ("+", T_INTRINSIC I_PLUS) + , ("-", T_INTRINSIC I_MINUS) + , ("*", T_INTRINSIC I_TIMES) + , ("/%", T_INTRINSIC I_DIVMOD) + , ("!", T_INTRINSIC I_NOT) + , ("&&", T_INTRINSIC I_AND) + , ("||", T_INTRINSIC I_OR) + , ("^", T_INTRINSIC I_XOR) + , ("==", T_INTRINSIC I_EQUAL) + , ("<", T_INTRINSIC I_LESSTHAN) + , (">", T_INTRINSIC I_GREATERTHAN) + ] + +lexemeL :: DSLLexer +lexemeL = firstOf [ keywordL + , literalL + , intrinsicL + ] + +tokenizer :: Parser Char [Token] +tokenizer = optional wsL *> lexemeL `sepBy` wsL <* optional wsL ------------------------------------------------------------------------------ -- Parsing ------------------------------------------------------------------------------ -tokenDataP :: TokenData -> DSLParser DSLToken -tokenDataP t = satisfy $ (==t) . tData +tagP :: TokenTag -> DSLParser Token +tagP t = satisfy $ (==t) . tTag wsP :: DSLParser () -wsP = () <$ tokenDataP T_WHITESPACE +wsP = () <$ tagP T_WHITESPACE dataLiteralP :: DSLParser StackData dataLiteralP = do - t <- anyToken - case tData t of - T_INT_LITERAL x -> result $ StackInt x - T_BOOL_LITERAL x -> result $ StackBool x - _ -> flunk + Token { tTag=t, tStr=s } <- anyToken + case (t, s) of + (T_BOOL_LITERAL, "true") -> result $ StackBool True + (T_BOOL_LITERAL, "false") -> result $ StackBool False + (T_INT_LITERAL, _) -> result $ StackInt $ read s + _ -> flunk pushDataP :: DSLParser Operation pushDataP = OpPushData <$> dataLiteralP @@ -97,7 +100,7 @@ pushDataP = OpPushData <$> dataLiteralP intrinsicP :: DSLParser Operation intrinsicP = do t <- anyToken - case tData t of + case tTag t of T_INTRINSIC i -> result $ OpIntrinsic i _ -> flunk @@ -111,21 +114,21 @@ linearP = BLinear <$> mult1 operationP ifP :: DSLParser Block ifP = do - c <- tokenDataP T_IF *> mult blockP <* tokenDataP T_DO - b <- mult blockP <* tokenDataP T_END + c <- tagP T_IF *> mult blockP <* tagP T_DO + b <- mult blockP <* tagP T_END return $ BIf c b ifElseP :: DSLParser Block ifElseP = do - c <- tokenDataP T_IF *> mult blockP <* tokenDataP T_DO - b1 <- mult blockP <* tokenDataP T_ELSE - b2 <- mult blockP <* tokenDataP T_END + c <- tagP T_IF *> mult blockP <* tagP T_DO + b1 <- mult blockP <* tagP T_ELSE + b2 <- mult blockP <* tagP T_END return $ BIfElse c b1 b2 whileP :: DSLParser Block whileP = do - c <- tokenDataP T_WHILE *> mult blockP <* tokenDataP T_DO - b <- mult blockP <* tokenDataP T_END + c <- tagP T_WHILE *> mult blockP <* tagP T_DO + b <- mult blockP <* tagP T_END return $ BWhile c b blockP :: DSLParser Block @@ -139,8 +142,5 @@ programP :: DSLParser Program programP = phrase $ mult1 blockP stringToProgram :: String -> Maybe Program -stringToProgram str = do - (_, tokens) <- parse mainLexer str - (_, program) <- parse programP $ stripWhitespace tokens - return program +stringToProgram = fmap snd . parse (chain tokenizer programP) diff --git a/DSL/Types.hs b/DSL/Types.hs index 2c33aad..8d59a06 100644 --- a/DSL/Types.hs +++ b/DSL/Types.hs @@ -63,22 +63,22 @@ data Machine = Machine { ok :: Bool , stack :: Stack } -data TokenData +data TokenTag = T_WHITESPACE | T_IF | T_ELSE | T_WHILE | T_DO | T_END - | T_BOOL_LITERAL Bool - | T_INT_LITERAL Integer + | T_BOOL_LITERAL + | T_INT_LITERAL | T_INTRINSIC Intrinsic deriving (Show, Eq) -data DSLToken = DSLToken { tStr :: String -- original text - , tData :: TokenData -- actual data - } deriving (Show) +data Token = Token { tStr :: String -- original text + , tTag :: TokenTag -- actual data + } deriving (Show) -type DSLLexer = Parser Char DSLToken -type DSLParser = Parser DSLToken +type DSLLexer = Parser Char Token +type DSLParser = Parser Token -- cgit v1.2.1