summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMatthew Hall <hallmatthew314@gmail.com>2023-02-17 23:45:26 +1300
committerMatthew Hall <hallmatthew314@gmail.com>2023-02-17 23:45:26 +1300
commitc8e97a9e6ebfefec0db5bc51bd095b3d10dfd078 (patch)
treeb89d2f152e765dc23724a046915d41e0379f0a5d
parent342ba2c6d5e738f4ad5eb03a297a561ce43d6b5f (diff)
Lexing and parsing overhaul
-rw-r--r--DSL/BaseParsers.hs19
-rw-r--r--DSL/Parsing.hs142
-rw-r--r--DSL/Types.hs16
3 files changed, 95 insertions, 82 deletions
diff --git a/DSL/BaseParsers.hs b/DSL/BaseParsers.hs
index 403ca42..b4b5234 100644
--- a/DSL/BaseParsers.hs
+++ b/DSL/BaseParsers.hs
@@ -69,6 +69,13 @@ infixl 3 `alt`
alt :: Parser t a -> Parser t a -> Parser t a
alt (Parser p) (Parser q) = Parser $ \ inp -> p inp <|> q inp
+chain :: Parser s [t] -> Parser t a -> Parser s a
+chain p q = do
+ ts <- p
+ case parse q ts of
+ Nothing -> flunk
+ Just (_, x) -> result x
+
-----------------------------------------------------------------------------
phrase :: Parser t a -> Parser t a
@@ -77,8 +84,14 @@ phrase = (<* eof)
recover :: a -> Parser t a -> Parser t a
recover x p = p <|> pure x
-optional :: Parser t a -> Parser t (Maybe a)
-optional = recover Nothing . fmap Just
+optionalMaybe :: Parser t a -> Parser t (Maybe a)
+optionalMaybe = recover Nothing . fmap Just
+
+optionalEither :: e -> Parser t a -> Parser t (Either e a)
+optionalEither e = recover (Left e) . fmap Right
+
+optional :: Parser t a -> Parser t ()
+optional p = () <$ optionalMaybe p
assert :: (a -> Bool) -> Parser t a -> Parser t a
assert f p = do
@@ -108,7 +121,7 @@ atLeast :: Int -> Parser t a -> Parser t [a]
atLeast i p = exactly i p <> mult p
atMost :: Int -> Parser t a -> Parser t [a]
-atMost i = fmap catMaybes . exactly i . optional
+atMost i = fmap catMaybes . exactly i . optionalMaybe
between :: Int -> Int -> Parser t a -> Parser t [a]
between x y p = exactly (min x y) p <> atMost (abs $ x - y) p
diff --git a/DSL/Parsing.hs b/DSL/Parsing.hs
index 0b07679..ef7da9e 100644
--- a/DSL/Parsing.hs
+++ b/DSL/Parsing.hs
@@ -5,91 +5,94 @@ import Data.Char (isDigit, isSpace)
import DSL.Types
import DSL.BaseParsers
-buildDSLLexer :: (Parser Char [Char]) -> ([Char] -> TokenData) -> DSLLexer
-buildDSLLexer p f = do
+buildDSLLexer :: Parser Char [Char] -> TokenTag -> DSLLexer
+buildDSLLexer p t = do
str <- p
- return DSLToken { tStr=str
- , tData=f str
- }
+ return Token { tStr=str, tTag=t }
-keywordL :: String -> TokenData -> DSLLexer
-keywordL s d = buildDSLLexer (list s) (const d)
+fromStringL :: String -> TokenTag -> DSLLexer
+fromStringL s t = buildDSLLexer (list s) t
-intrinsicL :: String -> Intrinsic -> DSLLexer
-intrinsicL s i = keywordL s $ T_INTRINSIC i
+fromTableL :: [(String, TokenTag)] -> DSLLexer
+fromTableL table = firstOf $ map (uncurry fromStringL) table
wsL :: DSLLexer
-wsL = buildDSLLexer (mult1 $ satisfy isSpace) (const T_WHITESPACE)
+wsL = buildDSLLexer (mult1 $ satisfy isSpace) T_WHITESPACE
boolLiteralL :: DSLLexer
boolLiteralL = t `alt` f
where
- t = buildDSLLexer (list "true") (const $ T_BOOL_LITERAL True)
- f = buildDSLLexer (list "false") (const $ T_BOOL_LITERAL False)
+ t = buildDSLLexer (list "true") T_BOOL_LITERAL
+ f = buildDSLLexer (list "false") T_BOOL_LITERAL
intLiteralL :: DSLLexer
-intLiteralL = buildDSLLexer go (T_INT_LITERAL . read)
+intLiteralL = buildDSLLexer go T_INT_LITERAL
where
go = do
- sign <- optional $ token '-'
+ sign <- optionalMaybe $ token '-'
digits <- mult1 $ satisfy isDigit
result $ maybe digits (:digits) sign
-mainLexer :: Parser Char [DSLToken]
-mainLexer = phrase $ mult1 $ firstOf subLexers
- where
- subLexers = [wsL]
- ++ keywords
- ++ literals
- ++ intrinsics
- keywords = map (uncurry keywordL) [ ("IF", T_IF)
- , ("ELSE", T_ELSE)
- , ("WHILE", T_WHILE)
- , ("DO", T_DO)
- , ("END", T_END)
- ]
- literals = [ boolLiteralL
- , intLiteralL
- ]
- intrinsics = map (uncurry intrinsicL) [ (".", I_DUMP)
- , ("DROP", I_DROP)
- , ("SWAP", I_SWAP)
- , ("DUP", I_DUP)
- , ("OVER", I_OVER)
- , ("ROT", I_ROT)
- , ("+", I_PLUS)
- , ("-", I_MINUS)
- , ("*", I_TIMES)
- , ("/%", I_DIVMOD)
- , ("!", I_NOT)
- , ("&&", I_AND)
- , ("||", I_OR)
- , ("^", I_XOR)
- , ("==", I_EQUAL)
- , ("<", I_LESSTHAN)
- , (">", I_GREATERTHAN)
- ]
-
-stripWhitespace :: [DSLToken] -> [DSLToken]
-stripWhitespace = filter $ not . (==T_WHITESPACE) . tData
+keywordL :: DSLLexer
+keywordL = fromTableL [ ("IF", T_IF)
+ , ("ELSE", T_ELSE)
+ , ("WHILE", T_WHILE)
+ , ("DO", T_DO)
+ , ("END", T_END)
+ ]
+
+literalL :: DSLLexer
+literalL = firstOf [ boolLiteralL
+ , intLiteralL
+ ]
+
+intrinsicL :: DSLLexer
+intrinsicL = fromTableL [ (".", T_INTRINSIC I_DUMP)
+ , ("DROP", T_INTRINSIC I_DROP)
+ , ("SWAP", T_INTRINSIC I_SWAP)
+ , ("DUP", T_INTRINSIC I_DUP)
+ , ("OVER", T_INTRINSIC I_OVER)
+ , ("ROT", T_INTRINSIC I_ROT)
+ , ("+", T_INTRINSIC I_PLUS)
+ , ("-", T_INTRINSIC I_MINUS)
+ , ("*", T_INTRINSIC I_TIMES)
+ , ("/%", T_INTRINSIC I_DIVMOD)
+ , ("!", T_INTRINSIC I_NOT)
+ , ("&&", T_INTRINSIC I_AND)
+ , ("||", T_INTRINSIC I_OR)
+ , ("^", T_INTRINSIC I_XOR)
+ , ("==", T_INTRINSIC I_EQUAL)
+ , ("<", T_INTRINSIC I_LESSTHAN)
+ , (">", T_INTRINSIC I_GREATERTHAN)
+ ]
+
+lexemeL :: DSLLexer
+lexemeL = firstOf [ keywordL
+ , literalL
+ , intrinsicL
+ ]
+
+tokenizer :: Parser Char [Token]
+tokenizer = optional wsL *> lexemeL `sepBy` wsL <* optional wsL
------------------------------------------------------------------------------
-- Parsing
------------------------------------------------------------------------------
-tokenDataP :: TokenData -> DSLParser DSLToken
-tokenDataP t = satisfy $ (==t) . tData
+tagP :: TokenTag -> DSLParser Token
+tagP t = satisfy $ (==t) . tTag
wsP :: DSLParser ()
-wsP = () <$ tokenDataP T_WHITESPACE
+wsP = () <$ tagP T_WHITESPACE
dataLiteralP :: DSLParser StackData
dataLiteralP = do
- t <- anyToken
- case tData t of
- T_INT_LITERAL x -> result $ StackInt x
- T_BOOL_LITERAL x -> result $ StackBool x
- _ -> flunk
+ Token { tTag=t, tStr=s } <- anyToken
+ case (t, s) of
+ (T_BOOL_LITERAL, "true") -> result $ StackBool True
+ (T_BOOL_LITERAL, "false") -> result $ StackBool False
+ (T_INT_LITERAL, _) -> result $ StackInt $ read s
+ _ -> flunk
pushDataP :: DSLParser Operation
pushDataP = OpPushData <$> dataLiteralP
@@ -97,7 +100,7 @@ pushDataP = OpPushData <$> dataLiteralP
intrinsicP :: DSLParser Operation
intrinsicP = do
t <- anyToken
- case tData t of
+ case tTag t of
T_INTRINSIC i -> result $ OpIntrinsic i
_ -> flunk
@@ -111,21 +114,21 @@ linearP = BLinear <$> mult1 operationP
ifP :: DSLParser Block
ifP = do
- c <- tokenDataP T_IF *> mult blockP <* tokenDataP T_DO
- b <- mult blockP <* tokenDataP T_END
+ c <- tagP T_IF *> mult blockP <* tagP T_DO
+ b <- mult blockP <* tagP T_END
return $ BIf c b
ifElseP :: DSLParser Block
ifElseP = do
- c <- tokenDataP T_IF *> mult blockP <* tokenDataP T_DO
- b1 <- mult blockP <* tokenDataP T_ELSE
- b2 <- mult blockP <* tokenDataP T_END
+ c <- tagP T_IF *> mult blockP <* tagP T_DO
+ b1 <- mult blockP <* tagP T_ELSE
+ b2 <- mult blockP <* tagP T_END
return $ BIfElse c b1 b2
whileP :: DSLParser Block
whileP = do
- c <- tokenDataP T_WHILE *> mult blockP <* tokenDataP T_DO
- b <- mult blockP <* tokenDataP T_END
+ c <- tagP T_WHILE *> mult blockP <* tagP T_DO
+ b <- mult blockP <* tagP T_END
return $ BWhile c b
blockP :: DSLParser Block
@@ -139,8 +142,5 @@ programP :: DSLParser Program
programP = phrase $ mult1 blockP
stringToProgram :: String -> Maybe Program
-stringToProgram str = do
- (_, tokens) <- parse mainLexer str
- (_, program) <- parse programP $ stripWhitespace tokens
- return program
+stringToProgram = fmap snd . parse (chain tokenizer programP)
diff --git a/DSL/Types.hs b/DSL/Types.hs
index 2c33aad..8d59a06 100644
--- a/DSL/Types.hs
+++ b/DSL/Types.hs
@@ -63,22 +63,22 @@ data Machine = Machine { ok :: Bool
, stack :: Stack
}
-data TokenData
+data TokenTag
= T_WHITESPACE
| T_IF
| T_ELSE
| T_WHILE
| T_DO
| T_END
- | T_BOOL_LITERAL Bool
- | T_INT_LITERAL Integer
+ | T_BOOL_LITERAL
+ | T_INT_LITERAL
| T_INTRINSIC Intrinsic
deriving (Show, Eq)
-data DSLToken = DSLToken { tStr :: String -- original text
- , tData :: TokenData -- actual data
- } deriving (Show)
+data Token = Token { tStr :: String -- original text
+ , tTag :: TokenTag -- actual data
+ } deriving (Show)
-type DSLLexer = Parser Char DSLToken
-type DSLParser = Parser DSLToken
+type DSLLexer = Parser Char Token
+type DSLParser = Parser Token