diff options
| author | Matthew Hall <hallmatthew314@gmail.com> | 2023-02-23 23:05:54 +1300 |
|---|---|---|
| committer | Matthew Hall <hallmatthew314@gmail.com> | 2023-02-23 23:05:54 +1300 |
| commit | 1c2e8f59960c18b5e5794fba214a3f0906fb074d (patch) | |
| tree | 0c1d1a421ad50b5d704d512a328e13aa693deefa | |
| parent | 06692c8e1754ac8d5d671160b839723e3610fcf1 (diff) | |
Parsing overhaul (slightly better errors)
| -rw-r--r-- | DSL.hs | 4 | ||||
| -rw-r--r-- | DSL/BaseParsers.hs | 108 | ||||
| -rw-r--r-- | DSL/Interpretation.hs | 4 | ||||
| -rw-r--r-- | DSL/Parsing.hs | 164 | ||||
| -rw-r--r-- | DSL/StdLib.hs | 7 | ||||
| -rw-r--r-- | DSL/Types.hs | 2 | ||||
| -rw-r--r-- | DSL/Util.hs | 12 |
7 files changed, 173 insertions, 128 deletions
@@ -5,9 +5,7 @@ import DSL.Parsing import DSL.Interpretation interpretFromString :: String -> IO () -interpretFromString = maybe err interpret . stringToProgram - where - err = putStrLn "Unable to parse program" +interpretFromString = either putStrLn interpret . stringToProgram interpretFromFile :: FilePath -> IO () interpretFromFile path = readFile path >>= interpretFromString diff --git a/DSL/BaseParsers.hs b/DSL/BaseParsers.hs index 4da9c38..05eb509 100644 --- a/DSL/BaseParsers.hs +++ b/DSL/BaseParsers.hs @@ -5,9 +5,15 @@ module DSL.BaseParsers where import Control.Applicative (Alternative(..), liftA2) import Control.Monad (MonadPlus(..), guard) import Data.Maybe (catMaybes) +import Data.Void + +data ParserError + = ErrorLeaf String + | ErrorBranch String [ParserError] + deriving (Show) newtype Parser t a = - Parser { parse :: [t] -> Maybe ([t], a) } + Parser { parse :: [t] -> Either ParserError ([t], a) } instance Semigroup a => Semigroup (Parser t a) where (<>) = liftA2 (<>) @@ -33,7 +39,7 @@ instance Monad (Parser t) where parse (f x) inp' instance Alternative (Parser t) where - empty = flunk + empty = flunkWith "Flunked: empty Alternative" (<|>) = alt @@ -42,47 +48,87 @@ instance MonadPlus (Parser t) instance MonadFail (Parser t) where fail _ = mzero +type ErrorChanger t a = String -> Parser t a -> Parser t a + +nicePrintParserError :: ParserError -> String +nicePrintParserError = unlines . go 0 + where + indentSize = 4 + go indent (ErrorLeaf s) = [replicate indent ' ' ++ s] + go indent (ErrorBranch s es) = s':es' + where + s' = replicate indent ' ' ++ s + es' = es >>= go (indent + indentSize) + ----------------------------------------------------------------------------- +perr :: String -> Either ParserError Void +perr = Left . ErrorLeaf + +overwriteError :: ErrorChanger t a +overwriteError s p = Parser $ \ inp -> case parse p inp of + Left _ -> absurd <$> perr s + Right x -> Right x + +setError :: ErrorChanger t a +setError s p = Parser $ \ inp -> case parse p inp of + Left (ErrorLeaf _) -> absurd <$> perr s + Left (ErrorBranch _ es) -> Left $ ErrorBranch s es + Right x -> Right x + +prependError :: ErrorChanger t a +prependError s p = Parser $ \ inp -> case parse p inp of + Left e -> Left $ ErrorBranch s [e] + Right x -> Right x + flunk :: Parser t a -flunk = Parser $ \ _ -> Nothing +flunk = flunkWith "Flunked parser" + +flunkWith :: String -> Parser t a +flunkWith e = Parser $ \ _ -> absurd <$> perr e result :: a -> Parser t a -result x = Parser $ \ inp -> Just (inp, x) +result x = Parser $ \ inp -> Right (inp, x) peek :: Parser t a -> Parser t a -peek p = Parser $ \ inp -> do +peek p = prependError "peek:" $ Parser $ \ inp -> do (_, x) <- parse p inp return (inp, x) anyToken :: Parser t t anyToken = Parser $ \ inp -> case inp of - t:ts -> Just (ts, t) - _ -> Nothing + t:ts -> Right (ts, t) + _ -> absurd <$> perr "anyToken: Expected a token, got EOF" eof :: Parser t () eof = Parser $ \ inp -> case inp of - [] -> Just ([], ()) - _ -> Nothing + [] -> Right ([], ()) + _ -> absurd <$> perr "eof: Expected EOF, got a token" infixl 3 `alt` alt :: Parser t a -> Parser t a -> Parser t a -alt (Parser p) (Parser q) = Parser $ \ inp -> p inp <|> q inp +alt p q = Parser $ \ inp -> case (parse p inp, parse q inp) of + (Right x, _) -> Right x + (_, Right x) -> Right x + (Left e1, Left e2) -> Left $ case (e1, e2) of + (ErrorLeaf s, ErrorBranch b es) -> ErrorBranch b (ErrorLeaf s:es) + (ErrorBranch b es, ErrorLeaf s) -> ErrorBranch b (es ++ [ErrorLeaf s]) + _ -> ErrorBranch "alt:" [e1, e2] chain :: Parser s [t] -> Parser t a -> Parser s a -chain p q = do - ts <- p - case parse q ts of - Nothing -> flunk - Just (_, x) -> result x +chain p q = prependError "chain:" $ Parser $ \ inp1 -> case parse p inp1 of + Left e1 -> Left e1 + Right (inp1', inp2) -> case parse q inp2 of + Left e2 -> Left e2 + Right (_, x) -> Right (inp1', x) ----------------------------------------------------------------------------- phrase :: Parser t a -> Parser t a -phrase = (<* eof) +phrase = prependError "phrase:" . (<* eof) plus :: Parser t a -> Parser t b -> Parser t (a, b) -plus = liftA2 (,) +plus p q = prependError "plus:" $ liftA2 (,) p q recover :: a -> Parser t a -> Parser t a recover x p = p <|> pure x @@ -97,55 +143,55 @@ optional :: Parser t a -> Parser t () optional p = () <$ optionalMaybe p assert :: (a -> Bool) -> Parser t a -> Parser t a -assert f p = do +assert f p = prependError "assert:" $ do x <- p guard $ f x return x satisfy :: (t -> Bool) -> Parser t t -satisfy f = assert f anyToken +satisfy f = prependError "satisfy:" $ assert f anyToken token :: Eq t => t -> Parser t t -token t = satisfy (==t) +token t = prependError "token:" $ satisfy (==t) list :: Eq t => [t] -> Parser t [t] -list = traverse token +list = prependError "list:" . traverse token mult :: Parser t a -> Parser t [a] -mult = many +mult = prependError "mult:" . many mult1 :: Parser t a -> Parser t [a] -mult1 = some +mult1 = prependError "mult1:" . some exactly :: Int -> Parser t a -> Parser t [a] -exactly i = sequence . replicate i +exactly i = prependError "exactly:" . sequence . replicate i atLeast :: Int -> Parser t a -> Parser t [a] -atLeast i p = exactly i p <> mult p +atLeast i p = prependError "atLeast:" $ exactly i p <> mult p atMost :: Int -> Parser t a -> Parser t [a] -atMost i = fmap catMaybes . exactly i . optionalMaybe +atMost i = prependError "atMost" . fmap catMaybes . exactly i . optionalMaybe between :: Int -> Int -> Parser t a -> Parser t [a] -between x y p = exactly (min x y) p <> atMost (abs $ x - y) p +between x y p = prependError "between:" $ exactly (min x y) p <> atMost (abs $ x - y) p stopAt :: Parser t a -> Parser t [t] -stopAt p = go +stopAt p = setError "stopAt: failed to find stop point" $ go where p' = ([] <$ peek p) go = p' <|> liftA2 (:) anyToken go stopAfter :: Parser t a -> Parser t ([t], a) -stopAfter p = (,) <$> stopAt p <*> p +stopAfter p = prependError "stopAfter:" $ stopAt p `plus` p stopIf :: Parser t a -> Parser t [t] stopIf p = stopAt $ () <$ p <|> eof firstOf :: [Parser t a] -> Parser t a -firstOf = foldr1 (<|>) +firstOf = prependError "firstOf:" . foldr1 (<|>) sepBy1 :: Parser t a -> Parser t b -> Parser t [a] -sepBy1 a b = do +sepBy1 a b = prependError "sepBy1:" $ do x <- a xs <- mult $ b >> a return $ x:xs diff --git a/DSL/Interpretation.hs b/DSL/Interpretation.hs index d9c53bd..82a8ef2 100644 --- a/DSL/Interpretation.hs +++ b/DSL/Interpretation.hs @@ -98,8 +98,8 @@ evalBlocks (BWhile c b:bs) m = applyWhile c b m >>= evalBlocks bs interpret :: ProcTable -> IO () interpret t = case mergeProcTables stdlib t of - Nothing -> putStrLn "Failed to include stdlib, duplicate proc definition?" - Just t' -> case M.lookup entrypoint t' of + Left e -> putStrLn e + Right t' -> case M.lookup entrypoint t' of Nothing -> putStrLn "No MAIN proc defined, aborting." Just b -> () <$ evalBlocks b newMachine{ pTable=t' } diff --git a/DSL/Parsing.hs b/DSL/Parsing.hs index d4d1253..028b738 100644 --- a/DSL/Parsing.hs +++ b/DSL/Parsing.hs @@ -18,26 +18,28 @@ fromTableL :: [(String, TokenTag)] -> DSLLexer fromTableL table = firstOf $ map (uncurry fromStringL) table wsL :: DSLLexer -wsL = buildDSLLexer (mult1 $ satisfy isSpace) T_WHITESPACE +wsL = prependError "wsL:" $ buildDSLLexer (mult1 $ satisfy isSpace) T_WHITESPACE commentL :: DSLLexer -commentL = buildDSLLexer go T_COMMENT +commentL = prependError "commentL" $ buildDSLLexer go T_COMMENT where delim = list "'''" go = delim *> (fmap fst $ stopAfter delim) keywordL :: DSLLexer -keywordL = fromTableL [ ("PROC", T_PROC) - , ("IF", T_IF) - , ("ELIF", T_ELIF) - , ("ELSE", T_ELSE) - , ("WHILE", T_WHILE) - , ("DO", T_DO) - , ("END", T_END) - ] +keywordL = prependError "keywordL:" $ fromTableL t + where + t = [ ("PROC", T_PROC) + , ("IF", T_IF) + , ("ELIF", T_ELIF) + , ("ELSE", T_ELSE) + , ("WHILE", T_WHILE) + , ("DO", T_DO) + , ("END", T_END) + ] intLiteralL :: DSLLexer -intLiteralL = buildDSLLexer go T_INT_LITERAL +intLiteralL = prependError "intLiteralL:" $ buildDSLLexer go T_INT_LITERAL where go = do sign <- optionalMaybe $ token '-' @@ -45,59 +47,65 @@ intLiteralL = buildDSLLexer go T_INT_LITERAL result $ maybe digits (:digits) sign boolLiteralL :: DSLLexer -boolLiteralL = f "true" `alt` f "false" +boolLiteralL = prependError "boolLiteralL:" $ f "true" `alt` f "false" where f s = fromStringL s T_BOOL_LITERAL stringLiteralL :: DSLLexer -stringLiteralL = buildDSLLexer go T_STRING_LITERAL +stringLiteralL = prependError "stringLiteralL:" $ buildDSLLexer go T_STRING_LITERAL where go = token '"' *> strChars <* token '"' strChars = concat <$> mult strChar strChar = list ['\\', '"'] `alt` (pure <$> satisfy (/='"')) literalL :: DSLLexer -literalL = firstOf [ intLiteralL - , boolLiteralL - , stringLiteralL - ] +literalL = prependError "literalL:" $ firstOf ps + where + ps = [ intLiteralL + , boolLiteralL + , stringLiteralL + ] identifierL :: DSLLexer -identifierL = buildDSLLexer go T_IDENTIFIER +identifierL = prependError "identifierL:" $ buildDSLLexer go T_IDENTIFIER where go = mult1 $ satisfy $ not . isSpace intrinsicL :: DSLLexer -intrinsicL = fromTableL [ (".", T_INTRINSIC I_DUMP) - , ("DROP", T_INTRINSIC I_DROP) - , ("SWAP", T_INTRINSIC I_SWAP) - , ("DUP", T_INTRINSIC I_DUP) - , ("OVER", T_INTRINSIC I_OVER) - , ("ROT", T_INTRINSIC I_ROT) - , ("+", T_INTRINSIC I_PLUS) - , ("-", T_INTRINSIC I_MINUS) - , ("*", T_INTRINSIC I_TIMES) - , ("/%", T_INTRINSIC I_DIVMOD) - , ("!", T_INTRINSIC I_NOT) - , ("&&", T_INTRINSIC I_AND) - , ("||", T_INTRINSIC I_OR) - , ("^", T_INTRINSIC I_XOR) - , ("++", T_INTRINSIC I_STRCAT) - , ("==", T_INTRINSIC I_EQUAL) - , ("<", T_INTRINSIC I_LESSTHAN) - , (">", T_INTRINSIC I_GREATERTHAN) - ] +intrinsicL = prependError "intrinsicL:" $ fromTableL t + where + t = [ (".", T_INTRINSIC I_DUMP) + , ("DROP", T_INTRINSIC I_DROP) + , ("SWAP", T_INTRINSIC I_SWAP) + , ("DUP", T_INTRINSIC I_DUP) + , ("OVER", T_INTRINSIC I_OVER) + , ("ROT", T_INTRINSIC I_ROT) + , ("+", T_INTRINSIC I_PLUS) + , ("-", T_INTRINSIC I_MINUS) + , ("*", T_INTRINSIC I_TIMES) + , ("/%", T_INTRINSIC I_DIVMOD) + , ("!", T_INTRINSIC I_NOT) + , ("&&", T_INTRINSIC I_AND) + , ("||", T_INTRINSIC I_OR) + , ("^", T_INTRINSIC I_XOR) + , ("++", T_INTRINSIC I_STRCAT) + , ("==", T_INTRINSIC I_EQUAL) + , ("<", T_INTRINSIC I_LESSTHAN) + , (">", T_INTRINSIC I_GREATERTHAN) + ] lexemeL :: DSLLexer -lexemeL = firstOf [ commentL - , keywordL - , literalL - , intrinsicL - , identifierL - ] +lexemeL = prependError "lexemeL:" $ firstOf ls + where + ls = [ commentL + , keywordL + , literalL + , intrinsicL + , identifierL + ] tokenizer :: Parser Char [Token] -tokenizer = filter f <$> go +tokenizer = prependError "tokenizer:" $ filter f <$> go where go = optional wsL *> lexemeL `sepBy` wsL <* optional wsL f Token { tTag=t } = t /= T_COMMENT @@ -107,13 +115,13 @@ tokenizer = filter f <$> go ------------------------------------------------------------------------------ tagP :: TokenTag -> DSLParser Token -tagP t = satisfy $ (==t) . tTag +tagP t = prependError "tagP:" $ satisfy $ (==t) . tTag wsP :: DSLParser () -wsP = () <$ tagP T_WHITESPACE +wsP = prependError "wsP:" $ () <$ tagP T_WHITESPACE dataLiteralP :: DSLParser StackData -dataLiteralP = do +dataLiteralP = prependError "dataLiteralP:" $ do Token { tTag=t, tStr=s } <- anyToken case (t, s) of (T_BOOL_LITERAL, "true") -> result $ StackBool True @@ -123,29 +131,31 @@ dataLiteralP = do _ -> flunk pushDataP :: DSLParser Operation -pushDataP = OpPushData <$> dataLiteralP +pushDataP = prependError "pushDataP:" $ OpPushData <$> dataLiteralP intrinsicP :: DSLParser Operation -intrinsicP = do +intrinsicP = prependError "intrinsicP:" $ do t <- anyToken case tTag t of T_INTRINSIC i -> result $ OpIntrinsic i _ -> flunk callP :: DSLParser Operation -callP = OpCall . tStr <$> tagP T_IDENTIFIER +callP = prependError "callP:" $ OpCall . tStr <$> tagP T_IDENTIFIER operationP :: DSLParser Operation -operationP = firstOf [ pushDataP - , intrinsicP - , callP - ] +operationP = prependError "operationP:" $ firstOf ops + where + ops = [ pushDataP + , intrinsicP + , callP + ] linearP :: DSLParser Block -linearP = BLinear <$> mult1 operationP +linearP = prependError "linearP:" $ BLinear <$> mult1 operationP ifP :: DSLParser Block -ifP = do +ifP = prependError "ifP:" $ do first <- (tagP T_IF *> bp <* tagP T_DO) `plus` bp elifs <- mult $ (tagP T_ELIF *> bp <* tagP T_DO) `plus` bp elze <- optionalMaybe $ tagP T_ELSE >> bp @@ -158,41 +168,31 @@ ifP = do elseToPair Nothing = ([BLinear [OpPushData $ StackBool True]], []) elseToPair (Just bs) = ([BLinear [OpPushData $ StackBool True]], bs) ---ifP = do --- c <- tagP T_IF *> mult blockP <* tagP T_DO --- b <- mult blockP <* tagP T_END --- return $ BIf c b - -{- -exactly 1: IF [blocks] DO [blocks] -0 or many: ELIF [blocks] DO [blocks] -0 or 1: ELSE [blocks] -exactly 1: END - -IF c1 DO b1 ELIF c2 DO b2 ELIF c3 DO b3 ELSE b4 --} - - whileP :: DSLParser Block -whileP = do +whileP = prependError "whileP:" $ do c <- tagP T_WHILE *> mult blockP <* tagP T_DO b <- mult blockP <* tagP T_END return $ BWhile c b +blockP :: DSLParser Block +blockP = prependError "blockP:" $ firstOf bs + where + bs = [ whileP + , ifP + , linearP + ] + procP :: DSLParser ProcSpec -procP = front `plus` back +procP = prependError "procP:" $ front `plus` back where front = tagP T_PROC >> tStr <$> tagP T_IDENTIFIER back = mult blockP <* tagP T_END -blockP :: DSLParser Block -blockP = firstOf [ whileP - , ifP - , linearP - ] +programP :: DSLParser [ProcSpec] +programP = prependError "programP:" $ phrase $ mult procP -stringToProgram :: String -> Maybe ProcTable -stringToProgram = (>>=buildProcTable . snd) . parse (tokenizer `chain` program) - where - program = phrase $ mult procP +stringToProgram :: String -> Either String ProcTable +stringToProgram s = case parse (tokenizer `chain` programP) s of + Left e -> Left $ nicePrintParserError e + Right (_, x) -> buildProcTable x diff --git a/DSL/StdLib.hs b/DSL/StdLib.hs index 360e139..09c5338 100644 --- a/DSL/StdLib.hs +++ b/DSL/StdLib.hs @@ -3,16 +3,17 @@ module DSL.StdLib (stdlib) where import Data.Map.Strict (fromList) import DSL.Types -import DSL.BaseParsers (parse, mult, chain) +import DSL.BaseParsers (parse, mult, chain, nicePrintParserError) import DSL.Parsing (tokenizer, procP) stdlib :: ProcTable stdlib = procs where p = tokenizer `chain` mult procP + m = "Failed to parse standard library:\n" procs = case parse p (unlines sources) of - Nothing -> error "Failed to parse standard library" - Just (_, ps) -> fromList ps + Right (_, ps) -> fromList ps + Left e -> error $ m ++ nicePrintParserError e sources :: [String] sources = [ div' diff --git a/DSL/Types.hs b/DSL/Types.hs index aa1fb22..bcbe7a6 100644 --- a/DSL/Types.hs +++ b/DSL/Types.hs @@ -1,6 +1,6 @@ module DSL.Types where -import Data.Map.Strict (Map(..)) +import Data.Map.Strict (Map) import DSL.BaseParsers (Parser(..)) diff --git a/DSL/Util.hs b/DSL/Util.hs index 75d38af..44d272c 100644 --- a/DSL/Util.hs +++ b/DSL/Util.hs @@ -28,20 +28,20 @@ runChecks fs s hcf :: Machine -> String -> IO Machine hcf m msg = putStrLn msg >> return m{ ok=False } -appendProcTable :: [ProcSpec] -> ProcTable -> Maybe ProcTable +appendProcTable :: [ProcSpec] -> ProcTable -> Either String ProcTable appendProcTable x y = go x where - go [] = Just y + go [] = Right y go ((n, bs):ps) = go ps >>= f where f acc - | member n acc = Nothing - | otherwise = Just $ insert n bs acc + | member n acc = Left $ "Duplicate PROC definition: " ++ n + | otherwise = Right $ insert n bs acc -mergeProcTables :: ProcTable -> ProcTable -> Maybe ProcTable +mergeProcTables :: ProcTable -> ProcTable -> Either String ProcTable mergeProcTables x = appendProcTable $ toList x -buildProcTable :: [ProcSpec] -> Maybe ProcTable +buildProcTable :: [ProcSpec] -> Either String ProcTable buildProcTable ps = appendProcTable ps empty unreachable :: a |
