summaryrefslogtreecommitdiff
path: root/DSL/Parsing.hs
blob: 6dcc4bf6801adaad92628ae37c5ce5b4b7aa8897 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
module DSL.Parsing where

import Data.Char (isDigit, isSpace)

import DSL.Types
import DSL.BaseParsers

buildDSLLexer :: Parser Char [Char] -> TokenTag -> DSLLexer
buildDSLLexer p t = do
  str <- p
  return Token { tStr=str, tTag=t }

fromStringL :: String -> TokenTag -> DSLLexer
fromStringL s t = buildDSLLexer (list s) t <* peek (() <$ satisfy isSpace `alt` eof)

fromTableL :: [(String, TokenTag)] -> DSLLexer
fromTableL table = firstOf $ map (uncurry fromStringL) table

wsL :: DSLLexer
wsL = buildDSLLexer (mult1 $ satisfy isSpace) T_WHITESPACE

commentL :: DSLLexer
commentL = buildDSLLexer go T_COMMENT
  where
    delim = list "'''"
    go = delim *> (fmap fst $ stopAfter delim)

keywordL :: DSLLexer
keywordL = fromTableL [ ("PROC",  T_PROC)
                      , ("IF",    T_IF)
                      , ("ELSE",  T_ELSE)
                      , ("WHILE", T_WHILE)
                      , ("DO",    T_DO)
                      , ("END",   T_END)
                      ]

intLiteralL :: DSLLexer
intLiteralL = buildDSLLexer go T_INT_LITERAL
  where
    go = do
      sign   <- optionalMaybe $ token '-'
      digits <- mult1 $ satisfy isDigit
      result $ maybe digits (:digits) sign

boolLiteralL :: DSLLexer
boolLiteralL = f "true" `alt` f "false"
  where
    f s = fromStringL s T_BOOL_LITERAL

stringLiteralL :: DSLLexer
stringLiteralL = buildDSLLexer go T_STRING_LITERAL
  where
    go = token '"' *> strChars <* token '"'
    strChars = concat <$> mult strChar
    strChar = list ['\\', '"'] `alt` (pure <$> satisfy (/='"'))

literalL :: DSLLexer
literalL = firstOf [ intLiteralL
                   , boolLiteralL
                   , stringLiteralL
                   ]

identifierL :: DSLLexer
identifierL = buildDSLLexer go T_IDENTIFIER
  where
    go = mult1 $ satisfy $ not . isSpace

intrinsicL :: DSLLexer
intrinsicL = fromTableL [ (".",    T_INTRINSIC I_DUMP)
                        , ("DROP", T_INTRINSIC I_DROP)
                        , ("SWAP", T_INTRINSIC I_SWAP)
                        , ("DUP",  T_INTRINSIC I_DUP)
                        , ("OVER", T_INTRINSIC I_OVER)
                        , ("ROT",  T_INTRINSIC I_ROT)
                        , ("+",    T_INTRINSIC I_PLUS)
                        , ("-",    T_INTRINSIC I_MINUS)
                        , ("*",    T_INTRINSIC I_TIMES)
                        , ("/%",   T_INTRINSIC I_DIVMOD)
                        , ("!",    T_INTRINSIC I_NOT)
                        , ("&&",   T_INTRINSIC I_AND)
                        , ("||",   T_INTRINSIC I_OR)
                        , ("^",    T_INTRINSIC I_XOR)
                        , ("++",   T_INTRINSIC I_STRCAT)
                        , ("==",   T_INTRINSIC I_EQUAL)
                        , ("<",    T_INTRINSIC I_LESSTHAN)
                        , (">",    T_INTRINSIC I_GREATERTHAN)
                        ]

lexemeL :: DSLLexer
lexemeL = firstOf [ commentL
                  , keywordL
                  , literalL
                  , intrinsicL
                  , identifierL
                  ]

tokenizer :: Parser Char [Token]
tokenizer = filter f <$> go
  where
    go = optional wsL *> lexemeL `sepBy` wsL <* optional wsL
    f Token { tTag=t } = t /= T_COMMENT

------------------------------------------------------------------------------
-- Parsing
------------------------------------------------------------------------------

tagP :: TokenTag -> DSLParser Token
tagP t = satisfy $ (==t) . tTag

wsP :: DSLParser ()
wsP = () <$ tagP T_WHITESPACE

dataLiteralP :: DSLParser StackData
dataLiteralP = do
  Token { tTag=t, tStr=s } <- anyToken
  case (t, s) of
    (T_BOOL_LITERAL, "true")  -> result $ StackBool True
    (T_BOOL_LITERAL, "false") -> result $ StackBool False
    (T_INT_LITERAL, _)        -> result $ StackInt $ read s
    (T_STRING_LITERAL, _)     -> result $ StackString s
    _                         -> flunk

pushDataP :: DSLParser Operation
pushDataP = OpPushData <$> dataLiteralP

intrinsicP :: DSLParser Operation
intrinsicP = do
  t <- anyToken
  case tTag t of
    T_INTRINSIC i -> result $ OpIntrinsic i
    _             -> flunk

callP :: DSLParser Operation
callP = OpCall . tStr <$> tagP T_IDENTIFIER

operationP :: DSLParser Operation
operationP = firstOf [ pushDataP
                     , intrinsicP
                     , callP
                     ]

linearP :: DSLParser Block
linearP = BLinear <$> mult1 operationP

ifP :: DSLParser Block
ifP = do
  c <- tagP T_IF *> mult blockP <* tagP T_DO
  b <- mult blockP <* tagP T_END
  return $ BIf c b

ifElseP :: DSLParser Block
ifElseP = do
  c <- tagP T_IF *> mult blockP <* tagP T_DO
  b1 <- mult blockP <* tagP T_ELSE
  b2 <- mult blockP <* tagP T_END
  return $ BIfElse c b1 b2

whileP :: DSLParser Block
whileP = do
  c <- tagP T_WHILE *> mult blockP <* tagP T_DO
  b <- mult blockP <* tagP T_END
  return $ BWhile c b

procP :: DSLParser ProcSpec
procP = front `plus` back
  where
    front = tagP T_PROC >> tStr <$> tagP T_IDENTIFIER
    back  = mult blockP <* tagP T_END

blockP :: DSLParser Block
blockP = firstOf [ whileP
                 , ifElseP
                 , ifP
                 , linearP
                 ]

programP :: DSLParser ([ProcSpec], Program)
programP = phrase $ procs `plus` code
  where
    procs = mult procP
    code  = mult1 blockP

stringToProgram :: String -> Maybe ([ProcSpec], Program)
stringToProgram = fmap snd . parse (tokenizer `chain` programP)