require "./spec_helper"
include Parcom
describe "Text surrounded by whitespace" do
  ws_char = Parser(Char, Char).satisfy { |c| c.whitespace? }
  normal_char = Parser(Char, Char).satisfy { |c| !c.whitespace? }
  word = normal_char.some.map { |cs| cs.join }
  ws = ws_char.some
  words = ws.optional >> word.sep_by(ws) << ws.optional

  good_strings = {
    "test with no trailing whitespace",
    " test with whitespace in the front",
    "test with whitespace in the back",
    " test surrounded by whitespace ",
  }

  good_strings.each do |s|
    tokens = Tokens.from_string(s)
    result = words.parse(tokens)
    result.value.should eq(s.strip.split(/\s+/))
    result.tokens.empty?.should be_true
  end

  bad_strings = {
    "",
    " \t \n ",
  }

  bad_strings.each do |s|
    tokens = Tokens.from_string(s)
    expect_raises(ParserFail) { words.parse(tokens) }
  end
end

enum BFOpType
  BFAdd
  BFShift
  BFByteIn
  BFByteOut
  BFLoopStart
  BFLoopEnd
end
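
# A BFOp pairs an operation with a count: runs of the same command collapse
# into a single op ("+++" has amount 3, "<<>" a net shift of -1), while loop
# brackets always carry an amount of 0.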
alias BFOp = {
  type:   BFOpType,
  amount: Int32,
}
describe "brainfuck parser" do
  # From https://esolangs.org/wiki/Brainfuck#Examples
  hello_world_str = "+++++ +++          Set Cell #0 to 8
  [
    >++++            Add 4 to Cell #1; this will always set Cell #1 to 4
    [                as the cell will be cleared by the loop
      >++            Add 4*2 to Cell #2
      >+++           Add 4*3 to Cell #3
      >+++           Add 4*3 to Cell #4
      >+             Add 4 to Cell #5
      <<<<-          Decrement the loop counter in Cell #1
    ]                Loop till Cell #1 is zero
    >+               Add 1 to Cell #2
    >+               Add 1 to Cell #3
    >-               Subtract 1 from Cell #4
    >>+              Add 1 to Cell #6
    [<]              Move back to the first zero cell you find; this will
                     be Cell #1 which was cleared by the previous loop
    <-               Decrement the loop Counter in Cell #0
  ]                  Loop till Cell #0 is zero

  The result of this is:
  Cell No : 0   1   2   3   4   5   6
  Contents: 0   0  72 104  88  32   8
  Pointer :             ^

  >>.                Cell #2 has value 72 which is 'H'
  >---.              Subtract 3 from Cell #3 to get 101 which is 'e'
  +++++ ++..+++.     Likewise for 'llo' from Cell #3
  >>.                Cell #5 is 32 for the space
  <-.                Subtract 1 from Cell #4 for 87 to give a 'W'
  <.                 Cell #3 was set to 'o' from the end of 'Hello'
  +++.----- -.----- ---. Cell #3 for 'rl' and 'd'
  >>+.               Add 1 to Cell #5 gives us an exclamation point
  >++.               And finally a newline from Cell #6"
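
  # Everything that is not one of the eight brainfuck commands is a comment;
  # just_bf_chars strips those characters out before tokenizing.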
  bf_chars = Set{'+', '-', '.', ',', '<', '>', '[', ']'}
  other = Parser(Char, Char).satisfy { |c| !bf_chars.includes?(c) }
  bf_char = Parser(Char, Char).satisfy { |c| bf_chars.includes?(c) }
  char_body = bf_char.some.sep_by(other.some).map(&.flatten)
  just_bf_chars = other.many >> char_body << other.many
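
  # Each command gets its own small parser; repeated commands are merged into
  # a single op by counting how many characters matched.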
  loop_start = Parser(Char, Char).token('[').map do |_|
    {type: BFOpType::BFLoopStart, amount: 0}
  end
  loop_end = Parser(Char, Char).token(']').map do |_|
    {type: BFOpType::BFLoopEnd, amount: 0}
  end
  read_block = Parser(Char, Char).token(',').some.map do |cs|
    {type: BFOpType::BFByteIn, amount: cs.size}
  end
  write_block = Parser(Char, Char).token('.').some.map do |cs|
    {type: BFOpType::BFByteOut, amount: cs.size}
  end
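
  # '<'/'>' and '+'/'-' runs collapse into a single signed amount: rights
  # minus lefts, pluses minus minuses.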
  shift_block = (Parser.token('<') | Parser.token('>')).some.map do |cs|
    left_count = cs.count { |c| c == '<' }
    right_count = cs.size - left_count
    {type: BFOpType::BFShift, amount: right_count - left_count}
  end
  add_block = (Parser.token('+') | Parser.token('-')).some.map do |cs|
    minus_count = cs.count { |c| c == '-' }
    plus_count = cs.size - minus_count
    {type: BFOpType::BFAdd, amount: plus_count - minus_count}
  end

  bf_token = Parser.first_of([
    loop_start,
    loop_end,
    read_block,
    write_block,
    shift_block,
    add_block,
  ])
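
  # Tokenizing runs in two passes: first strip comment characters with
  # just_bf_chars, then parse the remaining command characters into BFOps.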
  tokenizer = Parser(Char, Array(BFOp)).new("BF tokenizer") do |tokens|
    result = just_bf_chars.parse(tokens)
    puts result
    chars = Tokens.new(result.value)
    bf_token.some.parse(chars)
  end

  tokens = Tokens.from_string(hello_world_str)
  result = tokenizer.parse(tokens)
  puts result
  # TODO: find a good way to verify this result
end