1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
|
require "./parcom/*"
module Parcom
VERSION = "0.2.0"
# Exception type signalling an unsuccessful parse attempt.
# `Parser#parse` is expected to raise a `ParserFail` when it cannot
# parse its input.
class ParserFail < Exception; end
# Provides a more convenient syntax for combining parsers via `Parser#and_then`.
#
# Arguments:
# * *name*: a string literal used as the name of the resulting parser.
# * *t*, *u*: the types used to instantiate the resulting `Parser(t, u)`.
# * *steps*: any number of 2-tuples `{variable, parser_expression}`. Each
#   parser expression should resolve to a `Parser(t, _)`; its success value
#   is bound to the variable named in the same tuple.
# * *finally*: a named argument holding an expression that resolves to a
#   `Parser(t, u)`.
#
# Expansion: the steps are emitted in order as nested `and_then` blocks with
# the `finally` expression innermost, so a variable bound by one step is
# visible to every later step and to `finally`. The composed parser is then
# run with `.parse(tokens)` on the tokens handed to the block of the new
# `Parser(t, u)`. With no steps, the `finally` expression is parsed directly.
#
# Example:
# ```
# any_word = Parser(Char, Char).satisfy(&.letter?).some.map(&.join)
# ws = Parser(Char, Array(Char)).satisfy(&.whitespace?).many
# two_of_same_word = parser_chain "two words", Char, String,
#   {word, any_word},
#   {_, ws},
#   finally: Parser.token_sequence(word.chars).map(&.join)
#
# tokens = Tokens.from_string("foo foo")
# result = two_of_same_word.parse(tokens)
# result.value # => "foo"
#
# # The above definition of `two_of_same_word`
# # is an alternative way of doing this:
# two_of_same_word = any_word.and_then do |word|
#   ws.and_then do |_|
#     Parser.token_sequence(word.chars).map(&.join)
#   end
# end.named("two words")
# ```
#
# This macro is based on Haskell's do-notation.
macro parser_chain(name, t, u, *steps, finally)
Parser({{t}}, {{u}}).new({{name}}) do |tokens|
{% for tup, index in steps %}
{{tup.last}}.and_then do |{{tup.first}}|
{% end %}
{{finally}}
{% for _, _ in steps %}
end
{% end %}
.parse(tokens)
end
end
# `Tokens` is an `Array` wrapper struct that stores the input
# stream of one or more `Parser` objects.
# A `Tokens` can be created from any `Iterable`, or from a `String`
# via `Tokens.from_string`.
struct Tokens(T)
  # The wrapped array of tokens.
  getter tokens

  # Constructs a `Tokens(Char)` from the characters of *s*.
  def self.from_string(s : String) : Tokens(Char)
    Tokens.new(s.chars)
  end

  # Constructs a `Tokens` from an `Iterable`.
  #
  # When *ts* responds to `to_a`, the result of that call is stored;
  # otherwise the elements are appended one by one to a new array.
  def initialize(ts : Iterable(T))
    if ts.responds_to?(:to_a)
      @tokens = ts.to_a
    else
      @tokens = [] of T
      ts.each { |t| @tokens << t }
    end
  end

  # Exposes `Array#[](Int)`.
  # Raises `IndexError` when *index* is out of bounds.
  def [](index : Int) : T
    @tokens[index]
  end

  # Exposes `Array#[](Int, Int)`, but wraps the returned array in a new `Tokens`.
  def [](start : Int, count : Int) : Tokens(T)
    Tokens.new(@tokens[start, count])
  end

  # Exposes `Array#[](Range)`, but wraps the returned array in a new `Tokens`.
  def [](range : Range) : Tokens(T)
    Tokens.new(@tokens[range])
  end

  # Like `#[](Int)`, but returns `nil` instead of raising an `IndexError`.
  #
  # Delegates to `Array#[]?` so that an out-of-bounds lookup does not
  # have to raise and rescue an exception.
  def []?(index : Int) : T?
    @tokens[index]?
  end

  # Like `#[](Int, Int)`, but returns `nil` instead of raising an `IndexError`.
  def []?(start : Int, count : Int) : Tokens(T)?
    @tokens[start, count]?.try { |slice| Tokens.new(slice) }
  end

  # Like `#[](Range)`, but returns `nil` instead of raising an `IndexError`.
  def []?(range : Range) : Tokens(T)?
    @tokens[range]?.try { |slice| Tokens.new(slice) }
  end

  # Exposes `Array#empty?`.
  def empty? : Bool
    @tokens.empty?
  end
end
# A `Result` bundles a `Tokens` object with a parsed value, and is
# effectively the state carried through a parser chain.
# A dedicated struct is used rather than a `Tuple` or `NamedTuple` because:
# 1. It is more idiomatic than a `Tuple`.
# 2. Crystal does not support generic named tuples.
struct Result(T, U)
  getter tokens
  getter value

  # Stores the given tokens and value.
  def initialize(@tokens : Tokens(T), @value : U)
  end

  # Returns a new `Result` holding the same tokens and the value
  # produced by calling the proc *f* with the current value.
  def map(f : U -> V) : Result(T, V) forall V
    mapped = f.call(value)
    Result.new(tokens, mapped)
  end

  # Block form of `#map`: returns a new `Result` holding the same tokens
  # and the value produced by calling the block with the current value.
  def map(&block : U -> V) : Result(T, V) forall V
    Result.new(tokens, block.call(value))
  end
end
end
|