diff options
| author | Matthew Hall <hallmatthew314@gmail.com> | 2023-03-30 21:55:10 +1300 |
|---|---|---|
| committer | Matthew Hall <hallmatthew314@gmail.com> | 2023-03-30 21:55:10 +1300 |
| commit | e5796552a7f0e8a3ff6de2c0a97d61dc22c0466b (patch) | |
| tree | 39f3364590de593d95e2510db1f0dd37bc48ee37 | |
| parent | f4a7dcce562ff672651f36306423d37eaaa01fed (diff) | |
`between` no longer accepts its min/max values in either order; also includes documentation updates and minor refactors
| -rw-r--r-- | src/parcom/parser.cr | 181 | ||||
| -rw-r--r-- | src/parcom/result.cr | 7 | ||||
| -rw-r--r-- | src/parcom/tokens.cr | 4 |
3 files changed, 87 insertions, 105 deletions
diff --git a/src/parcom/parser.cr b/src/parcom/parser.cr index af486c4..e7442db 100644 --- a/src/parcom/parser.cr +++ b/src/parcom/parser.cr @@ -43,7 +43,7 @@ module Parcom # Creates a parser that returns the first token in the input stream. # Fails if the input stream is empty. - # Analagous to a `.` in a regular expression. + # Analagous to a '.' in a regular expression. def self.any_token : Parser(T, T) forall T Parser(T, T).new("Any Token") do |tokens| if tokens.empty? @@ -56,7 +56,7 @@ module Parcom # Creates a parser that succeeds with `nil` if the input stream is empty. # Fails if the input stream is not empty. - # Analagous to a `$` in a regular expression. + # Analagous to a '$' in a regular expression. def self.eof : Parser(T, Nil) Parser(T, Nil).new("EOF") do |tokens| if tokens.empty? @@ -80,10 +80,9 @@ module Parcom satisfy(block) end - # Creates a parser that parses the fist token in the input stream - # if that token matches the provided token. + # Creates a parser that parses an instance of the given token. def self.token(token : T) : Parser(T, T) - Parser(T, T).satisfy { |x| x == token }.named("Token <#{token}>") + Parser(T, T).satisfy(&.==(token)).named("Token <#{token}>") end # Creates a parser from an array of parsers that parses with @@ -136,13 +135,12 @@ module Parcom end end - # Creates a new parser from a `Proc`. - # The `Proc` should have the properties outlined above. + # Creates a new parser from a function with + # the properties outlined in the overview. def initialize(@name : String, @f : Tokens(T) -> Result(T, U)) end - # Creates a new parser from a block. - # The block should have the properties outline above. + # :ditto: def initialize(@name : String, &block : Tokens(T) -> Result(T, U)) @f = block end @@ -150,35 +148,31 @@ module Parcom # Changes the `name` property and returns `self`. 
# This should be used to specify a custom name: # ``` - # a = Basic.token(Char, 'a').named("letter a") + # a = Parser.token('a').named("letter a") # ``` def named(name : String) : self @name = name self end - # Tries to parse some kind of data from the given input stream. - # This method calls the `Proc` or block this parser object was - # initialized with. + # Calls this parser's parsing function with the given input stream. def parse(tokens : Tokens(T)) : Result(T, U) @f.call(tokens) end - # Same as `#parse(Tokens(T)) : Result(T, U)`, but returns `nil` - # instead of raising an exception if parsing fails. + # Same as `#parse`, but returns `nil` instead of + # raising an exception if parsing fails. def parse?(tokens : Tokens(T)) : Result(T, U)? parse(tokens) - rescue + rescue ParserFail nil end - # Creates a new parser that is the same as the parser object it is - # called from, but tests the result against a given predicate. - # If the value does not pass the test, the parser fails. + # Creates a new parser from `self` that behaves the same, but fails + # if the result does not satisfy the given predicate. def assert(f : U -> Bool) : Parser(T, U) - p = self - Parser(T, U).new("#{p.name} (assertion)") do |tokens| - result = p.parse(tokens) + Parser(T, U).new("#{@name} (assertion)") do |tokens| + result = self.parse(tokens) unless f.call(result.value) raise ParserFail.new("Assertion failed for value #{result.value}") end @@ -191,14 +185,12 @@ module Parcom assert(block) end - # Creates a new parser that is the same as the parser object it is - # called from, but transforms the result into something else via a - # given function. - # The function in question should not introduce side effects. + # Creates a new parser from `self`, but transforms the result + # into something else via the given function. + # NOTE: Functions given to `#map` should not introduce side effects. 
def map(f : U -> V) : Parser(T, V) forall V - p = self - Parser(T, V).new("#{p.name} (mapped)") do |tokens| - p.parse(tokens).map(f) + Parser(T, V).new("#{@name} (mapped)") do |tokens| + self.parse(tokens).map(f) end end @@ -223,31 +215,29 @@ module Parcom # try to parse with either of them. If the first parser succeeds, # it will return the result of the first parser. Otherwise, it will # return the result of the second parser. - def |(p2 : Parser(T, U)) : Parser(T, U) - p1 = self - Parser(T, U).new("#{p1.name} | #{p2.name}") do |tokens| - p1.parse(tokens) + def |(other : Parser(T, U)) : Parser(T, U) + Parser(T, U).new("#{@name} | #{other.name}") do |tokens| + self.parse(tokens) rescue ParserFail - p2.parse(tokens) + other.parse(tokens) end end - # Creates a new parser that is the same as the parser object it is - # called from, but it will return a default value without consuming - # any input instead of failing. + # Creates a new parser from `self` that will return a + # default value without consuming any input instead of + # raising a `ParserFail`. def recover(default : U) : Parser(T, U) - r = Parser(T, U).pure(default) - (self | r).named("#{@name} (recover <#{default}>)") + backup = Parser(T, U).pure(default) + (self | backup).named("#{@name} (recover <#{default}>)") end - # Creates a new parser that is the same as the parser object it is - # called from, but it will return `nil` without consuming any input + # Creates a new parser from `self` that returns `nil` + # without consuming any input # instead of returning. def optional : Parser(T, U?) - p = self - Parser(T, U?).new("#{p.name} (optional)") do |tokens| + Parser(T, U?).new("#{@name} (optional)") do |tokens| # We have to do it this way due to how type unions work. - r = p.parse?(tokens) + r = parse?(tokens) new_tokens = r.nil? ? tokens : r.tokens new_value = r.nil? ? 
nil : r.value Result.new(new_tokens, new_value) @@ -257,25 +247,22 @@ module Parcom # Creates a new parser from `self` and another parser that will # try to parse with both parsers and return both results. If either # sub-parser fails, the whole parser fails. - def +(p2 : Parser(T, V)) : Parser(T, {U, V}) forall V - p1 = self - Parser(T, {U, V}).new("#{p1.name} + #{p2.name}") do |tokens| - r1 = p1.parse(tokens) - r2 = p2.parse(r1.tokens) + def +(other : Parser(T, V)) : Parser(T, {U, V}) forall V + Parser(T, {U, V}).new("#{@name} + #{other.name}") do |tokens| + r1 = self.parse(tokens) + r2 = other.parse(r1.tokens) Result.new(r2.tokens, {r1.value, r2.value}) end end # Same as `#+`, but discards the second parser's result. - def <<(p2 : Parser(T, _)) : Parser(T, U) - p1 = self - (p1 + p2).map(&.first).named("#{p1.name} << #{p2.name}") + def <<(other : Parser(T, _)) : Parser(T, U) + (self + other).map(&.first).named("#{@name} << #{other.name}") end # Same as `#+`, but discards the first parser's result. - def >>(p2 : Parser(T, V)) : Parser(T, V) forall V - p1 = self - (p1 + p2).map(&.last).named("#{p1.name} >> #{p2.name}") + def >>(other : Parser(T, V)) : Parser(T, V) forall V + (self + other).map(&.last).named("#{@name} >> #{other.name}") end # Creates a new parser from `self` and a function based on @@ -292,9 +279,8 @@ module Parcom # two_words = (p_any_word << space).and_then(p_string) # ``` def and_then(f : U -> Parser(T, V)) : Parser(T, V) forall V - p = self - Parser(T, V).new("#{p.name} and_then..") do |tokens| - r = p.parse(tokens) + Parser(T, V).new("#{@name} and_then..") do |tokens| + r = self.parse(tokens) f.call(r.value).parse(r.tokens) end end @@ -307,15 +293,14 @@ module Parcom # Creates a new parser from `self` that parses with `self` as many times # as possible. Returns an empty list if it never succeeds. 
def many : Parser(T, Array(U)) - p = self - Parser(T, Array(U)).new("Many: #{p.name}") do |tokens| + Parser(T, Array(U)).new("Many: #{@name}") do |tokens| values = [] of U - r = p.parse?(tokens) + r = self.parse?(tokens) until r.nil? break unless tokens != r.tokens tokens = r.tokens values << r.value - r = p.parse?(tokens) + r = self.parse?(tokens) end Result.new(tokens, values) end @@ -335,11 +320,10 @@ module Parcom raise ArgumentError.new("`n` must be non-negative (got #{n})") end - p = self Parser(T, Array(U)).new("#Exactly #{n} <#{@name}>") do |tokens| values = [] of U n.times do - result = p.parse(tokens) + result = self.parse(tokens) values << result.value tokens = result.tokens end @@ -383,71 +367,64 @@ module Parcom optional.exactly(n).map(&.compact).named("At least #{n} #{@name}") end - # Creates a new parser from `self` that tries to parse with `self` - # between `n` and `m` times (inclusive). The order that `n` and `m` - # are specified does not matter, this method will figure out the - # lower and greater values. If `self` does not succeed the minimum + # Creates a new parser that tries to parse with `self` a number + # of times within a given range. If `self` does not succeed the minimum # number of times, the whole parser will fail. If `self` succeeds # the maximum number of times, or if `self` fails after succeeding # the minimum number of times, parsing stops and the success values - # are returned. - # Raises `ArgumentError` if `n` or `m` is negative, but will accept `0`. - # TODO: Add support for `Range` objects - def between(n : Int, m : Int) : Parser(T, Array(U)) - if n.negative? - raise ArgumentError.new("`n` must be non-negative (got #{n})") + # are returned. Raises `ArgumentError` if the minimum or maximum values + # are negative, but will accept zero. + def between(minimum : Int, maximum : Int) : Parser(T, Array(U)) + if minimum.negative? + raise ArgumentError.new("`minimum` must be non-negative (got #{minimum})") end - if m.negative? 
- raise ArgumentError.new("`m` must be non-negative (got #{m})") + if maximum.negative? + raise ArgumentError.new("`maximum` must be non-negative (got #{maximum})") end - if n == m - return exactly(n).named("Between #{n}-#{n} <#{@name}>") + if minimum > maximum + raise ArgumentError.new("`minimum` value was greater than `maximum` value. You may have them in the wrong order.") end - min = n < m ? n : m - extra = (n - m).abs + if minimum == maximum + return exactly(minimum).named("Between #{minimum}-#{minimum} <#{@name}>") + end + + extra = maximum - minimum - (exactly(min) + at_most(extra)).map do |t| + (exactly(minimum) + at_most(extra)).map do |t| t.first + t.last - end.named("Between #{min}-#{extra} <#{@name}>") + end.named("Between #{minimum}-#{maximum} <#{@name}>") end - # Creates a new parser from `self` that tries to parse with `self` - # a number of times within the given range. If `self` does not succeed - # the minimum number of times, the whole parser will fail. If `self` - # succeeds the maximum number of times, or if `self` fails after - # succeeding the minimum number of times, parsing stops and the - # success values are returned. - # Raises `ArgumentError` if the range includes negative numbers, - # but will accept `0`. - def between(r : Range(Int, Int)) : Parser(T, Array(U)) - if r.begin.negative? || r.end.negative? - raise ArgumentError.new("All values in Range must be non-negative (got #{r})") + # :ditto: + def between(range : Range(Int, Int)) : Parser(T, Array(U)) + if range.begin.negative? || range.end.negative? + raise ArgumentError.new("All values in Range must be non-negative (got #{range})") end - between(r.begin, r.end) + between(range.begin, range.end) end # Special overload of `#between` for beginless ranges. # Raises `ArgumentError` if the range end is negative. - def between(r : Range(Nil, Int)) : Parser(T, Array(U)) - if r.end.negative? 
- raise ArgumentError.new("End of beginless range must be non-negative (got #{r})") + def between(range : Range(Nil, Int)) : Parser(T, Array(U)) + if range.end.negative? + raise ArgumentError.new("End of beginless range must be non-negative (got #{range})") end - at_most(r.end).named("Between 0-#{r.end} <#{@name}>") + at_most(range.end).named("Between 0-#{range.end} <#{@name}>") end # Special overload of `#between` for endless ranges. # Raises `ArgumentError` if the range start is negative. - def between(r : Range(Int, Nil)) : Parser(T, Array(U)) - if r.begin.negative? - raise ArgumentError.new("Start of endless range must be non-negative (got #{r})") + def between(range : Range(Int, Nil)) : Parser(T, Array(U)) + if range.begin.negative? + raise ArgumentError.new("Start of endless range must be non-negative (got #{range})") end - at_least(r.begin).named("Between #{r.begin}-infinity <#{@name}> (from inifnite range)") + at_least(range.begin).named("Between #{range.begin}-infinity <#{@name}> (from inifnite range)") end # Creates a new parser from `self` and another parser that tries to diff --git a/src/parcom/result.cr b/src/parcom/result.cr index 4a21a16..f832cd9 100644 --- a/src/parcom/result.cr +++ b/src/parcom/result.cr @@ -7,15 +7,20 @@ module Parcom # 1. This is more idiomatic than a `Tuple`. # 2. Crystal does not support generic named tuples. struct Result(T, U) - getter tokens, value + getter tokens : Tokens(T), value : U def initialize(@tokens : Tokens(T), @value : U) end + # Creates a new `Result` object with the same tokens, but with + # a new value generated by giving the current value to a proc. + # + # See also: `Parser#map`. 
def map(f : U -> V) : Result(T, V) forall V Result.new(@tokens, f.call(@value)) end + # :ditto: def map(&block : U -> V) : Result(T, V) forall V map(block) end diff --git a/src/parcom/tokens.cr b/src/parcom/tokens.cr index 21c0e10..ee6bdcd 100644 --- a/src/parcom/tokens.cr +++ b/src/parcom/tokens.cr @@ -2,11 +2,11 @@ require "../parcom.cr" module Parcom # `Tokens` is an `Array` wrapper struct to store the input - # stream of one or more `Parser` objects. + # stream of a `Parser` object. # A `Tokens` can be created from any `Iterable`, along with # `String` objects using a special constructor. struct Tokens(T) - getter tokens + getter tokens : Array(T) # Constructs a `Tokens(Char)` from a `String`. def self.from_string(s : String) : Tokens(Char) |
