From acd14e4b4f722f7fe502bbf3aabab16d7d7df396 Mon Sep 17 00:00:00 2001
From: Matthew Hall
Date: Sun, 19 Mar 2023 21:48:31 +1300
Subject: exactly, at_least, at_most, between

---
 spec/parcom_spec.cr  | 210 ++++++++++++++++++++++++++++++++++++++++++++++++++-
 src/parcom/parser.cr | 128 ++++++++++++++++++++++++++++++-
 2 files changed, 332 insertions(+), 6 deletions(-)

diff --git a/spec/parcom_spec.cr b/spec/parcom_spec.cr
index 749f7e0..aae1d1b 100644
--- a/spec/parcom_spec.cr
+++ b/spec/parcom_spec.cr
@@ -415,10 +415,212 @@ describe Parser do
       end
     end
   end
-  # TODO: exactly
-  # TODO: at_least
-  # TODO: at_most
-  # TODO: between
+
+  describe "#exactly" do
+    a = Parser.token('a')
+    tokens = Tokens.from_string("aaabbb")
+
+    it "fails to instantiate if `n` is negative" do
+      expect_raises(ArgumentError) { a * -3 }
+    end
+
+    it "accepts parsing 0 times" do
+      p = a * 0
+      result = p.parse(tokens)
+
+      result.value.empty?.should be_true
+      result.tokens.should eq(tokens)
+    end
+
+    it "tries to parse exactly n times" do
+      p = a * 3
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "does not parse more than n times, even if it could" do
+      p = a * 1
+      result = p.parse(tokens)
+
+      result.value.should eq(['a'])
+      result.tokens.should eq(tokens[1..])
+    end
+
+    it "fails if unable to parse enough times" do
+      p = a * 5
+      expect_raises(ParserFail) { p.parse(tokens) }
+    end
+  end
+
+  describe "#at_least" do
+    a = Parser.token('a')
+    tokens = Tokens.from_string("aaabbb")
+
+    it "fails to instantiate if `n` is negative" do
+      expect_raises(ArgumentError) { a.at_least(-3) }
+    end
+
+    it "accepts parsing 0 times" do
+      p = a.at_least(0)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "tries to parse at least n times" do
+      p = a.at_least(3)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "parses as many times as possible (>=n)" do
+      p = a.at_least(1)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "fails if unable to parse enough times" do
+      p = a.at_least(5)
+      expect_raises(ParserFail) { p.parse(tokens) }
+    end
+  end
+
+  describe "#at_most" do
+    a = Parser.token('a')
+    tokens = Tokens.from_string("aaabbb")
+
+    it "fails to instantiate if `n` is negative" do
+      expect_raises(ArgumentError) { a.at_most(-3) }
+    end
+
+    it "accepts parsing 0 times" do
+      p = a.at_most(0)
+      result = p.parse(tokens)
+
+      result.value.should eq([] of Char)
+      result.tokens.should eq(tokens)
+    end
+
+    it "tries to parse at most n times" do
+      p = a.at_most(3)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "stops if it succeeds `n` times" do
+      p = a.at_most(1)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a'])
+      result.tokens.should eq(tokens[1..])
+    end
+
+    it "stops if unable to parse `n` times" do
+      p = a.at_most(5)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+  end
+
+  describe "#between" do
+    a = Parser.token('a')
+    tokens = Tokens.from_string("aaabbb")
+
+    it "fails to instantiate if `n` or `m` are negative" do
+      expect_raises(ArgumentError) { a.between(-1, 1) }
+      expect_raises(ArgumentError) { a.between(1, -1) }
+      expect_raises(ArgumentError) { a.between(-1, -1) }
+    end
+
+    it "fails to instantiate if any range values are negative" do
+      {(-1..3), (-3..-1), (..-3), (-3..)}.each do |r|
+        expect_raises(ArgumentError) { a.between(r) }
+      end
+    end
+
+    it "accepts either order for `n` and `m`" do
+      p1 = a.between(1,2)
+      r1 = p1.parse(tokens)
+
+      p2 = a.between(2,1)
+      r2 = p2.parse(tokens)
+
+      r1.should eq(r2)
+    end
+
+    it "accepts `0` for `n` and `m`" do
+      p = a.between(0, 2)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a'])
+      result.tokens.should eq(tokens[2..])
+    end
+
+    it "accepts Range objects" do
+      p = a.between(1..2)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a'])
+      result.tokens.should eq(tokens[2..])
+    end
+
+    it "accepts beginless ranges" do
+      p = a.between(..2)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a'])
+      result.tokens.should eq(tokens[2..])
+    end
+
+    it "accepts endless ranges" do
+      p = a.between(2..)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "tries to parse between `n` and `m` times" do
+      p = a.between(1, 2)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a'])
+      result.tokens.should eq(tokens[2..])
+    end
+
+    it "fails if it does not succeed the minimum number of times" do
+      p = a.between(5, 6)
+      expect_raises(ParserFail) { p.parse(tokens) }
+    end
+
+    it "stops parsing if it fails after succeeding the minimum number of times" do
+      p = a.between(2, 4)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a', 'a'])
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "stops parsing if it succeeds the maximum number of times" do
+      p = a.between(1, 2)
+      result = p.parse(tokens)
+
+      result.value.should eq(['a', 'a'])
+      result.tokens.should eq(tokens[2..])
+    end
+  end
+
   # TODO: first_of
   # TODO: sep_by
   # TODO: phrase
diff --git a/src/parcom/parser.cr b/src/parcom/parser.cr
index 8ee3f68..0b09e95 100644
--- a/src/parcom/parser.cr
+++ b/src/parcom/parser.cr
@@ -259,11 +259,135 @@ module Parcom
       end
     end
 
-    # Creates a new parser from `self` that parses with `self` as many times
-    # as possible. Raises `ParserFail` it never succeeds.
+    # Creates a new parser from `self` that repeatedly parses with `self`
+    # at least once. Raises `ParserFail` if it never succeeds.
     def some : Parser(T, Array(U))
       many.assert { |arr| !arr.empty? }.named("Some: #{@name}")
     end
+
+    # Creates a new parser from `self` that tries to parse with `self`
+    # exactly `n` times. If `self` ever fails, the whole parser fails.
+    # Raises `ArgumentError` if `n` is negative, but will accept `0`.
+    def exactly(n : Int) : Parser(T, Array(U))
+      if n.negative?
+        raise ArgumentError.new("`n` must be non-negative (got #{n})")
+      end
+
+      p = self
+      Parser(T, Array(U)).new("#Exactly #{n} <#{@name}>") do |tokens|
+        values = [] of U
+        n.times do
+          result = p.parse(tokens)
+          values << result.value
+          tokens = result.tokens
+        end
+        Result.new(tokens, values)
+      end
+    end
+
+    # :ditto:
+    def *(n : Int) : Parser(T, Array(U))
+      exactly(n)
+    end
+
+    # Creates a new parser from `self` that tries to parse with `self`
+    # at least `n` times. If `self` fails before it is able to parse `n` times,
+    # the whole parser fails. If it is able to parse `n` times, it will continue
+    # to parse until `self` fails.
+    # Raises `ArgumentError` if `n` is negative, but will accept `0`.
+    # TODO: Maybe introduce a `>=` shorthand?
+    def at_least(n : Int) : Parser(T, Array(U))
+      if n.negative?
+        raise ArgumentError.new("`n` must be non-negative (got #{n})")
+      end
+
+      (exactly(n) + many).map do |t|
+        t.first + t.last
+      end.named("At least #{n} #{@name}")
+    end
+
+    # Creates a new parser from `self` that tries to parse with `self`
+    # at most `n` times. If `self` succeeds `n` times, parsing will stop
+    # and the accumulated results will be returned. It will not fail the
+    # chain if `self` fails, but will return all previous results.
+    # Raises `ArgumentError` if `n` is negative, but will accept `0`.
+    # TODO: Maybe introduce a `<=` shorthand?
+    # TODO: could be made faster by not depending on `#exactly`
+    def at_most(n : Int) : Parser(T, Array(U))
+      if n.negative?
+        raise ArgumentError.new("`n` must be non-negative (got #{n})")
+      end
+
+      optional.exactly(n).map(&.compact).named("At most #{n} #{@name}")
+    end
+
+    # Creates a new parser from `self` that tries to parse with `self`
+    # between `n` and `m` times (inclusive). The order that `n` and `m`
+    # are specified does not matter, this method will figure out the
+    # lower and greater values. If `self` does not succeed the minimum
+    # number of times, the whole parser will fail. If `self` succeeds
+    # the maximum number of times, or if `self` fails after succeeding
+    # the minimum number of times, parsing stops and the success values
+    # are returned.
+    # Raises `ArgumentError` if `n` or `m` is negative, but will accept `0`.
+    # See the `Range` overloads of `#between` for range-based bounds.
+    def between(n : Int, m : Int) : Parser(T, Array(U))
+      if n.negative?
+        raise ArgumentError.new("`n` must be non-negative (got #{n})")
+      end
+
+      if m.negative?
+        raise ArgumentError.new("`m` must be non-negative (got #{m})")
+      end
+
+      if n == m
+        return exactly(n).named("Between #{n}-#{n} <#{@name}>")
+      end
+
+      min = n < m ? n : m
+      extra = (n - m).abs
+      max = n < m ? m : n
+
+      (exactly(min) + at_most(extra)).map do |t|
+        t.first + t.last
+      end.named("Between #{min}-#{max} <#{@name}>")
+    end
+
+    # Creates a new parser from `self` that tries to parse with `self`
+    # a number of times within the given range. If `self` does not succeed
+    # the minimum number of times, the whole parser will fail. If `self`
+    # succeeds the maximum number of times, or if `self` fails after
+    # succeeding the minimum number of times, parsing stops and the
+    # success values are returned.
+    # Raises `ArgumentError` if the range includes negative numbers,
+    # but will accept `0`. NOTE: `r.end` is used as an inclusive bound, even for `a...b`.
+    def between(r : Range(Int, Int)) : Parser(T, Array(U))
+      if r.begin.negative? || r.end.negative?
+        raise ArgumentError.new("All values in Range must be non-negative (got #{r})")
+      end
+
+      between(r.begin, r.end)
+    end
+
+    # Special overload of `#between` for beginless ranges.
+    # Raises `ArgumentError` if the range end is negative.
+    def between(r : Range(Nil, Int)) : Parser(T, Array(U))
+      if r.end.negative?
+        raise ArgumentError.new("End of beginless range must be non-negative (got #{r})")
+      end
+
+      at_most(r.end).named("Between 0-#{r.end} <#{@name}>")
+    end
+
+    # Special overload of `#between` for endless ranges.
+    # Raises `ArgumentError` if the range start is negative.
+    def between(r : Range(Int, Nil)) : Parser(T, Array(U))
+      if r.begin.negative?
+        raise ArgumentError.new("Start of endless range must be non-negative (got #{r})")
+      end
+
+      at_least(r.begin).named("Between #{r.begin}-infinity <#{@name}> (from infinite range)")
+    end
   end
 end
 
-- 
cgit v1.2.1