From 467660e024bad8e2e084aa703686d0856a7e88b9 Mon Sep 17 00:00:00 2001 From: Matthew Hall Date: Sat, 18 Mar 2023 21:44:26 +1300 Subject: Sequence, +, and variants --- spec/parcom_spec.cr | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++ src/parcom/parser.cr | 56 ++++++++++++++++++++++++++++++- 2 files changed, 149 insertions(+), 1 deletion(-) diff --git a/spec/parcom_spec.cr b/spec/parcom_spec.cr index 6acd1b2..86ad93a 100644 --- a/spec/parcom_spec.cr +++ b/spec/parcom_spec.cr @@ -170,6 +170,52 @@ describe Parser do end end + describe "self.sequence" do + it "always succeeds with 0 parsers" do + p = Parser(Char, Char).sequence([] of Parser(Char, Char)) + tokens = Tokens.from_string("") + result = p.parse(tokens) + + result.value.empty?.should be_true + result.tokens.should eq(tokens) + end + + p = Parser(Char, Char).sequence([ + Parser(Char, Char).token('a'), + Parser(Char, Char).token('b'), + Parser(Char, Char).token('c'), + ]) + + it "runs each parser in sequence" do + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq("abc".chars) + result.tokens.should eq(tokens[3..]) + end + + it "fails if any of the parsers fail" do + "xbc axc abx".split.each do |s| + tokens = Tokens.from_string(s) + expect_raises(ParserFail) { p.parse(tokens) } + end + end + end + + # most testing should be able to be skipped, since it is already + # done for `Parser.sequence` + describe "self.token_sequence" do + p = Parser(Char, Char).token_sequence("abc".chars) + + it "parses the specified tokens in sequence" do + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq("abc".chars) + result.tokens.should eq(tokens[3..]) + end + end + describe "#assert" do p = Parser(Char, Char).any_token.assert { |c| c == 'a' } @@ -284,7 +330,55 @@ describe Parser do it "does not modify the input when recovering" do result2.tokens.should eq(tokens2) end + end + + describe "#+" do + a = Parser(Char, Char).token('a') + b = 
Parser(Char, Char).token('b') + p = a + b + + it "combines both success results if both parsers succeed" do + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq({'a', 'b'}) + result.tokens.should eq(tokens[2..]) + end + + it "fails if either parser fails" do + "aacd bbcd cccd".split.each do |s| + tokens = Tokens.from_string(s) + expect_raises(ParserFail) { p.parse(tokens) } + end + end + end + + # Should be able to skip some tests because they are already + # written for #+, which this is based on. + describe "#<<" do + a = Parser(Char, Char).token('a') + b = Parser(Char, Char).token('b') + p = a << b + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + it "discards the second parser's value" do + result.value.should eq('a') + end + end + # Should be able to skip some tests because they are already + # written for #+, which this is based on. + describe "#>>" do + a = Parser(Char, Char).token('a') + b = Parser(Char, Char).token('b') + p = a >> b + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + it "discards the first parser's value" do + result.value.should eq('b') + end end end diff --git a/src/parcom/parser.cr b/src/parcom/parser.cr index b78ab77..fcfe292 100644 --- a/src/parcom/parser.cr +++ b/src/parcom/parser.cr @@ -86,6 +86,32 @@ module Parcom Parser(T, T).satisfy { |x| x == token }.named("Token <#{token}>") end + # Creates a parser from an array of parsers that parses with + # each of them in sequence. The results of all parsers are returned + # in an array. If any of the parsers fail, the whole parser fails. 
+ # TODO: allow support for Iterable(Parser(T, U)) + def self.sequence(ps : Array(Parser(T, U))) : Parser(T, Array(U)) + Parser(T, Array(U)).new("Sequence: #{ps.map(&.name)}") do |tokens| + values = [] of U + ps.each do |p| + result = p.parse(tokens) + values << result.value + tokens = result.tokens + end + Result.new(tokens, values) + end + end + + # Creates a parser from an array of `T` that tries to parse + # each member of the array in sequence. An identical array is + # returned on success. If any of the tokens are absent, the + # whole parser fails. + # TODO: allow support for Iterable(T) + def self.token_sequence(ts : Array(T)) : Parser(T, Array(T)) + ps = ts.map{ |t| Parser(T, T).token(t) } + Parser(T, T).sequence(ps).named("Token Sequence: #{ts}") + end + # Creates a new parser from a `Proc`. # The `Proc` should have the properties outlined above. def initialize(@name : String, @f : Tokens(T) -> Result(T, U)) @@ -157,9 +183,13 @@ module Parcom map(block) end + # Creates a new parser from `self` and another parser that will + # try to parse with either of them. If the first parser succeeds, + # it will return the result of the first parser. Otherwise, it will + # return the result of the second parser. def |(p2 : Parser(T, U)) : Parser(T, U) p1 = self - Parser(T, U).new("#{p1.name} or #{p2.name}") do |tokens| + Parser(T, U).new("#{p1.name} | #{p2.name}") do |tokens| p1.parse(tokens) rescue ParserFail p2.parse(tokens) @@ -187,6 +217,30 @@ module Parcom Result.new(new_tokens, new_value) end end + + # Creates a new parser from `self` and another parser that will + # try to parse with both parsers and return both results. If either + # sub-parser fails, the whole parser fails. 
+ def +(p2 : Parser(T, V)) : Parser(T, {U, V}) forall V + p1 = self + Parser(T, {U, V}).new("#{p1.name} + #{p2.name}") do |tokens| + r1 = p1.parse(tokens) + r2 = p2.parse(r1.tokens) + Result.new(r2.tokens, {r1.value, r2.value}) + end + end + + # Same as `#+`, but discards the second parser's result. + def <<(p2 : Parser(T, V)) : Parser(T, U) forall V + p1 = self + (p1 + p2).map(&.first).named("#{p1.name} << #{p2.name}") + end + + # Same as `#+`, but discards the first parser's result. + def >>(p2 : Parser(T, V)) : Parser(T, V) forall V + p1 = self + (p1 + p2).map(&.last).named("#{p1.name} >> #{p2.name}") + end end end -- cgit v1.2.1