From 467660e024bad8e2e084aa703686d0856a7e88b9 Mon Sep 17 00:00:00 2001
From: Matthew Hall <hallmatthew314@gmail.com>
Date: Sat, 18 Mar 2023 21:44:26 +1300
Subject: Sequence, +, and variants

---
 spec/parcom_spec.cr  | 94 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 src/parcom/parser.cr | 56 ++++++++++++++++++++++++++++++-
 2 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/spec/parcom_spec.cr b/spec/parcom_spec.cr
index 6acd1b2..86ad93a 100644
--- a/spec/parcom_spec.cr
+++ b/spec/parcom_spec.cr
@@ -170,6 +170,52 @@ describe Parser do
     end
   end
 
+  describe "self.sequence" do
+    it "always succeeds with 0 parsers" do
+      p = Parser(Char, Char).sequence([] of Parser(Char, Char))
+      tokens = Tokens.from_string("")
+      result = p.parse(tokens)
+
+      result.value.empty?.should be_true
+      result.tokens.should eq(tokens)
+    end
+
+    p = Parser(Char, Char).sequence([
+      Parser(Char, Char).token('a'),
+      Parser(Char, Char).token('b'),
+      Parser(Char, Char).token('c'),
+    ])
+
+    it "runs each parser in sequence" do
+      tokens = Tokens.from_string("abcd")
+      result = p.parse(tokens)
+
+      result.value.should eq("abc".chars)
+      result.tokens.should eq(tokens[3..])
+    end
+
+    it "fails if any of the parsers fail" do
+      "xbc axc abx".split.each do |s|
+        tokens = Tokens.from_string(s)
+        expect_raises(ParserFail) { p.parse(tokens) }
+      end
+    end
+  end
+
+  # most testing should be able to be skipped, since it is already
+  # done for `Parser.sequence`
+  describe "self.token_sequence" do
+    p = Parser(Char, Char).token_sequence("abc".chars)
+
+    it "parses the specified tokens in sequence" do
+      tokens = Tokens.from_string("abcd")
+      result = p.parse(tokens)
+
+      result.value.should eq("abc".chars)
+      result.tokens.should eq(tokens[3..])
+    end
+  end
+
   describe "#assert" do
     p = Parser(Char, Char).any_token.assert { |c| c == 'a' }
 
@@ -284,7 +330,55 @@ describe Parser do
     it "does not modify the input when recovering" do
       result2.tokens.should eq(tokens2)
     end
+  end
+
+  describe "#+" do
+    a = Parser(Char, Char).token('a')
+    b = Parser(Char, Char).token('b')
+    p = a + b
+
+    it "combines both success results if both parsers succeed" do
+      tokens = Tokens.from_string("abcd")
+      result = p.parse(tokens)
+
+      result.value.should eq({'a', 'b'})
+      result.tokens.should eq(tokens[2..])
+    end
+
+    it "fails if either parser fails" do
+      "aacd bbcd cccd".split.each do |s|
+        tokens = Tokens.from_string(s)
+        expect_raises(ParserFail) { p.parse(tokens) }
+      end
+    end
+  end
+
+  # Should be able to skip some tests because they are already
+  # written for #+, which this is based on.
+  describe "#<<" do
+    a = Parser(Char, Char).token('a')
+    b = Parser(Char, Char).token('b')
+    p = a << b
+    tokens = Tokens.from_string("abcd")
+    result = p.parse(tokens)
+
+    it "discards the second parser's value" do
+      result.value.should eq('a')
+    end
+  end
 
+  # Should be able to skip some tests because they are already
+  # written for #+, which this is based on.
+  describe "#>>" do
+    a = Parser(Char, Char).token('a')
+    b = Parser(Char, Char).token('b')
+    p = a >> b
+    tokens = Tokens.from_string("abcd")
+    result = p.parse(tokens)
+
+    it "discards the first parser's value" do
+      result.value.should eq('b')
+    end
   end
 end
 
diff --git a/src/parcom/parser.cr b/src/parcom/parser.cr
index b78ab77..fcfe292 100644
--- a/src/parcom/parser.cr
+++ b/src/parcom/parser.cr
@@ -86,6 +86,32 @@ module Parcom
       Parser(T, T).satisfy { |x| x == token }.named("Token <#{token}>")
     end
 
+    # Creates a parser from an array of parsers that parses with
+    # each of them in sequence. The results of all parsers are returned
+    # in an array. If any of the parsers fail, the whole parser fails.
+    # TODO: allow support for Iterable(Parser(T, U))
+    def self.sequence(ps : Array(Parser(T, U))) : Parser(T, Array(U))
+      Parser(T, Array(U)).new("Sequence: #{ps.map(&.name)}") do |tokens|
+        values = [] of U
+        ps.each do |p|
+          result = p.parse(tokens)
+          values << result.value
+          tokens = result.tokens
+        end
+        Result.new(tokens, values)
+      end
+    end
+
+    # Creates a parser from an array of `T` that tries to parse
+    # each member of the array in sequence. An identical array is
+    # returned on success. If any of the tokens are absent, the
+    # whole parser fails.
+    # TODO: allow support for Iterable(T)
+    def self.token_sequence(ts : Array(T)) : Parser(T, Array(T))
+      ps = ts.map { |t| Parser(T, T).token(t) }
+      Parser(T, T).sequence(ps).named("Token Sequence: #{ts}")
+    end
+
     # Creates a new parser from a `Proc`.
     # The `Proc` should have the properties outlined above.
     def initialize(@name : String, @f : Tokens(T) -> Result(T, U))
@@ -157,9 +183,13 @@ module Parcom
       map(block)
     end
 
+    # Creates a new parser from `self` and another parser that will
+    # try to parse with either of them. If the first parser succeeds,
+    # it will return the result of the first parser. Otherwise, it will
+    # return the result of the second parser.
     def |(p2 : Parser(T, U)) : Parser(T, U)
       p1 = self
-      Parser(T, U).new("#{p1.name} or #{p2.name}") do |tokens|
+      Parser(T, U).new("#{p1.name} | #{p2.name}") do |tokens|
         p1.parse(tokens)
       rescue ParserFail
         p2.parse(tokens)
@@ -187,6 +217,30 @@ module Parcom
         Result.new(new_tokens, new_value)
       end
     end
+
+    # Creates a new parser from `self` and another parser that will
+    # try to parse with both parsers and return both results. If either
+    # sub-parser fails, the whole parser fails.
+    def +(p2 : Parser(T, V)) : Parser(T, {U, V}) forall V
+      p1 = self
+      Parser(T, {U, V}).new("#{p1.name} + #{p2.name}") do |tokens|
+        r1 = p1.parse(tokens)
+        r2 = p2.parse(r1.tokens)
+        Result.new(r2.tokens, {r1.value, r2.value})
+      end
+    end
+
+    # Same as `#+`, but discards the second parser's result.
+    def <<(p2 : Parser(T, V)) : Parser(T, U) forall V
+      p1 = self
+      (p1 + p2).map(&.first).named("#{p1.name} << #{p2.name}")
+    end
+
+    # Same as `#+`, but discards the first parser's result.
+    def >>(p2 : Parser(T, V)) : Parser(T, V) forall V
+      p1 = self
+      (p1 + p2).map(&.last).named("#{p1.name} >> #{p2.name}")
+    end
   end
 end
 
-- 
cgit v1.2.1