diff options
| author | Matthew Hall <hallmatthew314@gmail.com> | 2023-03-29 23:34:13 +1300 |
|---|---|---|
| committer | Matthew Hall <hallmatthew314@gmail.com> | 2023-03-29 23:34:13 +1300 |
| commit | ec5c32d902148ab958cd4aac82b473fc8242d5af (patch) | |
| tree | 2b7435d07b2301fb81fb468a395a9793cf527655 | |
| parent | 3d8af6e93dfedcf2fa4e37adbece58dccaba168a (diff) | |
Implement parser_chain macro
| -rw-r--r-- | spec/parcom_spec.cr | 75 | ||||
| -rw-r--r-- | src/parcom.cr | 44 |
2 files changed, 119 insertions, 0 deletions
diff --git a/spec/parcom_spec.cr b/spec/parcom_spec.cr index cee6ff4..2b7a447 100644 --- a/spec/parcom_spec.cr +++ b/spec/parcom_spec.cr @@ -2,6 +2,81 @@ require "./spec_helper" include Parcom +describe "parser_chain" do + it "works with zero intermediate steps" do + p = parser_chain "test", Char, Char, finally: Parser(Char, Char).pure('x') + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq('x') + result.tokens.should eq(tokens) + end + + it "works with one intermediate step" do + p = parser_chain "test", Char, Char, + {c, Parser(Char, Char).any_token}, + finally: Parser(Char, Char).pure(c) + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq('a') + result.tokens.should eq(tokens[1..]) + end + + it "works with many intermediate steps" do + digit = Parser(Char, Char).satisfy(&.number?) + p = parser_chain "float", Char, Float64, + {sign, Parser.token('-').map_const(-1).recover(1)}, + {front, digit.many}, + {point, Parser.token('.').optional}, + {back, digit.many}, + finally: case {front.empty?, point.nil?, back.empty?} + when {false, _, true} + Parser(Char, Float64).pure(front.join.to_f64 * sign) + when {true, false, false} + Parser(Char, Float64).pure("0.#{back.join}".to_f64 * sign) + when {false, false, false} + Parser(Char, Float64).pure("#{front.join}.#{back.join}".to_f64 * sign) + else + Parser(Char, Float64).flunk + end + { + {"1", 1_f64}, + {"-1", -1_f64}, + {"2.", 2_f64}, + {"-2.", -2_f64}, + {".3", 0.3_f64}, + {"-.3", -0.3_f64}, + {"0.4", 0.4_f64}, + {"-0.4", -0.4_f64}, + }.each do |s, v| + tokens = Tokens.from_string(s) + result = p.parse(tokens) + + result.value.should eq(v) + result.tokens.empty?.should be_true + end + end + + it "allows ignoring results with underscores" do + any_word = Parser(Char, String).satisfy(&.letter?).some.map(&.join) + ws = Parser(Char, Array(Char)).satisfy(&.whitespace?).many + two_words = parser_chain "two words", Char, {String, String}, + {word, 
any_word}, + {_, ws}, + finally: Parser.token_sequence(word.chars).map_const({word, word}) + + tokens = Tokens.from_string("foo \n foo") + result = two_words.parse(tokens) + result.value.should eq({"foo", "foo"}) + + tokens = Tokens.from_string("foo bar") + expect_raises(ParserFail) { two_words.parse(tokens) } + tokens = Tokens.from_string("foofoo") + expect_raises(ParserFail) { two_words.parse(tokens) } + end +end + describe Tokens do describe ".from_string" do it "constructs a Tokens(Char) from a String" do diff --git a/src/parcom.cr b/src/parcom.cr index cdbebdb..7f0e475 100644 --- a/src/parcom.cr +++ b/src/parcom.cr @@ -8,6 +8,50 @@ module Parcom class ParserFail < Exception end + # Provides a more convenient syntax for combining parsers via `Parser#and_then`. + # The first argument is a string literal used for the name of the parser. + # The second and third arguments are types used for the parser's type. + # These are followed by any number of 2-tuples containing a variable name and + # an expression resolving to a `Parser(t.class, _)`, whose success value will + # be stored in the aforementioned variable. The `finally` named argument is an + # expression that resolves to a `Parser(t.class, u.class)`. + # + # Example: + # ``` + # any_word = Parser(Char, Char).satisfy(&.letter?).some.map(&.join) + # ws = Parser(Char, Array(Char)).satisfy(&.whitespace?).many + # two_of_same_word = parser_chain "two words", Char, String, + # {word, any_word}, + # {_, ws}, + # finally: Parser.token_sequence(word.chars).map(&.join) + # + # tokens = Tokens.from_string("foo foo") + # result = two_of_same_word.parse(tokens) + # result.value # => "foo" + # + # # The above definition of `two_of_same_word` + # # is an alternative way of doing this: + # two_of_same_word = any_word.and_then do |word| + # ws.and_then do |_| + # Parser.token_sequence(word.chars).map(&.join) + # end + # end.named("two words") + # ``` + # + # This macro is based on Haskell's do-notation. 
+ macro parser_chain(name, t, u, *steps, finally) + Parser({{t}}, {{u}}).new({{name}}) do |tokens| + {% for tup, index in steps %} + {{tup.last}}.and_then do |{{tup.first}}| + {% end %} + {{finally}} + {% for _, _ in steps %} + end + {% end %} + .parse(tokens) + end + end + # `Tokens` is an `Array` wrapper struct to store the input # stream of one or more `Parser` objects. # A `Tokens` can be created from any `Iterable`, along with |
