From b274828831fec26cd8b3089ffef14cb96ce2de2f Mon Sep 17 00:00:00 2001 From: Matthew Hall Date: Thu, 16 Mar 2023 20:36:03 +1300 Subject: Second rewrite attempt, this one should work, monkaS --- spec/__OLD_parcom_spec.cr | 765 ++++++++++++++++++++++++++++++++++++++++++ spec/parcom_spec.cr | 758 +---------------------------------------- src/__OLD_parcom.cr | 84 +++++ src/__OLD_parcom/alt.cr | 28 ++ src/__OLD_parcom/any_token.cr | 18 + src/__OLD_parcom/assert.cr | 45 +++ src/__OLD_parcom/at_least.cr | 29 ++ src/__OLD_parcom/at_most.cr | 29 ++ src/__OLD_parcom/between.cr | 37 ++ src/__OLD_parcom/eof.cr | 23 ++ src/__OLD_parcom/exactly.cr | 28 ++ src/__OLD_parcom/first_of.cr | 40 +++ src/__OLD_parcom/flunk.cr | 12 + src/__OLD_parcom/left.cr | 26 ++ src/__OLD_parcom/many.cr | 36 ++ src/__OLD_parcom/map.cr | 30 ++ src/__OLD_parcom/optional.cr | 40 +++ src/__OLD_parcom/parser.cr | 87 +++++ src/__OLD_parcom/peek.cr | 23 ++ src/__OLD_parcom/phrase.cr | 33 ++ src/__OLD_parcom/plus.cr | 54 +++ src/__OLD_parcom/recover.cr | 27 ++ src/__OLD_parcom/right.cr | 26 ++ src/__OLD_parcom/satisfy.cr | 33 ++ src/__OLD_parcom/sep_by.cr | 38 +++ src/__OLD_parcom/sequence.cr | 33 ++ src/__OLD_parcom/some.cr | 33 ++ src/__OLD_parcom/token.cr | 20 ++ src/__OLD_parcom/token_seq.cr | 38 +++ src/parcom.cr | 56 +++- src/parcom/alt.cr | 28 -- src/parcom/any_token.cr | 18 - src/parcom/assert.cr | 45 --- src/parcom/at_least.cr | 29 -- src/parcom/at_most.cr | 29 -- src/parcom/basic.cr | 10 + src/parcom/between.cr | 37 -- src/parcom/eof.cr | 23 -- src/parcom/exactly.cr | 28 -- src/parcom/first_of.cr | 40 --- src/parcom/flunk.cr | 12 - src/parcom/left.cr | 26 -- src/parcom/many.cr | 36 -- src/parcom/map.cr | 30 -- src/parcom/optional.cr | 40 --- src/parcom/parser.cr | 87 ----- src/parcom/peek.cr | 23 -- src/parcom/phrase.cr | 33 -- src/parcom/plus.cr | 54 --- src/parcom/recover.cr | 27 -- src/parcom/right.cr | 26 -- src/parcom/satisfy.cr | 33 -- src/parcom/sep_by.cr | 38 --- src/parcom/sequence.cr | 33 -- src/parcom/some.cr | 33 -- src/parcom/token.cr | 20 -- src/parcom/token_seq.cr | 38 --- 57 files changed, 1769 insertions(+), 1636 deletions(-) create mode 100644 spec/__OLD_parcom_spec.cr create mode 100644 src/__OLD_parcom.cr create mode 100644 src/__OLD_parcom/alt.cr create mode 100644 src/__OLD_parcom/any_token.cr create mode 100644 src/__OLD_parcom/assert.cr create mode 100644 src/__OLD_parcom/at_least.cr create mode 100644 src/__OLD_parcom/at_most.cr create mode 100644 src/__OLD_parcom/between.cr create mode 100644 src/__OLD_parcom/eof.cr create mode 100644 src/__OLD_parcom/exactly.cr create mode 100644 src/__OLD_parcom/first_of.cr create mode 100644 src/__OLD_parcom/flunk.cr create mode 100644 src/__OLD_parcom/left.cr create mode 100644 src/__OLD_parcom/many.cr create mode 100644 src/__OLD_parcom/map.cr create mode 100644 src/__OLD_parcom/optional.cr create mode 100644 src/__OLD_parcom/parser.cr create mode 100644 src/__OLD_parcom/peek.cr create mode 100644 src/__OLD_parcom/phrase.cr create mode 100644 src/__OLD_parcom/plus.cr create mode 100644 src/__OLD_parcom/recover.cr create mode 100644 src/__OLD_parcom/right.cr create mode 100644 src/__OLD_parcom/satisfy.cr create mode 100644 src/__OLD_parcom/sep_by.cr create mode 100644 src/__OLD_parcom/sequence.cr create mode 100644 src/__OLD_parcom/some.cr create mode 100644 src/__OLD_parcom/token.cr create mode 100644 src/__OLD_parcom/token_seq.cr delete mode 100644 src/parcom/alt.cr delete mode 100644 src/parcom/any_token.cr delete mode 100644 src/parcom/assert.cr delete 
mode 100644 src/parcom/at_least.cr delete mode 100644 src/parcom/at_most.cr create mode 100644 src/parcom/basic.cr delete mode 100644 src/parcom/between.cr delete mode 100644 src/parcom/eof.cr delete mode 100644 src/parcom/exactly.cr delete mode 100644 src/parcom/first_of.cr delete mode 100644 src/parcom/flunk.cr delete mode 100644 src/parcom/left.cr delete mode 100644 src/parcom/many.cr delete mode 100644 src/parcom/map.cr delete mode 100644 src/parcom/optional.cr delete mode 100644 src/parcom/parser.cr delete mode 100644 src/parcom/peek.cr delete mode 100644 src/parcom/phrase.cr delete mode 100644 src/parcom/plus.cr delete mode 100644 src/parcom/recover.cr delete mode 100644 src/parcom/right.cr delete mode 100644 src/parcom/satisfy.cr delete mode 100644 src/parcom/sep_by.cr delete mode 100644 src/parcom/sequence.cr delete mode 100644 src/parcom/some.cr delete mode 100644 src/parcom/token.cr delete mode 100644 src/parcom/token_seq.cr diff --git a/spec/__OLD_parcom_spec.cr b/spec/__OLD_parcom_spec.cr new file mode 100644 index 0000000..25ae2e9 --- /dev/null +++ b/spec/__OLD_parcom_spec.cr @@ -0,0 +1,765 @@ +require "./spec_helper" + +require "../src/parcom.cr" + +include Parcom + +describe Tokens do + describe ".from_string" do + it "constructs a Tokens(Char) from a String" do + tokens = Tokens.from_string("abcd") + tokens.tokens.should eq("abcd".chars) + end + end + + describe "#initialize" do + it "wraps an array with the contents of the given iterable" do + set = Set{'a', 'b', 'c', 'd'} + tokens = Tokens.new(set) + tokens.tokens.should eq(set.to_a) + + arr = "abcd".chars + tokens = Tokens.new(arr) + tokens.tokens.should eq(arr) + end + end + + context do + tokens_empty = Tokens.new([] of Char) + tokens = Tokens.from_string("abcd") + + describe "#[]" do + it "returns the token at the given index" do + tokens[2].should eq('c') + expect_raises(IndexError) { tokens_empty[2] } + end + + it "returns a new Tokens similar to Array#[](Int, Int)" do + tokens[1, 5].should eq(Tokens.new(['b', 'c', 'd'])) + expect_raises(IndexError) { tokens_empty[1, 5] } + end + + it "returns a new Tokens similar to Array#[](Range)" do + tokens[1..3].should eq(Tokens.new(['b', 'c', 'd'])) + expect_raises(IndexError) { tokens_empty[1..3] } + end + end + + describe "#[]?" do + it "analogous to `Array#[]?`" do + # we should only need to check the nil-returning cases + tokens_empty[2]?.should be_nil + tokens_empty[1, 5]?.should be_nil + tokens_empty[1..3]?.should be_nil + end + end + + describe "#empty?" do + it "exposes the `#empty?` method of the wrapped array" do + tokens.empty?.should be_false + tokens_empty.empty?.should be_true + end + end + end +end + +describe Result do + describe "#initialize" do + it "sets values for #tokens and #value" do + tokens = Tokens.from_string("esting") + value = 't' + result = Result(Char, Char).new(tokens, value) + + result.tokens.should eq(tokens) + result.value.should eq(value) + end + end +end + +describe Parser do + p = AnyToken(Char).new + + describe "#parse?" 
do + it "returns `nil` if the parser fails" do + result = p.parse?(Tokens.new([] of Char)) + + result.should be_nil + end + + it "returns a `Result(T, V)` if the parser succeeds" do + tokens = Tokens.from_string("testing") + result = p.parse(tokens) + + result.should be_a(Result(Char, Char)) + end + end +end + +describe Flunk do + describe "#parse" do + it "always fails" do + tokens = Tokens.from_string("testing") + + expect_raises(ParserFail) { Flunk(Char, Char).new.parse(tokens) } + end + end +end + +describe AnyToken do + context do + p = AnyToken(Char).new + + describe "#parse" do + it "succeeds when input is non-empty" do + tokens = Tokens.from_string("testing") + result = p.parse(tokens) + + result.tokens.should eq(tokens[1..]) + result.value.should eq('t') + end + + it "fails when input is empty" do + expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } + end + end + end +end + +describe EOF do + p = EOF(Char).new + + describe "#parse" do + it "succeeds when input is empty" do + result = p.parse(Tokens.new([] of Char)) + + result.tokens.empty?.should be_true + result.value.should be_nil + end + + it "fails when input is non-empty" do + tokens = Tokens.from_string("testing") + + expect_raises(ParserFail) { p.parse(tokens) } + end + end +end + +describe Peek do + tokens = Tokens.from_string("testing") + p = AnyToken(Char).new + result_normal = p.parse(tokens) + result_peek = Peek.new(p).parse(tokens) + + describe "#parse" do + it "does not modify the result of the wrapped parser" do + result_peek.value.should eq(result_normal.value) + end + + it "does not consume any input" do + result_peek.tokens.should eq(tokens) + end + end +end + +describe Assert do + test_f = ->(x : Char) { x == 't' } + p = AnyToken(Char).new.assert { |x| x == 't' } + + describe "#parse" do + it "fails if the wrapped parser fails" do + expect_raises(ParserFail) do + p.parse(Tokens.new([] of Char)) + end + end + + it "fails if the result value fails the test" do + tokens = Tokens.from_string("_testing") + + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if the wrapped parser succeeds and the test passes" do + tokens = Tokens.from_string("testing") + expected_char = tokens[0] + result = p.parse(tokens) + + result.value.should eq(expected_char) + test_f.call(expected_char).should be_true + end + end +end + +describe Satisfy do + p = Satisfy(Char).new { |x| x == 't' } + + describe "#parse" do + it "fails if the input is empty" do + expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } + end + + it "fails if the token fails the test" do + tokens = Tokens.from_string("_testing") + + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if the token passes the test" do + tokens = Tokens.from_string("testing") + expected_char = tokens[0] + result = p.parse(tokens) + + result.value.should eq(expected_char) + end + end +end + +describe Token do + tokens = Tokens.from_string("testing") + + describe "#parse" do + it "fails if the input is empty" do + p = Token(Char).new('t') + + expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } + end + + it "fails if the token is not the expected token" do + p = Token(Char).new('#') + + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if the token is the expected token" do + expected_char = tokens[0] + p = Token(Char).new(expected_char) + result = p.parse(tokens) + + result.value.should eq(expected_char) + end + end +end + +describe Map do + describe "#parse" do + it "fails if the wrapped parser fails" do + 
p = AnyToken(Char).new.map { |x| x } + + expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } + end + + it "changes the result value via the provided proc" do + p = AnyToken(Char).new.map { |x| x.letter? } + + result = p.parse(Tokens.from_string("testing")) + result.value.should be_true + + result = p.parse(Tokens.from_string("_testing")) + result.value.should be_false + end + end +end + +describe Plus do + describe "#parse" do + tokens = Tokens.from_string("testing") + p_t = Token(Char).new('t') + p_e = Token(Char).new('e') + p_at = Token(Char).new('@') + + it "fails if the first parser fails" do + p = p_at + p_e + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "fails if the second parser fails" do + p = p_t + p_at + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "fails if both parsers fail" do + p = p_at + p_at + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if both parsers succeed" do + p = p_t + p_e + result = p.parse(tokens) + + result.tokens.should eq(tokens[2..]) + result.value[0].should eq('t') + result.value[1].should eq('e') + end + + it "evaluates parsers from left to right (left associative)" do + p_succeeds = p_t + p_e + p_fails = p_e + p_t + + p_succeeds.parse(tokens) # should not raise an exception + expect_raises(ParserFail) { p_fails.parse(tokens) } + + p_s = Token(Char).new('s') + + r = (p_t + p_e + p_s).parse(tokens) # should not raise an exception + r.value.should be_a({ {Char, Char}, Char}) + + r = (p_t + (p_e + p_s)).parse(tokens) # should not raise an exception + r.value.should be_a({Char, {Char, Char} }) + end + end +end + +# most behavior shouldn't need to be tested +# since it is based on tested bbehavior from +# Plus and Map +describe Left do + describe "#parse" do + it "returns the value of the first parser if both succeed" do + tokens = Tokens.from_string("testing") + letter_t = Token.new('t') + letter_e = Token.new('e') + result = (letter_t << letter_e).parse(tokens) + + result.value.should eq('t') + result.tokens.should eq(tokens[2..]) + end + end +end + +# same deal as Left +describe Right do + describe "#parse" do + it "returns the value of the second parser if both succeed" do + tokens = Tokens.from_string("testing") + letter_t = Token.new('t') + letter_e = Token.new('e') + result = (letter_t >> letter_e).parse(tokens) + + result.value.should eq('e') + result.tokens.should eq(tokens[2..]) + end + end +end + +describe Phrase do + p = Phrase.new(Token.new('t')) + + describe "#parse" do + it "fails if the wrapped parser fails" do + tokens = Tokens.from_string("_") + + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "fails if not all of the input tokens are parsed" do + tokens = Tokens.from_string("tt") + + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if the wrapped parser successfully parses all of the input" do + tokens = Tokens.from_string("t") + result = p.parse(tokens) + + result.tokens.empty?.should be_true + result.value.should eq('t') + end + end +end + +describe Recover do + p = Token.new('t').recover('@') + + describe "#parse" do + it "succeeds and returns the wrapped parser's value if it succeeds" do + tokens = Tokens.from_string("testing") + result = p.parse(tokens) + + result.tokens.should eq(tokens[1..]) + result.value.should eq('t') + end + + it "succeeds and returns the default value without modifying the input if the wrapped parser fails" do + tokens = Tokens.from_string("_____") + result = p.parse(tokens) + + result.tokens.should eq(tokens) + 
result.value.should eq('@') + end + end +end + +describe Optional do + p = Optional.new(Token.new('t')) + + describe "#parse" do + it "succeeds and returns the wrapped parser's value if it succeeds" do + tokens = Tokens.from_string("testing") + result = p.parse(tokens) + + result.tokens.should eq(tokens[1..]) + result.value.should eq('t') + end + + it "succeeds and returns a value of `nil` without modifying the input if the wrapped parser fails" do + tokens = Tokens.from_string("_____") + result = p.parse(tokens) + + result.tokens.should eq(tokens) + result.value.should be_nil + end + end +end + +describe Sequence do + # HACK: ps has to be declared this way due to contravariance + # https://crystal-lang.org/reference/1.7/syntax_and_semantics/inheritance.html#covariance-and-contravariance + ps = [] of Parser(Char, Char) + ps = ps + "abcd".chars.map { |c| Token.new(c) } + p = Sequence.new(ps) + + describe "#parse" do + it "runs each wrapped parser in order, returns each result" do + tokens = Tokens.from_string("abcd") + result = p.parse(tokens) + + result.value.should eq("abcd".chars) + result.tokens.empty?.should be_true + end + + it "fails if any of the wrapped parsers fail" do + fail_strings = ["", "abed", "bbcd", "abce"] + fail_strings.each do |s| + tokens = Tokens.from_string(s) + expect_raises(ParserFail) { p.parse(tokens) } + end + end + + it "succeeds and returns empty array if parser iterable is empty" do + tokens = Tokens.from_string("abcd") + empty_p = Sequence.new([] of Parser(Char, Char)) + result = empty_p.parse(tokens) + + result.value.empty?.should be_true + result.tokens.should eq(tokens) + end + end +end + +describe TokenSeq do + p = TokenSeq.new("test".chars) + + describe "#parse" do + it "fails if the input stream is too short" do + input = Tokens.from_string("") + expect_raises(ParserFail) { p.parse(input) } + end + + it "fails if it encounters an unexpected token" do + input = Tokens.from_string("text") + expect_raises(ParserFail) { p.parse(input) } + end + + it "succeeds if the input starts with the expected tokens" do + input = Tokens.from_string("testing") + result = p.parse(input) + + result.tokens.should eq(input[4..]) + result.value.should eq("test".chars) + end + end +end + +describe Many do + p = Many.new(Token.new('a')) + + describe "#parse" do + it "returns an empty array if the wrapped parser never succeeds" do + tokens = Tokens.from_string("bb") + result = p.parse(tokens) + + result.value.empty?.should be_true + result.tokens.should eq(tokens) + end + + it "stops parsing when the wrapped parser fails, returns all successes" do + tokens = Tokens.from_string("aaabcd") + result = p.parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + + tokens = Tokens.from_string("aaa") + result = p.parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + end + + it "stops parsing when the wapped parser succeeds without consuming any input" do + a_optional : Parser(Char, Char?) 
+ a_optional = Optional.new(Token.new('a')) + tokens = Tokens.from_string("aaa") + result = Many(Char, Char?).new(a_optional).parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + end + end +end + +describe Some do + p = Some.new(Token.new('a')) + describe "#parse" do + it "fails if the wrapped parser never succeeds" do + tokens = Tokens.from_string("") + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "stops parsing when the wrapped parser fails, returns all successes" do + tokens = Tokens.from_string("aaabcd") + result = p.parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + + tokens = Tokens.from_string("aaa") + result = p.parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + end + end +end + +describe Exactly do + letter_a = Token.new('a') + tokens = Tokens.from_string("aaabcd") + + describe "#parse" do + it "tries to parse exactly n of the wrapper parser" do + p = Exactly.new(3, letter_a) + result = p.parse(tokens) + + result.value.should eq("aaa".chars) + result.tokens.should eq(tokens[3..]) + end + + it "always succeeds with an empty array if n < 1" do + p = Exactly.new(0, letter_a) + result = p.parse(tokens) + + result.value.empty?.should be_true + result.tokens.should eq(tokens) + + p = Exactly.new(-42, letter_a) + result = p.parse(tokens) + + result.value.empty?.should be_true + result.tokens.should eq(tokens) + end + + it "does not take extra matching tokens" do + p = Exactly.new(2, letter_a) + result = p.parse(tokens) + + result.value.should eq("aa".chars) + result.tokens.should eq(tokens[2..]) + end + + it "fails if there are not enough matching tokens" do + p = Exactly.new(60, letter_a) + expect_raises(ParserFail) { p.parse(tokens) } + end + end +end + +describe AtLeast do + letter_a = Token.new('a') + tokens = Tokens.from_string("aaaabcd") + + describe "#parse" do + it "fails if there are not enough matching tokens to parse" do + p = AtLeast.new(5, letter_a) + expect_raises(ParserFail) { p.parse(tokens) } + #expect_raises(ParserFail) { raise ParserFail.new("sdgseg") } + end + + it "parses n or more times with the given parser" do + p0 = AtLeast.new(0, letter_a) + p2 = AtLeast.new(2, letter_a) + p4 = AtLeast.new(4, letter_a) + + result0 = p0.parse(tokens) + result2 = p2.parse(tokens) + result4 = p4.parse(tokens) + + result0.value.should eq("aaaa".chars) + result0.tokens.should eq(tokens[4..]) + + result2.should eq(result0) + result4.should eq(result0) + end + end +end + +describe AtMost do + letter_a = Token.new('a') + tokens = Tokens.from_string("aaaabcd") + + describe "#parse" do + it "does not parse more than n times" do + p0 = AtMost.new(0, letter_a) + p2 = AtMost.new(2, letter_a) + p6 = AtMost.new(6, letter_a) + + r0 = p0.parse(tokens) + r0.value.empty?.should be_true + r0.tokens.should eq(tokens) + + r2 = p2.parse(tokens) + r2.value.should eq("aa".chars) + r2.tokens.should eq(tokens[2..]) + + r6 = p6.parse(tokens) + r6.value.should eq("aaaa".chars) + r6.tokens.should eq(tokens[4..]) + end + end +end + +describe Between do + letter_a = Token.new('a') + tokens = Tokens.from_string("aaaabcd") + + describe "#parse" do + it "parses at least i times, up to a limit of j times" do + p0_4 = Between.new(0, 4, letter_a) + r0_4 = p0_4.parse(tokens) + + r0_4.value.should eq("aaaa".chars) + r0_4.tokens.should eq(tokens[4..]) + end + + it "fails if there are not enough parser successes" do + p = Between.new(5, 6, letter_a) + expect_raises(ParserFail) { 
p.parse(tokens) } + end + end +end + +describe FirstOf do + tokens = Tokens.from_string("abcd") + letter_a = Token.new('a') + f = Flunk(Char, Char).new + + describe "#parse" do + it "cannot be instantiated with an empty Enumerable" do + expect_raises(ArgumentError) { FirstOf.new([] of Parser(Char, Char)) } + end + + it "uses the result of the first successful parser" do + a1 = [letter_a, f, f, f] of Parser(Char, Char) + a2 = [f, letter_a, f, f] of Parser(Char, Char) + a3 = [f, f, letter_a, f] of Parser(Char, Char) + a4 = [f, f, f, letter_a] of Parser(Char, Char) + + [a1, a2, a3, a4].each do |arr| + p = FirstOf.new(arr) + r = p.parse(tokens) + r.value.should eq('a') + r.tokens.should eq(tokens[1..]) + end + end + + it "only fails if no parsers are successful" do + x = Token.new('x') + y = Token.new('x') + z = Token.new('x') + p = FirstOf.new([x, y, z] of Parser(Char, Char)) + expect_raises(ParserFail) { p.parse(tokens) } + end + end +end + +describe SepBy do + describe "#parse" do + letter_a = Token.new('a') + comma = Token.new(',') + tokens = Tokens.from_string("a,a,a,a") + + it "fails if no elements can be parsed" do + p = SepBy(Char, Char, Char).new(comma, comma) + expect_raises(ParserFail) { p.parse(tokens) } + end + + it "succeeds if only one element can be parsed" do + t1 = Tokens.from_string("a") + t2 = Tokens.from_string("a,") + p = SepBy(Char, Char, Char).new(letter_a, comma) + + result = p.parse(t1) + result.value.should eq(['a']) + result.tokens.should eq(t1[1..]) + + result = p.parse(t2) + result.value.should eq(['a']) + result.tokens.should eq(t2[1..]) + end + + it "parses 1 element, then 0 or more (sep >> element)" do + p = SepBy(Char, Char, Char).new(letter_a, comma) + + result = p.parse(tokens) + result.value.should eq("aaaa".chars) + result.tokens.empty?.should be_true + + # drop last char in tokens, should parse three elements + result = p.parse(tokens[..5]) + result.value.should eq("aaa".chars) + result.tokens.should eq(Tokens.from_string(",")) + end + end +end + +describe "Practical use" do + describe "Use case: text surrounded by whitespace" do + space = Satisfy(Char).new { |c| c.whitespace? } + non_space = Satisfy(Char).new { |c| !c.whitespace? } + + # TODO: Figure out why mapping on this parser breaks + # initialization of `body`. 
+ word_chars = Some.new(non_space) + ws = Some.new(space) + + bookend = Optional.new(ws) + body = SepBy.new(word_chars, ws) + tokenizer = (bookend >> body << bookend).map do |arrs| + arrs.map { |chars| chars.join } + end + + good_strings = [ + "test with no trailing whitespace", + " test with whitespace in the front", + "test with whitespace in the back", + " test surrounded by whitespace ", + ] + + good_strings.each do |s| + tokens = Tokens.from_string(s) + result = tokenizer.parse(tokens) + result.value.should eq(s.strip.split(/\s+/)) + result.tokens.empty?.should be_true + end + + bad_strings = [ + "", + " ", + ] + + bad_strings.each do |s| + tokens = Tokens.from_string(s) + expect_raises(ParserFail) { tokenizer.parse(tokens) } + end + end +end + diff --git a/spec/parcom_spec.cr b/spec/parcom_spec.cr index 25ae2e9..d25c786 100644 --- a/spec/parcom_spec.cr +++ b/spec/parcom_spec.cr @@ -4,762 +4,6 @@ require "../src/parcom.cr" include Parcom -describe Tokens do - describe ".from_string" do - it "constructs a Tokens(Char) from a String" do - tokens = Tokens.from_string("abcd") - tokens.tokens.should eq("abcd".chars) - end - end - - describe "#initialize" do - it "wraps an array with the contents of the given iterable" do - set = Set{'a', 'b', 'c', 'd'} - tokens = Tokens.new(set) - tokens.tokens.should eq(set.to_a) - - arr = "abcd".chars - tokens = Tokens.new(arr) - tokens.tokens.should eq(arr) - end - end - - context do - tokens_empty = Tokens.new([] of Char) - tokens = Tokens.from_string("abcd") - - describe "#[]" do - it "returns the token at the given index" do - tokens[2].should eq('c') - expect_raises(IndexError) { tokens_empty[2] } - end - - it "returns a new Tokens similar to Array#[](Int, Int)" do - tokens[1, 5].should eq(Tokens.new(['b', 'c', 'd'])) - expect_raises(IndexError) { tokens_empty[1, 5] } - end - - it "returns a new Tokens similar to Array#[](Range)" do - tokens[1..3].should eq(Tokens.new(['b', 'c', 'd'])) - expect_raises(IndexError) { tokens_empty[1..3] } - end - end - - describe "#[]?" do - it "analogous to `Array#[]?`" do - # we should only need to check the nil-returning cases - tokens_empty[2]?.should be_nil - tokens_empty[1, 5]?.should be_nil - tokens_empty[1..3]?.should be_nil - end - end - - describe "#empty?" do - it "exposes the `#empty?` method of the wrapped array" do - tokens.empty?.should be_false - tokens_empty.empty?.should be_true - end - end - end -end - -describe Result do - describe "#initialize" do - it "sets values for #tokens and #value" do - tokens = Tokens.from_string("esting") - value = 't' - result = Result(Char, Char).new(tokens, value) - - result.tokens.should eq(tokens) - result.value.should eq(value) - end - end -end - -describe Parser do - p = AnyToken(Char).new - - describe "#parse?" 
do - it "returns `nil` if the parser fails" do - result = p.parse?(Tokens.new([] of Char)) - - result.should be_nil - end - - it "returns a `Result(T, V)` if the parser succeeds" do - tokens = Tokens.from_string("testing") - result = p.parse(tokens) - - result.should be_a(Result(Char, Char)) - end - end -end - -describe Flunk do - describe "#parse" do - it "always fails" do - tokens = Tokens.from_string("testing") - - expect_raises(ParserFail) { Flunk(Char, Char).new.parse(tokens) } - end - end -end - -describe AnyToken do - context do - p = AnyToken(Char).new - - describe "#parse" do - it "succeeds when input is non-empty" do - tokens = Tokens.from_string("testing") - result = p.parse(tokens) - - result.tokens.should eq(tokens[1..]) - result.value.should eq('t') - end - - it "fails when input is empty" do - expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } - end - end - end -end - -describe EOF do - p = EOF(Char).new - - describe "#parse" do - it "succeeds when input is empty" do - result = p.parse(Tokens.new([] of Char)) - - result.tokens.empty?.should be_true - result.value.should be_nil - end - - it "fails when input is non-empty" do - tokens = Tokens.from_string("testing") - - expect_raises(ParserFail) { p.parse(tokens) } - end - end -end - -describe Peek do - tokens = Tokens.from_string("testing") - p = AnyToken(Char).new - result_normal = p.parse(tokens) - result_peek = Peek.new(p).parse(tokens) - - describe "#parse" do - it "does not modify the result of the wrapped parser" do - result_peek.value.should eq(result_normal.value) - end - - it "does not consume any input" do - result_peek.tokens.should eq(tokens) - end - end -end - -describe Assert do - test_f = ->(x : Char) { x == 't' } - p = AnyToken(Char).new.assert { |x| x == 't' } - - describe "#parse" do - it "fails if the wrapped parser fails" do - expect_raises(ParserFail) do - p.parse(Tokens.new([] of Char)) - end - end - - it "fails if the result value fails the test" do - tokens = Tokens.from_string("_testing") - - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if the wrapped parser succeeds and the test passes" do - tokens = Tokens.from_string("testing") - expected_char = tokens[0] - result = p.parse(tokens) - - result.value.should eq(expected_char) - test_f.call(expected_char).should be_true - end - end -end - -describe Satisfy do - p = Satisfy(Char).new { |x| x == 't' } - - describe "#parse" do - it "fails if the input is empty" do - expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } - end - - it "fails if the token fails the test" do - tokens = Tokens.from_string("_testing") - - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if the token passes the test" do - tokens = Tokens.from_string("testing") - expected_char = tokens[0] - result = p.parse(tokens) - - result.value.should eq(expected_char) - end - end -end - -describe Token do - tokens = Tokens.from_string("testing") - - describe "#parse" do - it "fails if the input is empty" do - p = Token(Char).new('t') - - expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } - end - - it "fails if the token is not the expected token" do - p = Token(Char).new('#') - - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if the token is the expected token" do - expected_char = tokens[0] - p = Token(Char).new(expected_char) - result = p.parse(tokens) - - result.value.should eq(expected_char) - end - end -end - -describe Map do - describe "#parse" do - it "fails if the wrapped parser fails" do - 
p = AnyToken(Char).new.map { |x| x } - - expect_raises(ParserFail) { p.parse(Tokens.new([] of Char)) } - end - - it "changes the result value via the provided proc" do - p = AnyToken(Char).new.map { |x| x.letter? } - - result = p.parse(Tokens.from_string("testing")) - result.value.should be_true - - result = p.parse(Tokens.from_string("_testing")) - result.value.should be_false - end - end -end - -describe Plus do - describe "#parse" do - tokens = Tokens.from_string("testing") - p_t = Token(Char).new('t') - p_e = Token(Char).new('e') - p_at = Token(Char).new('@') - - it "fails if the first parser fails" do - p = p_at + p_e - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "fails if the second parser fails" do - p = p_t + p_at - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "fails if both parsers fail" do - p = p_at + p_at - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if both parsers succeed" do - p = p_t + p_e - result = p.parse(tokens) - - result.tokens.should eq(tokens[2..]) - result.value[0].should eq('t') - result.value[1].should eq('e') - end - - it "evaluates parsers from left to right (left associative)" do - p_succeeds = p_t + p_e - p_fails = p_e + p_t - - p_succeeds.parse(tokens) # should not raise an exception - expect_raises(ParserFail) { p_fails.parse(tokens) } - - p_s = Token(Char).new('s') - - r = (p_t + p_e + p_s).parse(tokens) # should not raise an exception - r.value.should be_a({ {Char, Char}, Char}) - - r = (p_t + (p_e + p_s)).parse(tokens) # should not raise an exception - r.value.should be_a({Char, {Char, Char} }) - end - end -end - -# most behavior shouldn't need to be tested -# since it is based on tested bbehavior from -# Plus and Map -describe Left do - describe "#parse" do - it "returns the value of the first parser if both succeed" do - tokens = Tokens.from_string("testing") - letter_t = Token.new('t') - letter_e = Token.new('e') - result = (letter_t << letter_e).parse(tokens) - - result.value.should eq('t') - result.tokens.should eq(tokens[2..]) - end - end -end - -# same deal as Left -describe Right do - describe "#parse" do - it "returns the value of the second parser if both succeed" do - tokens = Tokens.from_string("testing") - letter_t = Token.new('t') - letter_e = Token.new('e') - result = (letter_t >> letter_e).parse(tokens) - - result.value.should eq('e') - result.tokens.should eq(tokens[2..]) - end - end -end - -describe Phrase do - p = Phrase.new(Token.new('t')) - - describe "#parse" do - it "fails if the wrapped parser fails" do - tokens = Tokens.from_string("_") - - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "fails if not all of the input tokens are parsed" do - tokens = Tokens.from_string("tt") - - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if the wrapped parser successfully parses all of the input" do - tokens = Tokens.from_string("t") - result = p.parse(tokens) - - result.tokens.empty?.should be_true - result.value.should eq('t') - end - end -end - -describe Recover do - p = Token.new('t').recover('@') - - describe "#parse" do - it "succeeds and returns the wrapped parser's value if it succeeds" do - tokens = Tokens.from_string("testing") - result = p.parse(tokens) - - result.tokens.should eq(tokens[1..]) - result.value.should eq('t') - end - - it "succeeds and returns the default value without modifying the input if the wrapped parser fails" do - tokens = Tokens.from_string("_____") - result = p.parse(tokens) - - result.tokens.should eq(tokens) - 
result.value.should eq('@') - end - end -end - -describe Optional do - p = Optional.new(Token.new('t')) - - describe "#parse" do - it "succeeds and returns the wrapped parser's value if it succeeds" do - tokens = Tokens.from_string("testing") - result = p.parse(tokens) - - result.tokens.should eq(tokens[1..]) - result.value.should eq('t') - end - - it "succeeds and returns a value of `nil` without modifying the input if the wrapped parser fails" do - tokens = Tokens.from_string("_____") - result = p.parse(tokens) - - result.tokens.should eq(tokens) - result.value.should be_nil - end - end -end - -describe Sequence do - # HACK: ps has to be declared this way due to contravariance - # https://crystal-lang.org/reference/1.7/syntax_and_semantics/inheritance.html#covariance-and-contravariance - ps = [] of Parser(Char, Char) - ps = ps + "abcd".chars.map { |c| Token.new(c) } - p = Sequence.new(ps) - - describe "#parse" do - it "runs each wrapped parser in order, returns each result" do - tokens = Tokens.from_string("abcd") - result = p.parse(tokens) - - result.value.should eq("abcd".chars) - result.tokens.empty?.should be_true - end - - it "fails if any of the wrapped parsers fail" do - fail_strings = ["", "abed", "bbcd", "abce"] - fail_strings.each do |s| - tokens = Tokens.from_string(s) - expect_raises(ParserFail) { p.parse(tokens) } - end - end - - it "succeeds and returns empty array if parser iterable is empty" do - tokens = Tokens.from_string("abcd") - empty_p = Sequence.new([] of Parser(Char, Char)) - result = empty_p.parse(tokens) - - result.value.empty?.should be_true - result.tokens.should eq(tokens) - end - end -end - -describe TokenSeq do - p = TokenSeq.new("test".chars) - - describe "#parse" do - it "fails if the input stream is too short" do - input = Tokens.from_string("") - expect_raises(ParserFail) { p.parse(input) } - end - - it "fails if it encounters an unexpected token" do - input = Tokens.from_string("text") - expect_raises(ParserFail) { p.parse(input) } - end - - it "succeeds if the input starts with the expected tokens" do - input = Tokens.from_string("testing") - result = p.parse(input) - - result.tokens.should eq(input[4..]) - result.value.should eq("test".chars) - end - end -end - -describe Many do - p = Many.new(Token.new('a')) - - describe "#parse" do - it "returns an empty array if the wrapped parser never succeeds" do - tokens = Tokens.from_string("bb") - result = p.parse(tokens) - - result.value.empty?.should be_true - result.tokens.should eq(tokens) - end - - it "stops parsing when the wrapped parser fails, returns all successes" do - tokens = Tokens.from_string("aaabcd") - result = p.parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - - tokens = Tokens.from_string("aaa") - result = p.parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - end - - it "stops parsing when the wapped parser succeeds without consuming any input" do - a_optional : Parser(Char, Char?) 
- a_optional = Optional.new(Token.new('a')) - tokens = Tokens.from_string("aaa") - result = Many(Char, Char?).new(a_optional).parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - end - end -end - -describe Some do - p = Some.new(Token.new('a')) - describe "#parse" do - it "fails if the wrapped parser never succeeds" do - tokens = Tokens.from_string("") - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "stops parsing when the wrapped parser fails, returns all successes" do - tokens = Tokens.from_string("aaabcd") - result = p.parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - - tokens = Tokens.from_string("aaa") - result = p.parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - end - end -end - -describe Exactly do - letter_a = Token.new('a') - tokens = Tokens.from_string("aaabcd") - - describe "#parse" do - it "tries to parse exactly n of the wrapper parser" do - p = Exactly.new(3, letter_a) - result = p.parse(tokens) - - result.value.should eq("aaa".chars) - result.tokens.should eq(tokens[3..]) - end - - it "always succeeds with an empty array if n < 1" do - p = Exactly.new(0, letter_a) - result = p.parse(tokens) - - result.value.empty?.should be_true - result.tokens.should eq(tokens) - - p = Exactly.new(-42, letter_a) - result = p.parse(tokens) - - result.value.empty?.should be_true - result.tokens.should eq(tokens) - end - - it "does not take extra matching tokens" do - p = Exactly.new(2, letter_a) - result = p.parse(tokens) - - result.value.should eq("aa".chars) - result.tokens.should eq(tokens[2..]) - end - - it "fails if there are not enough matching tokens" do - p = Exactly.new(60, letter_a) - expect_raises(ParserFail) { p.parse(tokens) } - end - end -end - -describe AtLeast do - letter_a = Token.new('a') - tokens = Tokens.from_string("aaaabcd") - - describe "#parse" do - it "fails if there are not enough matching tokens to parse" do - p = AtLeast.new(5, letter_a) - expect_raises(ParserFail) { p.parse(tokens) } - #expect_raises(ParserFail) { raise ParserFail.new("sdgseg") } - end - - it "parses n or more times with the given parser" do - p0 = AtLeast.new(0, letter_a) - p2 = AtLeast.new(2, letter_a) - p4 = AtLeast.new(4, letter_a) - - result0 = p0.parse(tokens) - result2 = p2.parse(tokens) - result4 = p4.parse(tokens) - - result0.value.should eq("aaaa".chars) - result0.tokens.should eq(tokens[4..]) - - result2.should eq(result0) - result4.should eq(result0) - end - end -end - -describe AtMost do - letter_a = Token.new('a') - tokens = Tokens.from_string("aaaabcd") - - describe "#parse" do - it "does not parse more than n times" do - p0 = AtMost.new(0, letter_a) - p2 = AtMost.new(2, letter_a) - p6 = AtMost.new(6, letter_a) - - r0 = p0.parse(tokens) - r0.value.empty?.should be_true - r0.tokens.should eq(tokens) - - r2 = p2.parse(tokens) - r2.value.should eq("aa".chars) - r2.tokens.should eq(tokens[2..]) - - r6 = p6.parse(tokens) - r6.value.should eq("aaaa".chars) - r6.tokens.should eq(tokens[4..]) - end - end -end - -describe Between do - letter_a = Token.new('a') - tokens = Tokens.from_string("aaaabcd") - - describe "#parse" do - it "parses at least i times, up to a limit of j times" do - p0_4 = Between.new(0, 4, letter_a) - r0_4 = p0_4.parse(tokens) - - r0_4.value.should eq("aaaa".chars) - r0_4.tokens.should eq(tokens[4..]) - end - - it "fails if there are not enough parser successes" do - p = Between.new(5, 6, letter_a) - expect_raises(ParserFail) { 
p.parse(tokens) } - end - end -end - -describe FirstOf do - tokens = Tokens.from_string("abcd") - letter_a = Token.new('a') - f = Flunk(Char, Char).new - - describe "#parse" do - it "cannot be instantiated with an empty Enumerable" do - expect_raises(ArgumentError) { FirstOf.new([] of Parser(Char, Char)) } - end - - it "uses the result of the first successful parser" do - a1 = [letter_a, f, f, f] of Parser(Char, Char) - a2 = [f, letter_a, f, f] of Parser(Char, Char) - a3 = [f, f, letter_a, f] of Parser(Char, Char) - a4 = [f, f, f, letter_a] of Parser(Char, Char) - - [a1, a2, a3, a4].each do |arr| - p = FirstOf.new(arr) - r = p.parse(tokens) - r.value.should eq('a') - r.tokens.should eq(tokens[1..]) - end - end - - it "only fails if no parsers are successful" do - x = Token.new('x') - y = Token.new('x') - z = Token.new('x') - p = FirstOf.new([x, y, z] of Parser(Char, Char)) - expect_raises(ParserFail) { p.parse(tokens) } - end - end -end - -describe SepBy do - describe "#parse" do - letter_a = Token.new('a') - comma = Token.new(',') - tokens = Tokens.from_string("a,a,a,a") - - it "fails if no elements can be parsed" do - p = SepBy(Char, Char, Char).new(comma, comma) - expect_raises(ParserFail) { p.parse(tokens) } - end - - it "succeeds if only one element can be parsed" do - t1 = Tokens.from_string("a") - t2 = Tokens.from_string("a,") - p = SepBy(Char, Char, Char).new(letter_a, comma) - - result = p.parse(t1) - result.value.should eq(['a']) - result.tokens.should eq(t1[1..]) - - result = p.parse(t2) - result.value.should eq(['a']) - result.tokens.should eq(t2[1..]) - end - - it "parses 1 element, then 0 or more (sep >> element)" do - p = SepBy(Char, Char, Char).new(letter_a, comma) - - result = p.parse(tokens) - result.value.should eq("aaaa".chars) - result.tokens.empty?.should be_true - - # drop last char in tokens, should parse three elements - result = p.parse(tokens[..5]) - result.value.should eq("aaa".chars) - result.tokens.should eq(Tokens.from_string(",")) - end - end -end - -describe "Practical use" do - describe "Use case: text surrounded by whitespace" do - space = Satisfy(Char).new { |c| c.whitespace? } - non_space = Satisfy(Char).new { |c| !c.whitespace? } - - # TODO: Figure out why mapping on this parser breaks - # initialization of `body`. - word_chars = Some.new(non_space) - ws = Some.new(space) - - bookend = Optional.new(ws) - body = SepBy.new(word_chars, ws) - tokenizer = (bookend >> body << bookend).map do |arrs| - arrs.map { |chars| chars.join } - end - - good_strings = [ - "test with no trailing whitespace", - " test with whitespace in the front", - "test with whitespace in the back", - " test surrounded by whitespace ", - ] - - good_strings.each do |s| - tokens = Tokens.from_string(s) - result = tokenizer.parse(tokens) - result.value.should eq(s.strip.split(/\s+/)) - result.tokens.empty?.should be_true - end - - bad_strings = [ - "", - " ", - ] - - bad_strings.each do |s| - tokens = Tokens.from_string(s) - expect_raises(ParserFail) { tokenizer.parse(tokens) } - end - end +pending State do end diff --git a/src/__OLD_parcom.cr b/src/__OLD_parcom.cr new file mode 100644 index 0000000..ddb2e50 --- /dev/null +++ b/src/__OLD_parcom.cr @@ -0,0 +1,84 @@ +require "./parcom/*" + +module Parcom + VERSION = "0.1.0" + + # A ParserFail exception should be raised by `Parser#parse` when + # a parse attempt is unsuccessful. 
+ # Raising this exception in the `#parse` method of a Parser "Foo" + # usually follows this pattern to allow for error tracing: + # + # ``` + # class Foo(T, V) < Parser(T, V) + # def parse(tokens : Tokens(T)) : Result(T, V) + # helper.parse(tokens) + # rescue ex : ParserFail + # raise ParserFail.new("Foo: #{ex.message}") + # end + # ``` + class ParserFail < Exception + end + + # `Tokens` is an `Array` wrapper struct to store the input + # stream of one or more `Parser` objects. + # A `Tokens` can be created from any `Iterable`, along with + # `String` objects using a special constructor. + struct Tokens(T) + getter tokens + + # Constructs a `Tokens(Char)` from a `String`. + def self.from_string(s : String) : Tokens(Char) + Tokens.new(s.chars) + end + + # Constructs a `Tokens` from an `Iterable`. + def initialize(ts : Iterable(T)) + if ts.responds_to?(:to_a) + @tokens = ts.to_a + else + @tokens = [] of T + ts.each { |t| @tokens << t } + end + end + + # Exposes `Array#[](Int)`. + def [](index : Int) : T + @tokens[index] + end + + # Exposes `Array#[](Int, Int)`, but wraps the returned array in a new `Tokens`. + def [](start : Int, count : Int) : Tokens(T) + Tokens.new(@tokens[start, count]) + end + + # Exposes `Array#[](Range)`, but wraps the returned array in a new `Tokens`. + def [](range : Range) : Tokens(T) + Tokens.new(@tokens[range]) + end + + # Like `#[]`, but returns `nil` instead of raising an `IndexError`. + def []?(*args) + self.[](*args) + rescue IndexError + nil + end + + # Exposes `Array#empty?`. + def empty? : Bool + @tokens.empty? + end + end + + # A `Result` stores a `Tokens` object and a parsed value, + # and is effectively used to store the state of a parser chain. + # This is used instead of a `Tuple` or `NamedTuple` because: + # 1. This is more idiomatic than a `Tuple`. + # 2. Crystal does not support generic named tuples. + struct Result(T, V) + getter tokens, value + + def initialize(@tokens : Tokens(T), @value : V) + end + end +end + diff --git a/src/__OLD_parcom/alt.cr b/src/__OLD_parcom/alt.cr new file mode 100644 index 0000000..dedd41d --- /dev/null +++ b/src/__OLD_parcom/alt.cr @@ -0,0 +1,28 @@ +require "./parser.cr" + +module Parcom + # `Alt` is a `Parser` that accepts two other parsers and tries + # to parse with one of them. + # If the first (left) parser succeeds, its result is returned. + # If the first parser fails, it will try the second (right) parser. + class Alt(T, V) < Parser(T, V) + # Accepts the two parsers to try to parse with. + def initialize(@p1 : Parser(T, V), @p2 : Parser(T, V)) + end + + # Tries to parse using both parsers. + # + # It will initially try to parse with the first parser. + # If it fails, it will try to parse with the second parser. + def parse(tokens : Tokens(T)) : Result(T, V) + @p1.parse(tokens) + rescue ex1 : ParserFail + begin + @p2.parse(tokens) + rescue ex2 : ParserFail + raise ParserFail.new("Alt (#{ex1.message}), (#{ex2.message})") + end + end + end +end + diff --git a/src/__OLD_parcom/any_token.cr b/src/__OLD_parcom/any_token.cr new file mode 100644 index 0000000..1f65bfc --- /dev/null +++ b/src/__OLD_parcom/any_token.cr @@ -0,0 +1,18 @@ +require "./parser.cr" + +module Parcom + # `AnyToken` is a `Parser` that consumes exactly one token from + # the input stream and returns it. + # It fails if the input stream is empty. + class AnyToken(T) < Parser(T, T) + # Parses the first token in the input, fails if the input is empty. + def parse(tokens : Tokens(T)) : Result(T, T) + if tokens.empty? 
+ raise ParserFail.new("AnyToken: input was empty") + else + Result.new(tokens[1..], tokens[0]) + end + end + end +end + diff --git a/src/__OLD_parcom/assert.cr b/src/__OLD_parcom/assert.cr new file mode 100644 index 0000000..5a3e621 --- /dev/null +++ b/src/__OLD_parcom/assert.cr @@ -0,0 +1,45 @@ +require "./parser.cr" + +module Parcom + # `Assert` is a `Parser` that runs another `Parser` and then + # performs a test on its result. The parser will then either fail if + # the result does not pass the test, or forward it on if it does. + # + # Example: + # ``` + # letter = Assert.new(AnyToken(Char).new) { |x| x.letter? } + # non_letter = Assert.new(AnyToken(Char).new) { |x| !x.letter? } + # input = Tokens.from_string("hi") + # letter.parse(input) # this succeeds + # non_letter.parse(input) # this fails + # ``` + # + # An `Assert` instance can also be created by calling + # `Parser#assert` on any parser: + # ``` + # # This parser is identical to the above example. + # letter = AnyToken.new(Char).assert { |x| x.letter? } + # ``` + class Assert(T, V) < Parser(T, V) + # Accepts the parser to run and a `Bool`-returning block + # containing the test to perform on the other parser's result. + def initialize(@p : Parser(T, V), &block : V -> Bool) + @f = block + end + + # Runs the parser it was initialized with, and returns that parser's + # result if it passes the provided test. Raises `ParserFail` otherwise. + def parse(tokens : Tokens(T)) : Result(T, V) + result = @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Assert (pre-assertion): #{ex.message}") + else + unless @f.call(result.value) + raise ParserFail.new("Assert: predicate failed for <#{result.value}>") + end + + return result + end + end +end + diff --git a/src/__OLD_parcom/at_least.cr b/src/__OLD_parcom/at_least.cr new file mode 100644 index 0000000..2fb8dcf --- /dev/null +++ b/src/__OLD_parcom/at_least.cr @@ -0,0 +1,29 @@ +require "./parser.cr" +require "./map.cr" +require "./exactly.cr" + +module Parcom + # `AtLeast` is a `Parser` that tries to parse with another parser at least + # a specific number of times. The results of each successful parse + # are returned in an array. If the number of successes is less than + # the specified number, the parser fails. + class AtLeast(T, V) < Parser(T, Array(V)) + @p : Map(T, {Array(V), Array(V)}, Array(V)) + + # Accepts the number of parsing attempts, and the parser to use. + # If a negative int is given, it is treated as if it were 0. + def initialize(i : Int, p : Parser(T, V)) + @p = (Exactly.new(i, p) + Many.new(p)).map do |tup| + tup[0] + tup[1] + end + end + + # Tries to parse the given number of times, or more. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("AtLeast: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/at_most.cr b/src/__OLD_parcom/at_most.cr new file mode 100644 index 0000000..b70164f --- /dev/null +++ b/src/__OLD_parcom/at_most.cr @@ -0,0 +1,29 @@ +require "./parser.cr" +require "./map.cr" +require "./exactly.cr" +require "./optional.cr" + +module Parcom + # `AtMost` is a `Parser` that tries to parse with another parser a specific + # number of times. The results of each successful parse are returned in an + # array. If the specific number of parses succeed, the parsing stops, even + # if it is possible to keep parsing. If the parser is never successful, it + # succeeds with an empty array. 
+ class AtMost(T, V) < Parser(T, Array(V)) + @p : Map(T, Array(V?), Array(V)) + + # Accepts the number of parsing attempts, and the parser to use. + def initialize(i : Int, p : Parser(T, V)) + @p = Exactly.new(i, Optional.new(p)).map(&.compact) + end + + # Tries to parse up to the given number of times. + # If the parser never succeeds, returns an empty array. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("AtMost: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/between.cr b/src/__OLD_parcom/between.cr new file mode 100644 index 0000000..05519e4 --- /dev/null +++ b/src/__OLD_parcom/between.cr @@ -0,0 +1,37 @@ +require "./parser.cr" +require "./map.cr" +require "./exactly.cr" +require "./at_most.cr" + +module Parcom + # `Between` is a `Parser` that tries to parse with another parser a number + # of times within a specified range. The results of each successful parse + # are returned in an array. If the number of successful parses falls below + # the lower bound of the range, the parser fails. If the number of successful + # parses reaches the upper bound of the range, the parsing stops, even if it + # is possible to keep parsing. + class Between(T, V) < Parser(T, Array(V)) + @p : Map(T, {Array(V), Array(V)}, Array(V)) + + # Accepts the lower and upper bounds for the number of parsing attempts, + # as well as the parser to use. This method works the same way regardless + # of whether the larger value is passed first or second. If a negative int value + # is given for either parameter, it is treated as `0`. + # TODO: Overload to allow for Range objects. + def initialize(i : Int, j : Int, p : Parser(T, V)) + lower = i < j ? i : j + upper = (i - j).abs + @p = (Exactly.new(lower, p) + AtMost.new(upper, p)).map do |tup| + tup[0] + tup[1] + end + end + + # Tries to parse a number of times within the given range. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Between: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/eof.cr b/src/__OLD_parcom/eof.cr new file mode 100644 index 0000000..650da56 --- /dev/null +++ b/src/__OLD_parcom/eof.cr @@ -0,0 +1,23 @@ +require "./parser.cr" + +module Parcom + # `EOF` is a `Parser` that succeeds if the input stream is empty. + # + # This parser returns `nil` when successful because there is no + # other meaningful value to return. + # + # This parser is also one of the few cases where it is ideal to not + # modify or take from the input stream, as it should be empty anyway. + class EOF(T) < Parser(T, Nil) + # Succeeds if the input stream is empty and returns `nil`. + # Raises a `ParserFail` exception when the input is not empty. + def parse(tokens : Tokens(T)) : Result(T, Nil) + if tokens.empty? + Result.new(tokens, nil) + else + raise ParserFail.new("Eof: input was not empty") + end + end + end +end + diff --git a/src/__OLD_parcom/exactly.cr b/src/__OLD_parcom/exactly.cr new file mode 100644 index 0000000..a34fd0d --- /dev/null +++ b/src/__OLD_parcom/exactly.cr @@ -0,0 +1,28 @@ +require "./parser.cr" + +module Parcom + # `Exactly` is a `Parser` that tries to parse with another parser + # a specific number of times. The results of each successful parse + # are returned in an array. If the number of successes is less than + # OR greater than the specified number, the parser fails. 
+ class Exactly(T, V) < Parser(T, Array(V)) + @p : Sequence(T, V) + + # Accepts the number of parsing attempts, and the parser to use. + # If a negative int is given, it is treated as if it were `0`. + def initialize(i : Int, p : Parser(T, V)) + i = i.negative? ? 0 : i + # put `i` copies of `p` into an array + ps = ([p] of Parser(T, V)) * i + @p = Sequence.new(ps) + end + + # Tries to parse the given number of times. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Exactly: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/first_of.cr b/src/__OLD_parcom/first_of.cr new file mode 100644 index 0000000..c4077eb --- /dev/null +++ b/src/__OLD_parcom/first_of.cr @@ -0,0 +1,40 @@ +require "./parser.cr" + +module Parcom + # `FirstOf` is a `Parser` that accepts multiple parsers, and tries to parse + # with all of them, in order. As soon as one of the parsers succeeds, + # that parser's result is returned. If none of the parsers are successful, + # the parsing fails. + class FirstOf(T, V) < Parser(T, V) + @p : Parser(T, V) + + # Accepts the parsers to use. Raises an `ArgumentError` if + # no parsers are provided. + def initialize(ps : Iterable(Parser(T, V))) + ps_iter = ps.each + p = ps_iter.next + + if p.is_a?(Iterator::Stop) + raise ArgumentError.new("FirstOf requires atleast one parser, got none") + end + + # Combine all the parsers into one by wrapping them with `Alt`. + @p = p + + loop do + p = ps_iter.next + break if p.is_a?(Iterator::Stop) + @p = @p | p + end + end + + # Tries to parse with each of the given parsers. Either returns the first + # successful result or fails if no parsers succeed. + def parse(tokens : Tokens(T)) : Result(T, V) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("FirstOf: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/flunk.cr b/src/__OLD_parcom/flunk.cr new file mode 100644 index 0000000..00a0bd6 --- /dev/null +++ b/src/__OLD_parcom/flunk.cr @@ -0,0 +1,12 @@ +require "./parser.cr" + +module Parcom + # `Flunk` is a `Parser` that always fails. + class Flunk(T, V) < Parser(T, V) + # Always raises `ParserFail` + def parse(tokens) : Result(T, V) + raise ParserFail.new("Flunk: parsing failed") + end + end +end + diff --git a/src/__OLD_parcom/left.cr b/src/__OLD_parcom/left.cr new file mode 100644 index 0000000..201f497 --- /dev/null +++ b/src/__OLD_parcom/left.cr @@ -0,0 +1,26 @@ +require "./parser.cr" +require "./map.cr" + +module Parcom + # `Left` is a `Parser` that tries to parse with two different + # parsers in succession and fails if either of the two parsers fails. + # This parser behaves the same as `Plus`, but only returns the result + # of the first parser. + class Left(T, V, U) < Parser(T, V) + @p : Map(T, {V, U}, V) + + # Accepts the two parsers to use, in order. + def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) + @p = (p1 + p2).map(&.first) + end + + # Tries to parse with the two given parsers, and returns the + # result of the first parser if they both succeed. + def parse(tokens : Tokens(T)) : Result(T, V) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Left: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/many.cr b/src/__OLD_parcom/many.cr new file mode 100644 index 0000000..a734c63 --- /dev/null +++ b/src/__OLD_parcom/many.cr @@ -0,0 +1,36 @@ +require "./parser.cr" + +module Parcom + # `Many` is a `Parser` that repeatedly tries to parse with another parser. 
+ # The `Many` object will collect all success values in an array, and return + # them with the final state of the input stream. If the wrapped parser ever + # fails or succeeds without parsing any input, the `Many` object will stop + # attempting to parse and will return the array of previous successes. + # + # `Many` will return an empty array if the parser never succeeds or never + # consumes input. For cases where at least one parser success is needed, use `Some`. + class Many(T, V) < Parser(T, Array(V)) + # Accepts the parser to use. + def initialize(@p : Parser(T, V)) + end + + # Continuously parses with the wrapped parser, returns all successes. + # Parsing stops when the parser fails, or succeeds without consuming input. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + parsed = [] of V + + loop do + result = @p.parse?(tokens) + break unless !result.nil? && result.tokens != tokens + + parsed << result.value + tokens = result.tokens + end + + Result.new(tokens, parsed) + rescue ex : ParserFail + raise ParserFail.new("Many: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/map.cr b/src/__OLD_parcom/map.cr new file mode 100644 index 0000000..34961b5 --- /dev/null +++ b/src/__OLD_parcom/map.cr @@ -0,0 +1,30 @@ +require "./parser.cr" + +module Parcom + # `Map` is a `Parser` that tries to parse using another parser, + # and then changes the result of that parser to a different value. + # + # Example: + # ``` + # # Where `Digits` is some parser that returns an array of digit characters + # parse_i32 = Digits.new.map { |x| x.join.to_i32 } + # result = parse_i32.parse(Tokens.from_string("1234")) + # result.value # => 1234 (Int32) + # ``` + class Map(T, V, U) < Parser(T, U) + # Accepts the parser to use and the function to apply to the result. + def initialize(@p : Parser(T, V), &block : V -> U) + @f = block + end + + # Tries to parse with the given parser and applies the + # function to the result if successful. + def parse(tokens : Tokens(T)) : Result(T, U) + result = @p.parse(tokens) + Result.new(result.tokens, @f.call(result.value)) + rescue ex : ParserFail + raise ParserFail.new("Map: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/optional.cr b/src/__OLD_parcom/optional.cr new file mode 100644 index 0000000..b368d4e --- /dev/null +++ b/src/__OLD_parcom/optional.cr @@ -0,0 +1,40 @@ +require "./parser.cr" + +module Parcom + # `Optional` is a `Parser` that tries to parse with another parser, + # but does not fail the parser chain if the wrapped parser fails. + # If the wrapped parser fails, an `Optional` will not modify the input + # stream, and will return `nil` instead. + # + # Example: + # ``` + # digit = Satisfy(Char).new { |c| c.digit? } + # digits = Many.new(digit) + # plus_sign = Optional.new(Token.new('+')) + # positive_int = plus_sign >> digits + # + # input1 = Tokens.from_string("+12345") + # input2 = Tokens.from_string("12345") + # + # # Both of these have the same result + # positive_int.parse(input1) + # positive_int.parse(input2) + # ``` + class Optional(T, V) < Parser(T, V?) + # Accepts the parser to use. + def initialize(@p : Parser(T, V)) + end + + # Tries to parse with the given parser, succeeds with `nil` + # instead of failing. This parser does not consume input if + # the wrapped parser fails. + def parse(tokens : Tokens(T)) : Result(T, V?) + r = @p.parse?(tokens) + + new_tokens = r.nil? ? tokens : r.tokens + new_value = r.nil? ? 
nil : r.value + Result.new(new_tokens, new_value) + end + end +end + diff --git a/src/__OLD_parcom/parser.cr b/src/__OLD_parcom/parser.cr new file mode 100644 index 0000000..ca350df --- /dev/null +++ b/src/__OLD_parcom/parser.cr @@ -0,0 +1,87 @@ +require "../parcom.cr" + +module Parcom + # `Parser` is the base class of all parser objects and contains + # a handful of helper methods that can allow for cleaner syntax. + # + # The generic type `T` represents the type of tokens a parser will + # operate on (usually `Char`). + # The generic type `V` represents the type of value that a parser + # will return when parsing is successful. + abstract class Parser(T, V) + # Accepts a `Tokens(T)` and either retuns a `Result(T, V)`, or raises a + # `ParserFail` exception with a message providing context of where + # in the chain the parsing failed. + # + # The `V` in the returned `Result` is arbitrary, but the `Tokens(T)` in + # the returned result MUST contain the parser's input, with some tokens + # removed from the front proprtional to what was parsed. + # + # Example: + # ``` + # hello = ParsesHello.new # inherits from Parser(Char, String) + # input = Tokens.from_string("Hello world!") + # + # result = hello.parse(input) + # result.value # => "Hello" + # result.tokens # => [' ', 'w', 'o', 'r', 'l', 'd', '!'] + # + # bad_input = Tokens.from_string("Good evening world!") + # hello.parse(bad_input) # raises ParserFail + # ``` + abstract def parse(tokens : Tokens(T)) : Result(T, V) + + # Same as `#parse`, but returns `nil` instead of raising `ParserFail`. + def parse?(tokens : Tokens(T)) : Result(T, V)? + self.parse(tokens) + rescue ParserFail + return nil + end + + # Constructs an `Alt` parser with `self` and `other`. + def |(other : Parser(T, V)) : Alt(T, V) + Alt.new(self, other) + end + + # Constructs a `Plus` parser with `self` and `other`. + def +(other : Parser(T, U)) : Plus(T, V, U) forall U + Plus.new(self, other) + end + + # Constructs a `Left` parser with `self` and `other`. + def <<(other : Parser(T, U)) : Left(T, V, U) forall U + Left.new(self, other) + end + + # Constructs a `Right` parser with `self` and `other`. + def >>(other : Parser(T, U)) : Right(T, V, U) forall U + Right.new(self, other) + end + + # Constructs an `Assert` parser with `self` and the given block. + def assert(&block : V -> Bool) : Assert(T, V) + Assert.new(self, &block) + end + + # Constructs an `Assert` parser with `self` and the given proc. + def assert(f : V -> Bool) : Assert(T, V) + Assert.new(self, &f) + end + + # Constructs a `Map` parser with `self` and the given block. + def map(&block : V -> U) : Map(T, V, U) forall U + Map.new(self, &block) + end + + # Constructs a `Map` parser with `self` and the given proc. + def map(f : V -> U) : Map(T, V, U) forall U + Map.new(self, &f) + end + + # Constructs a `Recover` parser with `self` and the given `V`. + def recover(default : V) : Recover(T, V) + Recover.new(self, default) + end + end +end + diff --git a/src/__OLD_parcom/peek.cr b/src/__OLD_parcom/peek.cr new file mode 100644 index 0000000..2b6f657 --- /dev/null +++ b/src/__OLD_parcom/peek.cr @@ -0,0 +1,23 @@ +require "./parser.cr" + +module Parcom + # `Peek` is a `Parser` that runs another `Parser` while + # leaving the input stream unmodified. + class Peek(T, V) < Parser(T, V) + # Accepts the parser to run. + def initialize(@p : Parser(T, V)) + end + + # Runs the parser it was initialized with, and returns + # that parser's result along with the original input. 
+ # + # Re-raises a `ParserFail` exception if the other parser fails. + def parse(tokens : Tokens(T)) : Result(T, V) + result = @p.parse(tokens) + Result.new(tokens, result.value) + rescue ex : ParserFail + raise ParserFail.new("Peek: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/phrase.cr b/src/__OLD_parcom/phrase.cr new file mode 100644 index 0000000..1996fd4 --- /dev/null +++ b/src/__OLD_parcom/phrase.cr @@ -0,0 +1,33 @@ +require "./parser.cr" +require "./eof.cr" +require "./map.cr" + +module Parcom + # `Phrase` is a `Parser` that tries to parse with another parser, + # but will fail if any of the input has not been consumed. + # + # Example: + # ``` + # letter_a = Token.new('a') + # tokens = Tokens.from_string("aaa") + # one_a = Phrase(Char, Char).new(letter_a) + # result = one_a.parse(tokens) # This fails + # ``` + class Phrase(T, V) < Parser(T, V) + @p : Map(T, {V, Nil}, V) + + # Accepts the parser to parse with. + def initialize(p : Parser(T, V)) + @p = (p + EOF(T).new).map &.first + end + + # Tries to parse with the given parser, fails if there + # is any un-parsed input remaining. + def parse(tokens : Tokens(T)) : Result(T, V) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Phrase: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/plus.cr b/src/__OLD_parcom/plus.cr new file mode 100644 index 0000000..53c9b4f --- /dev/null +++ b/src/__OLD_parcom/plus.cr @@ -0,0 +1,54 @@ +require "./parser.cr" + +module Parcom + # `Plus` is a parser that tries to parse with two different + # parsers in succession and fails if either of the two parsers fails. + # The return type of this parser is `{V, U}`, where `V` is the return + # type of the first parser and `U` is the return type of the second. + # + # Example: + # ``` + # parse1 = Token.new('1') + # parse2 = Token.new('2') + # tokens = Tokens.from_string("12") + # result = (parse1 + parse2).parse(tokens) # succeeds + # result = (parse2 + parse1).parse(tokens) # fails, wrong order + # ``` + # + # `Plus` parsers can be chained together, but the resulting return type + # can become unweildly with too many combined parsers: + # ``` + # letter_a = Token.new('a') + # a5 = letter_a + letter_a + letter_a + letter_a + letter_a + # tokens = Tokens.from_string("aaaaa") + # a5.parse(tokens) # succeeds + # # `a5` has a return type of { { { {Char, Char}, Char}, Char}, Char} + # ``` + # + # If you need to parse more than two things in this manner, + # consider using `Many`, `Some`, `Sequence`, or `TokenSeq` instead. + class Plus(T, V, U) < Parser(T, {V, U}) + # Accepts the two parsers to use, in order. + def initialize(@p1 : Parser(T, V), @p2 : Parser(T, U)) + end + + # Tries to parse with the two given parsers, and returns + # their results in a tuple if the both succeed. 
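# A minimal sketch of `Peek`, assuming the classes above are loaded; the input
# below is illustrative. `Peek` reports what comes next without consuming it.
look_ahead = Peek.new(Token.new('a'))
input = Tokens.from_string("abc")

result = look_ahead.parse(input)
result.value  # => 'a'
result.tokens # => the original input, still starting at 'a'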
+ def parse(tokens : Tokens(T)) : Result(T, {V, U}) + begin + r1 = @p1.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Plus (left): #{ex.message}") + end + + begin + r2 = @p2.parse(r1.tokens) + rescue ex : ParserFail + raise ParserFail.new("Plus (right): #{ex.message}") + end + + Result.new(r2.tokens, {r1.value, r2.value}) + end + end +end + diff --git a/src/__OLD_parcom/recover.cr b/src/__OLD_parcom/recover.cr new file mode 100644 index 0000000..378f8d6 --- /dev/null +++ b/src/__OLD_parcom/recover.cr @@ -0,0 +1,27 @@ +require "./parser.cr" +require "./optional.cr" +require "./map.cr" + +module Parcom + # `Recover` is a `Parser` that tries to parse with another parser, + # but does not fail the parser chain if the wrapped parser fails. + # If the wrapped parser fails, a `Recover` will not modify the input + # stream, and return an arbitrary value of type `V` instead. + class Recover(T, V) < Parser(T, V) + @p : Map(T, V?, V) + + # Accepts the parser to use, and the default value + # to return if the parser fails. + def initialize(p : Parser(T, V), default : V) + @p = Optional.new(p).map { |x| x.nil? ? default : x } + end + + # Tries to parse with the given parser, succeeds with the + # default value instead of failing. This parser does not + # consume input if the wrapped parser fails. + def parse(tokens : Tokens(T)) : Result(T, V) + @p.parse(tokens) + end + end +end + diff --git a/src/__OLD_parcom/right.cr b/src/__OLD_parcom/right.cr new file mode 100644 index 0000000..a0489b1 --- /dev/null +++ b/src/__OLD_parcom/right.cr @@ -0,0 +1,26 @@ +require "./parser.cr" +require "./map.cr" + +module Parcom + # `Right` is a `Parser` that tries to parse with two different + # parsers in succession and fails if either of the two parsers fails. + # This parser behaves the same as `Plus`, but only returns the result + # of the second parser. + class Right(T, V, U) < Parser(T, U) + @p : Map(T, {V, U}, U) + + # Accepts the two parsers to use, in order. + def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) + @p = (p1 + p2).map(&.last) + end + + # Tries to parse with the two given parsers, and returns the + # result of the second parser if they both succeed. + def parse(tokens : Tokens(T)) : Result(T, U) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Left: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/satisfy.cr b/src/__OLD_parcom/satisfy.cr new file mode 100644 index 0000000..9734635 --- /dev/null +++ b/src/__OLD_parcom/satisfy.cr @@ -0,0 +1,33 @@ +require "./parser.cr" +require "./any_token.cr" +require "./assert.cr" + +module Parcom + # `Satisfy` is a `Parser` that parses a single token + # if that token passes a predefined test, similar + # to `Assert`. This class is effectively a shorthand + # for the following: + # ``` + # # These parsers are equivalent. + # letter_assert = Assert.new(AnyToken(Char).new) { |x| x.letter? } + # letter_satisfy = Satisfy(Char).new { |x| x.letter? } + # ``` + class Satisfy(T) < Parser(T, T) + @p : Assert(T, T) + + # Accepts the `Bool`-returning block containing the test + # to run on the parsed token. + def initialize(&block : T -> Bool) + @p = AnyToken(T).new.assert(&block) + end + + # Returns the first token of the input if that + # token passes the test. 
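# A minimal sketch of `Right` and `Recover`, assuming the classes above are
# loaded; the inputs and the default value are illustrative.
digit = Satisfy(Char).new { |c| c.number? }

plus_then_digit = Right.new(Token.new('+'), digit)
plus_then_digit.parse(Tokens.from_string("+7")).value # => '7' (the '+' is dropped)

# `Recover` turns a failure into a default value without consuming input.
lenient_digit = Recover.new(digit, '0')
lenient_digit.parse(Tokens.from_string("abc")).value # => '0'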
+ def parse(tokens : Tokens(T)) : Result(T, T) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("Satisfy: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/sep_by.cr b/src/__OLD_parcom/sep_by.cr new file mode 100644 index 0000000..fa19027 --- /dev/null +++ b/src/__OLD_parcom/sep_by.cr @@ -0,0 +1,38 @@ +require "./parser.cr" +require "./map.cr" +require "./many.cr" + +module Parcom + # `SepBy` is a `Parser` that tries to parse one or more times with one + # parser, alternating with a second parser. + # + # Example: + # ``` + # letter_a = Token.new('a') + # letter_b = Token.new('b') + # p = SepBy(Char, Car, Char).new(letter_a, letter_b) + # tokens = Tokens.from_string("ababababa") + # + # result = p.parse(tokens) + # puts result.value # => ['a', 'a', 'a', 'a', 'a'] + # ``` + class SepBy(T, V, U) < Parser(T, Array(V)) + @p : Map(T, {V, Array(V)}, Array(V)) + + # Accepts the parser that parses the result values, and the + # parser that parses the sepatators. + def initialize(elem : Parser(T, V), sep : Parser(T, U)) + @p = (elem + Many(T, U).new(sep >> elem)).map do |tup| + [tup[0]] + tup[1] + end + end + + # Tries to parse, alternating the first and second parsers. + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + @p.parse(tokens) + rescue ex : ParserFail + raise ParserFail.new("SepBy: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/sequence.cr b/src/__OLD_parcom/sequence.cr new file mode 100644 index 0000000..6a05cde --- /dev/null +++ b/src/__OLD_parcom/sequence.cr @@ -0,0 +1,33 @@ +require "./parser.cr" + +module Parcom + # `Sequence` is a `Parser` that combines multiple parsers and + # tries to parse all of them in succession. If all of the parsers + # succeed, the values parsed are returned in an array, in the order + # they were parsed in. If any of the parsers fail, + # the `Sequence` also fails. + class Sequence(T, V) < Parser(T, Array(V)) + # Accepts the parsers to use. + def initialize(@ps : Iterable(Parser(T, V))) + end + + # Tries each parser in order, and returns their results. + # Fail if any of the wrapped parsers fail. + # TODO: this can probably be optimised more for Arrays + # TODO: might be better to use #zip + def parse(tokens : Tokens(T)) : Result(T, Array(V)) + parsed = [] of V + + @ps.each do |p| + r = p.parse(tokens) + parsed << r.value + tokens = r.tokens + end + + Result.new(tokens, parsed) + rescue ex : ParserFail + raise ParserFail.new("Sequence: #{ex.message}") + end + end +end + diff --git a/src/__OLD_parcom/some.cr b/src/__OLD_parcom/some.cr new file mode 100644 index 0000000..a2e3563 --- /dev/null +++ b/src/__OLD_parcom/some.cr @@ -0,0 +1,33 @@ +require "./parser.cr" +require "./assert.cr" +require "./many.cr" + +module Parcom + # `Some` is a `Parser` that repeatedly tries to parse with another parser. + # The `Some` object will collect all success values in an array, and + # return them with the final state of the input stream. If the wrapped + # parser ever fails or succeeds without parsing any input, the `Some` + # object will stop attempting to parse and will return the array of + # previous successes. + # + # `Some` will raise a `ParserFail` exception if the parser never succeeds + # or consumes input. For cases where parsing should allow for 0 successes, + # use `Many`. + class Some(T, V) < Parser(T, Array(V)) + @p : Assert(T, Array(V)) + + # Accepts the parser to use. + def initialize(p : Parser(T, V)) + @p = Many.new(p).assert { |arr| !arr.empty? 
}
+    end
+
+    # Continuously parses with the wrapped parser, returns all successes.
+    # Fails if there are no successes.
+    def parse(tokens : Tokens(T)) : Result(T, Array(V))
+      @p.parse(tokens)
+    rescue ex : ParserFail
+      raise ParserFail.new("Some: #{ex.message}")
+    end
+  end
+end
+
diff --git a/src/__OLD_parcom/token.cr b/src/__OLD_parcom/token.cr
new file mode 100644
index 0000000..b4e1fef
--- /dev/null
+++ b/src/__OLD_parcom/token.cr
@@ -0,0 +1,20 @@
+require "./satisfy"
+
+module Parcom
+  # `Token` is a `Parser` that tries to parse a specific token
+  # from the input stream.
+  class Token(T) < Parser(T, T)
+    # Accepts the token that this parser will try to parse.
+    def initialize(@expected : T)
+      @p = Satisfy(T).new { |x| x == @expected }
+    end
+
+    # Tries to parse the specified token from the input stream.
+    def parse(tokens : Tokens(T)) : Result(T, T)
+      @p.parse(tokens)
+    rescue ex : ParserFail
+      raise ParserFail.new("Token <#{@expected}>: #{ex.message}")
+    end
+  end
+end
+
diff --git a/src/__OLD_parcom/token_seq.cr b/src/__OLD_parcom/token_seq.cr
new file mode 100644
index 0000000..45900f9
--- /dev/null
+++ b/src/__OLD_parcom/token_seq.cr
@@ -0,0 +1,38 @@
+require "./parser.cr"
+require "./sequence.cr"
+
+module Parcom
+  # `TokenSeq` is a `Parser` that attempts to parse a specific
+  # string of tokens. If the expected tokens are not at the start
+  # of the input stream, the parser fails.
+  #
+  # Example:
+  # ```
+  # parse_test = TokenSeq(Char).new("test".chars)
+  # tokens = Tokens.from_string("testing")
+  #
+  # result = parse_test.parse(tokens)
+  #
+  # puts result.value # => ['t', 'e', 's', 't']
+  # puts result.tokens # => ['i', 'n', 'g']
+  # ```
+  class TokenSeq(T) < Parser(T, Array(T))
+    @p : Sequence(T, T)
+
+    # Accepts the tokens to try and parse, in order.
+    def initialize(expected : Iterable(T))
+      ps = [] of Parser(T, T)
+      expected.each { |t| ps << Token.new(t) }
+
+      @p = Sequence.new(ps)
+    end
+
+    # Tries to parse the list of tokens.
+    def parse(tokens : Tokens(T)) : Result(T, Array(T))
+      @p.parse(tokens)
+    rescue ex : ParserFail
+      raise ParserFail.new("TokenSeq: #{ex.message}")
+    end
+  end
+end
+
diff --git a/src/parcom.cr b/src/parcom.cr
index ddb2e50..d12b011 100644
--- a/src/parcom.cr
+++ b/src/parcom.cr
@@ -5,17 +5,6 @@ module Parcom
   # A ParserFail exception should be raised by `Parser#parse` when
   # a parse attempt is unsuccessful.
-  # Raising this exception in the `#parse` method of a Parser "Foo"
-  # usually follows this pattern to allow for error tracing:
-  #
-  # ```
-  # class Foo(T, V) < Parser(T, V)
-  #   def parse(tokens : Tokens(T)) : Result(T, V)
-  #     helper.parse(tokens)
-  #   rescue ex : ParserFail
-  #     raise ParserFail.new("Foo: #{ex.message}")
-  #   end
-  # ```
   class ParserFail < Exception
   end
@@ -74,10 +63,51 @@ module Parcom
   # This is used instead of a `Tuple` or `NamedTuple` because:
   # 1. This is more idiomatic than a `Tuple`.
   # 2. Crystal does not support generic named tuples.
-  struct Result(T, V)
+  struct Result(T, U)
     getter tokens, value
 
-    def initialize(@tokens : Tokens(T), @value : V)
+    def initialize(@tokens : Tokens(T), @value : U)
     end
   end
+
+  class Parser(T, U)
+    getter name
+
+    def initialize(@name : String, @f : Tokens(T) -> Result(T, U))
+    end
+
+    def initialize(@name : String, &block : Tokens(T) -> Result(T, U))
+      @f = block
+    end
+
+    def named(name : String) : self
+      @name = name
+      self
+    end
+
+    def parse(tokens : Tokens(T)) : Result(T, U)
+      @f.call(tokens)
+    end
+
+    def parse?(tokens : Tokens(T)) : Result(T, U)?
+      parse(tokens)
+    rescue
+      nil
+    end
+
+    def assert(f : U -> Bool) : Parser(T, U)
+      p = self
+      Parser.new("#{p.name} assertion") do |tokens|
+        result = p.parse(tokens)
+        unless f.call(result.value)
+          raise ParserFail.new("Assertion failed for value #{result.value}")
+        end
+        result
+      end
+    end
+
+    def assert(&block : U -> Bool) : Parser(T, U)
+      assert(block)
+    end
+  end
 end
diff --git a/src/parcom/alt.cr b/src/parcom/alt.cr
deleted file mode 100644
index dedd41d..0000000
--- a/src/parcom/alt.cr
+++ /dev/null
@@ -1,28 +0,0 @@
-require "./parser.cr"
-
-module Parcom
-  # `Alt` is a `Parser` that accepts two other parsers and tries
-  # to parse with one of them.
-  # If the first (left) parser succeeds, its result is returned.
-  # If the first parser fails, it will try the second (right) parser.
-  class Alt(T, V) < Parser(T, V)
-    # Accepts the two parsers to try to parse with.
-    def initialize(@p1 : Parser(T, V), @p2 : Parser(T, V))
-    end
-
-    # Tries to parse using both parsers.
-    #
-    # It will initially try to parse with the first parser.
-    # If it fails, it will try to parse with the second parser.
-    def parse(tokens : Tokens(T)) : Result(T, V)
-      @p1.parse(tokens)
-    rescue ex1 : ParserFail
-      begin
-        @p2.parse(tokens)
-      rescue ex2 : ParserFail
-        raise ParserFail.new("Alt (#{ex1.message}), (#{ex2.message})")
-      end
-    end
-  end
-end
-
diff --git a/src/parcom/any_token.cr b/src/parcom/any_token.cr
deleted file mode 100644
index 1f65bfc..0000000
--- a/src/parcom/any_token.cr
+++ /dev/null
@@ -1,18 +0,0 @@
-require "./parser.cr"
-
-module Parcom
-  # `AnyToken` is a `Parser` that consumes exactly one token from
-  # the input stream and returns it.
-  # It fails if the input stream is empty.
-  class AnyToken(T) < Parser(T, T)
-    # Parses the first token in the input, fails if the input is empty.
-    def parse(tokens : Tokens(T)) : Result(T, T)
-      if tokens.empty?
-        raise ParserFail.new("AnyToken: input was empty")
-      else
-        Result.new(tokens[1..], tokens[0])
-      end
-    end
-  end
-end
-
diff --git a/src/parcom/assert.cr b/src/parcom/assert.cr
deleted file mode 100644
index 5a3e621..0000000
--- a/src/parcom/assert.cr
+++ /dev/null
@@ -1,45 +0,0 @@
-require "./parser.cr"
-
-module Parcom
-  # `Assert` is a `Parser` that runs another `Parser` and then
-  # performs a test on its result. The parser will then either fail if
-  # the result does not pass the test, or forward it on if it does.
-  #
-  # Example:
-  # ```
-  # letter = Assert.new(AnyToken(Char).new) { |x| x.letter? }
-  # non_letter = Assert.new(AnyToken(Char).new) { |x| !x.letter? }
-  # input = Tokens.from_string("hi")
-  # letter.parse(input) # this succeeds
-  # non_letter.parse(input) # this fails
-  # ```
-  #
-  # `Assert` instance can also be created by calling
-  # `Parser#assert` on any parser:
-  # ```
-  # # This parser is identical to the above example.
-  # letter = AnyToken.new(Char).assert { |x| x.letter? }
-  # ```
-  class Assert(T, V) < Parser(T, V)
-    # Accepts the parser to run and a `Bool`-retuning block
-    # containing the test to perform on the other parser's result.
-    def initialize(@p : Parser(T, V), &block : V -> Bool)
-      @f = block
-    end
-
-    # Runs the parser it was initialized with, and returns that parser's
-    # result if it passes the provided test. Raises `ParserFail` otherwise.
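# A minimal sketch of the new closure-based `Parser(T, U)` added to src/parcom.cr
# above; the "any char" parser defined here is an illustrative assumption, not
# part of the patch.
any_char = Parser(Char, Char).new("any char") do |tokens|
  raise ParserFail.new("any char: input was empty") if tokens.empty?
  Result.new(tokens[1..], tokens[0])
end

# `#assert` wraps the parser with a predicate check on its result.
letter = any_char.assert { |c| c.letter? }

letter.parse(Tokens.from_string("abc")).value # => 'a'
letter.parse?(Tokens.from_string("123"))      # => nil (assertion failed)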
- def parse(tokens : Tokens(T)) : Result(T, V) - result = @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Assert (pre-assertion): #{ex.message}") - else - unless @f.call(result.value) - raise ParserFail.new("Assert: predicate failed for <#{result.value}>") - end - - return result - end - end -end - diff --git a/src/parcom/at_least.cr b/src/parcom/at_least.cr deleted file mode 100644 index 2fb8dcf..0000000 --- a/src/parcom/at_least.cr +++ /dev/null @@ -1,29 +0,0 @@ -require "./parser.cr" -require "./map.cr" -require "./exactly.cr" - -module Parcom - # `AtLeast` is a `Parser` that tries to parser with another parser - # a specific number of times. The results of each successful parse - # are returned in an array. If the number of successes is less than - # the specified number, the parser fails. - class AtLeast(T, V) < Parser(T, Array(V)) - @p : Map(T, {Array(V), Array(V)}, Array(V)) - - # Accepts the number of parsing attempts, and the parser to use. - # If a negative int is given, it is treated as if it were 0. - def initialize(i : Int, p : Parser(T, V)) - @p = (Exactly.new(i, p) + Many.new(p)).map do |tup| - tup[0] + tup[1] - end - end - - # Tries to parse the given number of times, or more. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("AtLeast: #{ex.message}") - end - end -end - diff --git a/src/parcom/at_most.cr b/src/parcom/at_most.cr deleted file mode 100644 index b70164f..0000000 --- a/src/parcom/at_most.cr +++ /dev/null @@ -1,29 +0,0 @@ -require "./parser.cr" -require "./map.cr" -require "./exactly.cr" -require "./optional.cr" - -module Parcom - # `AtMost` is a `Parser` that tries to parse with another parser a specific - # number of times. The results of each successful parse are returned in an - # array. If the specific number of parses succeed, the parsing stops, even - # if it is possible to keep parsing. If the parser is never successful, it - # succeeds with an empty array. - class AtMost(T, V) < Parser(T, Array(V)) - @p : Map(T, Array(V?), Array(V)) - - # Accepts the number of parsing attempts, and the parser to use. - def initialize(i : Int, p : Parser(T, V)) - @p = Exactly.new(i, Optional.new(p)).map(&.compact) - end - - # Tries to parse up to the given number of times. - # If the parser never succeeds, returns an empty array. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("AtMost: #{ex.message}") - end - end -end - diff --git a/src/parcom/basic.cr b/src/parcom/basic.cr new file mode 100644 index 0000000..6594555 --- /dev/null +++ b/src/parcom/basic.cr @@ -0,0 +1,10 @@ +require "../parcom.cr" + +module Parcom + module Basic + def pure(value : U) : Parser(T, U) forall T, U + Parser.new("Pure #{value}") { |tokens| Result.new(tokens, value) } + end + end +end + diff --git a/src/parcom/between.cr b/src/parcom/between.cr deleted file mode 100644 index 05519e4..0000000 --- a/src/parcom/between.cr +++ /dev/null @@ -1,37 +0,0 @@ -require "./parser.cr" -require "./map.cr" -require "./exactly.cr" -require "./at_most.cr" - -module Parcom - # `Between` is a `Parser` that tries to parse with another parser a number - # of times within a specified range. The results of each successful parse - # are returned in an array. If the number of successful parses is out of - # the lower bound of the range, the parser fails. 
If the number of successful - # parses reaches thhe upper bound of the range, the parsing stops, even if it - # is possible to keep parsing. - class Between(T, V) < Parser(T, Array(V)) - @p : Map(T, {Array(V), Array(V)}, Array(V)) - - # Accepts the lower and upper bounds for the number of parsing attempts, - # as well as the parser to use. This method works the same way whether or - # not the larger value is passed first or second. If a negative int value - # is given for either parameter, it is treated as `0`. - # TODO: Overload to allow for Range objects. - def initialize(i : Int, j : Int, p : Parser(T, V)) - lower = i < j ? i : j - upper = (i - j).abs - @p = (Exactly.new(lower, p) + AtMost.new(upper, p)).map do |tup| - tup[0] + tup[1] - end - end - - # Tries to parse a numebr of times within the given range. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Between: #{ex.message}") - end - end -end - diff --git a/src/parcom/eof.cr b/src/parcom/eof.cr deleted file mode 100644 index 650da56..0000000 --- a/src/parcom/eof.cr +++ /dev/null @@ -1,23 +0,0 @@ -require "./parser.cr" - -module Parcom - # `EOF` is a `Parser` succeeds if the input stream is empty. - # - # This parser retuns `nil` when successful because there is no - # other meaningful value to return. - # - # This parser is also one of the few cases where it is ideal to not - # modify or take from the input stream, as it should be empty anyway. - class EOF(T) < Parser(T, Nil) - # Succeeds is the input stream is empty and returns `nil`. - # Raises a `ParserFail` exception when the input is not empty. - def parse(tokens : Tokens(T)) : Result(T, Nil) - if tokens.empty? - Result.new(tokens, nil) - else - raise ParserFail.new("Eof: input was not empty") - end - end - end -end - diff --git a/src/parcom/exactly.cr b/src/parcom/exactly.cr deleted file mode 100644 index a34fd0d..0000000 --- a/src/parcom/exactly.cr +++ /dev/null @@ -1,28 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Exactly` is a `Parser` that tries to parse with another parser - # a specific number of times. The results of each successful parse - # are returned in an array. If the number of successes is less than - # OR greater than the specified number, the parser fails. - class Exactly(T, V) < Parser(T, Array(V)) - @p : Sequence(T, V) - - # Accepts the number of parsing attempts, and the parser to use. - # If a negative int is given, it is treated as if it were `0`. - def initialize(i : Int, p : Parser(T, V)) - i = i.negative? ? 0 : i - # put `i` copies of `p` into an array - ps = ([p] of Parser(T, V)) * i - @p = Sequence.new(ps) - end - - # Tries to parse the given number of times. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Exactly: #{ex.message}") - end - end -end - diff --git a/src/parcom/first_of.cr b/src/parcom/first_of.cr deleted file mode 100644 index c4077eb..0000000 --- a/src/parcom/first_of.cr +++ /dev/null @@ -1,40 +0,0 @@ -require "./parser.cr" - -module Parcom - # `FirstOf` is a `Parser` that accepts multiple parsers, and tries to parse - # with all of them, in order. As soon as one of the parsers succeeds, - # that parser's result is returned. If none of the parsers are successful, - # the parsing fails. - class FirstOf(T, V) < Parser(T, V) - @p : Parser(T, V) - - # Accepts the parsers to use. Raises an `ArgumentError` if - # no parsers are provided. 
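# A minimal sketch of `AtLeast`, `AtMost`, and `Between`, assuming the
# corresponding classes are loaded; the inputs and counts are illustrative.
digit = Satisfy(Char).new { |c| c.number? }

AtLeast.new(2, digit).parse(Tokens.from_string("123")).value     # => ['1', '2', '3']
AtMost.new(2, digit).parse(Tokens.from_string("123")).value      # => ['1', '2']
Between.new(1, 3, digit).parse(Tokens.from_string("12ab")).value # => ['1', '2']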
- def initialize(ps : Iterable(Parser(T, V))) - ps_iter = ps.each - p = ps_iter.next - - if p.is_a?(Iterator::Stop) - raise ArgumentError.new("FirstOf requires atleast one parser, got none") - end - - # Combine all the parsers into one by wrapping them with `Alt`. - @p = p - - loop do - p = ps_iter.next - break if p.is_a?(Iterator::Stop) - @p = @p | p - end - end - - # Tries to parse with each of the given parsers. Either returns the first - # successful result or fails if no parsers succeed. - def parse(tokens : Tokens(T)) : Result(T, V) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("FirstOf: #{ex.message}") - end - end -end - diff --git a/src/parcom/flunk.cr b/src/parcom/flunk.cr deleted file mode 100644 index 00a0bd6..0000000 --- a/src/parcom/flunk.cr +++ /dev/null @@ -1,12 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Flunk` is a `Parser` that always fails. - class Flunk(T, V) < Parser(T, V) - # Always raises `ParserFail` - def parse(tokens) : Result(T, V) - raise ParserFail.new("Flunk: parsing failed") - end - end -end - diff --git a/src/parcom/left.cr b/src/parcom/left.cr deleted file mode 100644 index 201f497..0000000 --- a/src/parcom/left.cr +++ /dev/null @@ -1,26 +0,0 @@ -require "./parser.cr" -require "./map.cr" - -module Parcom - # `Left` is a `Parser` that tries to parse with two different - # parsers in succession and fails if either of the two parsers fails. - # This parser behaves the same as `Plus`, but only returns the result - # of the first parser. - class Left(T, V, U) < Parser(T, V) - @p : Map(T, {V, U}, V) - - # Accepts the two parsers to use, in order. - def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) - @p = (p1 + p2).map(&.first) - end - - # Tries to parse with the two given parsers, and returns the - # result of the first parser if they both succeed. - def parse(tokens : Tokens(T)) : Result(T, V) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Left: #{ex.message}") - end - end -end - diff --git a/src/parcom/many.cr b/src/parcom/many.cr deleted file mode 100644 index a734c63..0000000 --- a/src/parcom/many.cr +++ /dev/null @@ -1,36 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Many` is a `Parser` that repeatedly tries to parse with another parser. - # The `Many` object will collect all success values in an array, and return - # them with the final state of the input stream. If the wrapped parser ever - # fails or succeeds without parsing any input, the `Many` object will stop - # attempting to parse will will return the array of prrevious successes. - # - # `Many` will return an empty array if the parser never succeeds or consumes - # input. For cases where at least one parserr success is needed, use `Some`. - class Many(T, V) < Parser(T, Array(V)) - # Accepts the parser to use. - def initialize(@p : Parser(T, V)) - end - - # Continuously parses with the wrapped parser, returns all successes. - # Parsing stops when the parser fails, or succeeds without consuming input. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - parsed = [] of V - - loop do - result = @p.parse?(tokens) - break unless !result.nil? 
&& result.tokens != tokens - - parsed << result.value - tokens = result.tokens - end - - Result.new(tokens, parsed) - rescue ex : ParserFail - raise ParserFail.new("Many: #{ex.message}") - end - end -end - diff --git a/src/parcom/map.cr b/src/parcom/map.cr deleted file mode 100644 index 34961b5..0000000 --- a/src/parcom/map.cr +++ /dev/null @@ -1,30 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Map` is a `Parser` that tries to parse using another parser, - # and then changing the result of that parser to a different value. - # - # Example: - # ``` - # # Where `Digits` is some parser that returns an array of digit characters - # parse_i32 = Digits.new.map { |x| x.join.to_i32 } - # result = parse_i32.parse(Tokens.from_string("1234")) - # result.value # => 1234 (Int32) - # ``` - class Map(T, V, U) < Parser(T, U) - # Accepts the parser to use and the function to apply to the result. - def initialize(@p : Parser(T, V), &block : V -> U) - @f = block - end - - # Tries to parse with the given parser and applies the - # function to the result if successful. - def parse(tokens : Tokens(T)) : Result(T, U) - result = @p.parse(tokens) - Result.new(result.tokens, @f.call(result.value)) - rescue ex : ParserFail - raise ParserFail.new("Map: #{ex.message}") - end - end -end - diff --git a/src/parcom/optional.cr b/src/parcom/optional.cr deleted file mode 100644 index b368d4e..0000000 --- a/src/parcom/optional.cr +++ /dev/null @@ -1,40 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Optional` is a `Parser` that tries to parse with another parser, - # but does not fail the parser chain if the wrapped parser fails. - # If the wrapped parser fails, an `Optional` will not modify the input - # stream, and return `nil` instead. - # - # Example: - # ``` - # digit = Satisfy(Char).new { |c| c.digit? } - # digits = Many.new(digit) - # plus_sign = Optional.new(Token.new('+')) - # positive_int = plus_sign >> digits - # - # input1 = Tokens.from_string("+12345") - # input2 = Tokens.from_string("12345") - # - # # Both of these has the same result - # positive_int.parse(input1) - # positive_int.parse(input2) - # ``` - class Optional(T, V) < Parser(T, V?) - # Accepts the parser to use. - def initialize(@p : Parser(T, V)) - end - - # Tries to parse with the given parser, succeeds with `nil` - # instead of failing. This parser does not consume input if - # the wrapped parser fails. - def parse(tokens : Tokens(T)) : Result(T, V?) - r = @p.parse?(tokens) - - new_tokens = r.nil? ? tokens : r.tokens - new_value = r.nil? ? nil : r.value - Result.new(new_tokens, new_value) - end - end -end - diff --git a/src/parcom/parser.cr b/src/parcom/parser.cr deleted file mode 100644 index ca350df..0000000 --- a/src/parcom/parser.cr +++ /dev/null @@ -1,87 +0,0 @@ -require "../parcom.cr" - -module Parcom - # `Parser` is the base class of all parser objects and contains - # a handful of helper methods that can allow for cleaner syntax. - # - # The generic type `T` represents the type of tokens a parser will - # operate on (usually `Char`). - # The generic type `V` represents the type of value that a parser - # will return when parsing is successful. - abstract class Parser(T, V) - # Accepts a `Tokens(T)` and either retuns a `Result(T, V)`, or raises a - # `ParserFail` exception with a message providing context of where - # in the chain the parsing failed. 
- # - # The `V` in the returned `Result` is arbitrary, but the `Tokens(T)` in - # the returned result MUST contain the parser's input, with some tokens - # removed from the front proprtional to what was parsed. - # - # Example: - # ``` - # hello = ParsesHello.new # inherits from Parser(Char, String) - # input = Tokens.from_string("Hello world!") - # - # result = hello.parse(input) - # result.value # => "Hello" - # result.tokens # => [' ', 'w', 'o', 'r', 'l', 'd', '!'] - # - # bad_input = Tokens.from_string("Good evening world!") - # hello.parse(bad_input) # raises ParserFail - # ``` - abstract def parse(tokens : Tokens(T)) : Result(T, V) - - # Same as `#parse`, but returns `nil` instead of raising `ParserFail`. - def parse?(tokens : Tokens(T)) : Result(T, V)? - self.parse(tokens) - rescue ParserFail - return nil - end - - # Constructs an `Alt` parser with `self` and `other`. - def |(other : Parser(T, V)) : Alt(T, V) - Alt.new(self, other) - end - - # Constructs a `Plus` parser with `self` and `other`. - def +(other : Parser(T, U)) : Plus(T, V, U) forall U - Plus.new(self, other) - end - - # Constructs a `Left` parser with `self` and `other`. - def <<(other : Parser(T, U)) : Left(T, V, U) forall U - Left.new(self, other) - end - - # Constructs a `Right` parser with `self` and `other`. - def >>(other : Parser(T, U)) : Right(T, V, U) forall U - Right.new(self, other) - end - - # Constructs an `Assert` parser with `self` and the given block. - def assert(&block : V -> Bool) : Assert(T, V) - Assert.new(self, &block) - end - - # Constructs an `Assert` parser with `self` and the given proc. - def assert(f : V -> Bool) : Assert(T, V) - Assert.new(self, &f) - end - - # Constructs a `Map` parser with `self` and the given block. - def map(&block : V -> U) : Map(T, V, U) forall U - Map.new(self, &block) - end - - # Constructs a `Map` parser with `self` and the given proc. - def map(f : V -> U) : Map(T, V, U) forall U - Map.new(self, &f) - end - - # Constructs a `Recover` parser with `self` and the given `V`. - def recover(default : V) : Recover(T, V) - Recover.new(self, default) - end - end -end - diff --git a/src/parcom/peek.cr b/src/parcom/peek.cr deleted file mode 100644 index 2b6f657..0000000 --- a/src/parcom/peek.cr +++ /dev/null @@ -1,23 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Peek` is a `Parser` that runs another `Parser` while - # leaving the input stream unmodified. - class Peek(T, V) < Parser(T, V) - # Accepts the parser to run. - def initialize(@p : Parser(T, V)) - end - - # Runs the parser it was initialized with, and returns - # that parser's result along with the original input. - # - # Re-raises a `ParserFail` exception if the other parser fails. - def parse(tokens : Tokens(T)) : Result(T, V) - result = @p.parse(tokens) - Result.new(tokens, result.value) - rescue ex : ParserFail - raise ParserFail.new("Peek: #{ex.message}") - end - end -end - diff --git a/src/parcom/phrase.cr b/src/parcom/phrase.cr deleted file mode 100644 index 1996fd4..0000000 --- a/src/parcom/phrase.cr +++ /dev/null @@ -1,33 +0,0 @@ -require "./parser.cr" -require "./eof.cr" -require "./map.cr" - -module Parcom - # `Phrase` is a `Parser` that tries to parse with another parser, - # but will fail if any of the input has not been consumed. 
- # - # Example: - # ``` - # letter_a = Token.new('a') - # tokens = Tokens.from_string("aaa") - # one_a = Phrase(Char, Char).new(letter_a) - # result = one_a.parse(tokens) # This fails - # ``` - class Phrase(T, V) < Parser(T, V) - @p : Map(T, {V, Nil}, V) - - # Accepts the parser to parse with. - def initialize(p : Parser(T, V)) - @p = (p + EOF(T).new).map &.first - end - - # Tries to parse with the given parser, fails if there - # is any un-parsed input remaining. - def parse(tokens : Tokens(T)) : Result(T, V) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Phrase: #{ex.message}") - end - end -end - diff --git a/src/parcom/plus.cr b/src/parcom/plus.cr deleted file mode 100644 index 53c9b4f..0000000 --- a/src/parcom/plus.cr +++ /dev/null @@ -1,54 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Plus` is a parser that tries to parse with two different - # parsers in succession and fails if either of the two parsers fails. - # The return type of this parser is `{V, U}`, where `V` is the return - # type of the first parser and `U` is the return type of the second. - # - # Example: - # ``` - # parse1 = Token.new('1') - # parse2 = Token.new('2') - # tokens = Tokens.from_string("12") - # result = (parse1 + parse2).parse(tokens) # succeeds - # result = (parse2 + parse1).parse(tokens) # fails, wrong order - # ``` - # - # `Plus` parsers can be chained together, but the resulting return type - # can become unweildly with too many combined parsers: - # ``` - # letter_a = Token.new('a') - # a5 = letter_a + letter_a + letter_a + letter_a + letter_a - # tokens = Tokens.from_string("aaaaa") - # a5.parse(tokens) # succeeds - # # `a5` has a return type of { { { {Char, Char}, Char}, Char}, Char} - # ``` - # - # If you need to parse more than two things in this manner, - # consider using `Many`, `Some`, `Sequence`, or `TokenSeq` instead. - class Plus(T, V, U) < Parser(T, {V, U}) - # Accepts the two parsers to use, in order. - def initialize(@p1 : Parser(T, V), @p2 : Parser(T, U)) - end - - # Tries to parse with the two given parsers, and returns - # their results in a tuple if the both succeed. - def parse(tokens : Tokens(T)) : Result(T, {V, U}) - begin - r1 = @p1.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Plus (left): #{ex.message}") - end - - begin - r2 = @p2.parse(r1.tokens) - rescue ex : ParserFail - raise ParserFail.new("Plus (right): #{ex.message}") - end - - Result.new(r2.tokens, {r1.value, r2.value}) - end - end -end - diff --git a/src/parcom/recover.cr b/src/parcom/recover.cr deleted file mode 100644 index 378f8d6..0000000 --- a/src/parcom/recover.cr +++ /dev/null @@ -1,27 +0,0 @@ -require "./parser.cr" -require "./optional.cr" -require "./map.cr" - -module Parcom - # `Recover` is a `Parser` that tries to parse with another parser, - # but does not fail the parser chain if the wrapped parser fails. - # If the wrapped parser fails, a `Recover` will not modify the input - # stream, and return an arbitrary value of type `V` instead. - class Recover(T, V) < Parser(T, V) - @p : Map(T, V?, V) - - # Accepts the parser to use, and the default value - # to return if the parser fails. - def initialize(p : Parser(T, V), default : V) - @p = Optional.new(p).map { |x| x.nil? ? default : x } - end - - # Tries to parse with the given parser, succeeds with the - # default value instead of failing. This parser does not - # consume input if the wrapped parser fails. 
- def parse(tokens : Tokens(T)) : Result(T, V) - @p.parse(tokens) - end - end -end - diff --git a/src/parcom/right.cr b/src/parcom/right.cr deleted file mode 100644 index a0489b1..0000000 --- a/src/parcom/right.cr +++ /dev/null @@ -1,26 +0,0 @@ -require "./parser.cr" -require "./map.cr" - -module Parcom - # `Right` is a `Parser` that tries to parse with two different - # parsers in succession and fails if either of the two parsers fails. - # This parser behaves the same as `Plus`, but only returns the result - # of the second parser. - class Right(T, V, U) < Parser(T, U) - @p : Map(T, {V, U}, U) - - # Accepts the two parsers to use, in order. - def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) - @p = (p1 + p2).map(&.last) - end - - # Tries to parse with the two given parsers, and returns the - # result of the second parser if they both succeed. - def parse(tokens : Tokens(T)) : Result(T, U) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Left: #{ex.message}") - end - end -end - diff --git a/src/parcom/satisfy.cr b/src/parcom/satisfy.cr deleted file mode 100644 index 9734635..0000000 --- a/src/parcom/satisfy.cr +++ /dev/null @@ -1,33 +0,0 @@ -require "./parser.cr" -require "./any_token.cr" -require "./assert.cr" - -module Parcom - # `Satisfy` is a `Parser` that parses a single token - # if that token passes a predefined test, similar - # to `Assert`. This class is effectively a shorthand - # for the following: - # ``` - # # These parsers are equivalent. - # letter_assert = Assert.new(AnyToken(Char).new) { |x| x.letter? } - # letter_satisfy = Satisfy(Char).new { |x| x.letter? } - # ``` - class Satisfy(T) < Parser(T, T) - @p : Assert(T, T) - - # Accepts the `Bool`-returning block containing the test - # to run on the parsed token. - def initialize(&block : T -> Bool) - @p = AnyToken(T).new.assert(&block) - end - - # Returns the first token of the input if that - # token passes the test. - def parse(tokens : Tokens(T)) : Result(T, T) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Satisfy: #{ex.message}") - end - end -end - diff --git a/src/parcom/sep_by.cr b/src/parcom/sep_by.cr deleted file mode 100644 index fa19027..0000000 --- a/src/parcom/sep_by.cr +++ /dev/null @@ -1,38 +0,0 @@ -require "./parser.cr" -require "./map.cr" -require "./many.cr" - -module Parcom - # `SepBy` is a `Parser` that tries to parse one or more times with one - # parser, alternating with a second parser. - # - # Example: - # ``` - # letter_a = Token.new('a') - # letter_b = Token.new('b') - # p = SepBy(Char, Car, Char).new(letter_a, letter_b) - # tokens = Tokens.from_string("ababababa") - # - # result = p.parse(tokens) - # puts result.value # => ['a', 'a', 'a', 'a', 'a'] - # ``` - class SepBy(T, V, U) < Parser(T, Array(V)) - @p : Map(T, {V, Array(V)}, Array(V)) - - # Accepts the parser that parses the result values, and the - # parser that parses the sepatators. - def initialize(elem : Parser(T, V), sep : Parser(T, U)) - @p = (elem + Many(T, U).new(sep >> elem)).map do |tup| - [tup[0]] + tup[1] - end - end - - # Tries to parse, alternating the first and second parsers. 
- def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("SepBy: #{ex.message}") - end - end -end - diff --git a/src/parcom/sequence.cr b/src/parcom/sequence.cr deleted file mode 100644 index 6a05cde..0000000 --- a/src/parcom/sequence.cr +++ /dev/null @@ -1,33 +0,0 @@ -require "./parser.cr" - -module Parcom - # `Sequence` is a `Parser` that combines multiple parsers and - # tries to parse all of them in succession. If all of the parsers - # succeed, the values parsed are returned in an array, in the order - # they were parsed in. If any of the parsers fail, - # the `Sequence` also fails. - class Sequence(T, V) < Parser(T, Array(V)) - # Accepts the parsers to use. - def initialize(@ps : Iterable(Parser(T, V))) - end - - # Tries each parser in order, and returns their results. - # Fail if any of the wrapped parsers fail. - # TODO: this can probably be optimised more for Arrays - # TODO: might be better to use #zip - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - parsed = [] of V - - @ps.each do |p| - r = p.parse(tokens) - parsed << r.value - tokens = r.tokens - end - - Result.new(tokens, parsed) - rescue ex : ParserFail - raise ParserFail.new("Sequence: #{ex.message}") - end - end -end - diff --git a/src/parcom/some.cr b/src/parcom/some.cr deleted file mode 100644 index a2e3563..0000000 --- a/src/parcom/some.cr +++ /dev/null @@ -1,33 +0,0 @@ -require "./parser.cr" -require "./assert.cr" -require "./many.cr" - -module Parcom - # `Some` is a `Parser` that repeatedly tries to parse with another parser. - # The `Some` object will collect all success values in an array, and - # return them with the final state of the input stream. If the wrapped - # parser ever fails or succeeds without parsing any input, the `Some` - # object will stop attempting to parse and will return the array of - # previous successes. - # - # `Some` will raise a `ParserFail` exception if the parser never succeeds - # or consumes input. For cases where parsing should allow for 0 successes, - # use `Many`. - class Some(T, V) < Parser(T, Array(V)) - @p : Assert(T, Array(V)) - - # Accepts the parser to use. - def initialize(p : Parser(T, V)) - @p = Many.new(p).assert { |arr| !arr.empty? } - end - - # Continuously parses with the wrapped parser, returns all successes. - # Fails if there are no successes. - def parse(tokens : Tokens(T)) : Result(T, Array(V)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Some: #{ex.message}") - end - end -end - diff --git a/src/parcom/token.cr b/src/parcom/token.cr deleted file mode 100644 index b4e1fef..0000000 --- a/src/parcom/token.cr +++ /dev/null @@ -1,20 +0,0 @@ -require "./satisfy" - -module Parcom - # `Token` is a `Parser` that tries to parse a specific token - # from the input stream. - class Token(T) < Parser(T, T) - # Accepts the token that this parser will try to parse. - def initialize(@expected : T) - @p = Satisfy(T).new { |x| x == @expected } - end - - # Tries to parse the specified token from the input stream. 
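# A minimal sketch of `Sequence` with `Token`, assuming the corresponding
# classes are loaded; the "abc" input is illustrative.
a_then_b = Sequence.new([Token.new('a'), Token.new('b')] of Parser(Char, Char))

result = a_then_b.parse(Tokens.from_string("abc"))
result.value  # => ['a', 'b']
result.tokens # => Tokens.new(['c'])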
- def parse(tokens : Tokens(T)) : Result(T, T) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("Token <#{@expected}>: #{ex.message}") - end - end -end - diff --git a/src/parcom/token_seq.cr b/src/parcom/token_seq.cr deleted file mode 100644 index 45900f9..0000000 --- a/src/parcom/token_seq.cr +++ /dev/null @@ -1,38 +0,0 @@ -require "./parser.cr" -require "./sequence.cr" - -module Parcom - # `TokenSeq` is a `Parser` that attempts to parse a specific - # string of tokens. If the expected tokens are not at the start - # of the input stream, the parser fails. - # - # Example: - # ``` - # parse_test = TokenSeq(Char).new("test".chars) - # tokens = Tokens.from_string("testing") - # - # result = parse_test.parse(tokens) - # - # puts result.value # => ['t', 'e', 's', 't'] - # puts result.tokens # => ['i', 'n', 'g'] - # ``` - class TokenSeq(T) < Parser(T, Array(T)) - @p : Sequence(T, T) - - # Accepts the tokens to try and parse, in order. - def initialize(expected : Iterable(T)) - ps = [] of Parser(T, T) - expected.each { |t| ps << Token.new(t) } - - @p = Sequence.new(ps) - end - - # Tries to parse the list of tokens. - def parse(tokens : Tokens(T)) : Result(T, Array(T)) - @p.parse(tokens) - rescue ex : ParserFail - raise ParserFail.new("TokenSeq: #{ex.message}") - end - end -end - -- cgit v1.2.1
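# A closing sketch contrasting `Many` and `Some`, assuming the parser classes
# from this patch are loaded; the inputs are illustrative.
digit = Satisfy(Char).new { |c| c.number? }

Many.new(digit).parse(Tokens.from_string("abc")).value # => [] (zero matches is fine)
Some.new(digit).parse(Tokens.from_string("12a")).value # => ['1', '2']
Some.new(digit).parse(Tokens.from_string("abc"))       # raises ParserFail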