require "./parcom/*" module Parcom VERSION = "0.1.0" # A ParserFail exception should be raised by `Parser#parse` when # a parse attempt is unsuccessful. # Raising this exception in the `#parse` method of a Parser "Foo" # usually follows this pattern to allow for error tracing: # # ``` # class Foo(T, V) < Parser(T, V) # def parse(tokens : Tokens(T)) : Result(T, V) # helper.parse(tokens) # rescue ex : ParserFail # raise ParserFail.new("Foo: #{ex.message}") # end # ``` class ParserFail < Exception end # `Tokens` is an `Array` wrapper struct to store the input # stream of one or more `Parser` objects. # A `Tokens` can be created from any `Iterable`, along with # `String` objects using a special constructor. struct Tokens(T) getter tokens # Constructs a `Tokens(Char)` from a `String`. def self.from_string(s : String) : Tokens(Char) Tokens.new(s.chars) end # Constructs a `Tokens` from an `Iterable`. def initialize(ts : Iterable(T)) if ts.responds_to?(:to_a) @tokens = ts.to_a else @tokens = [] of T ts.each { |t| @tokens << t } end end # Exposes `Array#[](Int)`. def [](index : Int) : T @tokens[index] end # Exposes `Array#[](Int, Int)`, but wraps the returned array in a new `Tokens`. def [](start : Int, count : Int) : Tokens(T) Tokens.new(@tokens[start, count]) end # Exposes `Array#[](Range)`, but wraps the returned array in a new `Tokens`. def [](range : Range) : Tokens(T) Tokens.new(@tokens[range]) end # Like `#[]`, but returns `nil` instead of raising an `IndexError`. def []?(*args) self.[](*args) rescue IndexError nil end # Exposes `Array#empty?`. def empty? : Bool @tokens.empty? end end # A `Result` stores a `Tokens` object and a parsed value, # and is effectively used to store the state of a parser chain. # This is used instead of a `Tuple` or `NamedTuple` because: # 1. This is more idiomatic than a `Tuple`. # 2. Crystal does not support generic named tuples. struct Result(T, V) getter tokens, value def initialize(@tokens : Tokens(T), @value : V) end end class Assert(T, V) < Parser(T, V) def initialize(@p : Parser(T, V), &block : V -> Bool) @f = block end def parse(tokens : Tokens(T)) : Result(T, V) result = @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Assert (pre-assertion): #{ex.message}") else unless @f.call(result.value) raise ParserFail.new("Assert: predicate failed for <#{result.value}>") end return result end end class Satisfy(T) < Parser(T, T) @p : Assert(T, T) def initialize(&block : T -> Bool) @p = AnyToken(T).new.assert(&block) end def parse(tokens : Tokens(T)) : Result(T, T) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Satisfy: #{ex.message}") end end class Token(T) < Parser(T, T) def initialize(@expected : T) @p = Satisfy(T).new { |x| x == @expected } end def parse(tokens : Tokens(T)) : Result(T, T) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Token <#{@expected}>: #{ex.message}") end end class Alt(T, V) < Parser(T, V) def initialize(@p1 : Parser(T, V), @p2 : Parser(T, V)) end def parse(tokens : Tokens(T)) : Result(T, V) @p1.parse(tokens) rescue ex1 : ParserFail begin @p2.parse(tokens) rescue ex2 : ParserFail raise ParserFail.new("Alt (#{ex1.message}), (#{ex2.message})") end end end class Map(T, V, U) < Parser(T, U) def initialize(@p : Parser(T, V), &block : V -> U) @f = block end def parse(tokens : Tokens(T)) : Result(T, U) result = @p.parse(tokens) Result.new(result.tokens, @f.call(result.value)) rescue ex : ParserFail raise ParserFail.new("Map: #{ex.message}") end end class Phrase(T, V) @p : Map(T, {V, Nil}, V) def initialize(p : Parser(T, V)) @p = (p + EOF(T).new).map &.first end def parse(tokens : Tokens(T)) : Result(T, V) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Phrase: #{ex.message}") end end class Plus(T, V, U) < Parser(T, {V, U}) def initialize(@p1 : Parser(T, V), @p2 : Parser(T, U)) end def parse(tokens : Tokens(T)) : Result(T, {V, U}) begin r1 = @p1.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Plus (left): #{ex.message}") end begin r2 = @p2.parse(r1.tokens) rescue ex : ParserFail raise ParserFail.new("Plus (right): #{ex.message}") end Result.new(r2.tokens, {r1.value, r2.value}) end end class Left(T, V, U) < Parser(T, V) @p : Map(T, {V, U}, V) def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) @p = (p1 + p2).map(&.first) end def parse(tokens : Tokens(T)) : Result(T, V) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Left: #{ex.message}") end end class Right(T, V, U) < Parser(T, U) @p : Map(T, {V, U}, U) def initialize(p1 : Parser(T, V), p2 : Parser(T, U)) @p = (p1 + p2).map(&.last) end def parse(tokens : Tokens(T)) : Result(T, U) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Right: #{ex.message}") end end class Recover(T, V) < Parser(T, V) @p : Map(T, V?, V) def initialize(p : Parser(T, V), default : V) @p = Optional.new(p).map { |x| x.nil? ? default : x } end def parse(tokens : Tokens(T)) : Result(T, V) @p.parse(tokens) end end class Optional(T, V) < Parser(T, V?) def initialize(@p : Parser(T, V)) end def parse(tokens : Tokens(T)) : Result(T, V?) r = @p.parse?(tokens) new_tokens = r.nil? ? tokens : r.tokens new_value = r.nil? ? nil : r.value Result.new(new_tokens, new_value) end end class Sequence(T, V) < Parser(T, Array(V)) def initialize(@ps : Iterable(Parser(T, V))) end # TODO: this can probably be optimised more for Arrays # TODO: might be better to use #zip def parse(tokens : Tokens(T)) : Result(T, Array(V)) parsed = [] of V @ps.each do |p| r = p.parse(tokens) parsed << r.value tokens = r.tokens end Result.new(tokens, parsed) rescue ex : ParserFail raise ParserFail.new("Sequence: #{ex.message}") end end class TokenSeq(T) < Parser(T, Array(T)) @p : Sequence(T, T) def initialize(expected : Iterable(T)) ps = [] of Parser(T, T) expected.each { |t| ps << Token.new(t) } @p = Sequence.new(ps) end def parse(tokens : Tokens(T)) : Result(T, Array(T)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("TokenSeq: #{ex.message}") end end class Many(T, V) < Parser(T, Array(V)) def initialize(@p : Parser(T, V)) end def parse(tokens : Tokens(T)) : Result(T, Array(V)) parsed = [] of V loop do result = @p.parse?(tokens) if result.nil? break else parsed << result.value tokens = result.tokens end end Result.new(tokens, parsed) rescue ex : ParserFail raise ParserFail.new("Many: #{ex.message}") end end class Some(T, V) < Parser(T, Array(V)) @p : Assert(T, Array(V)) def initialize(p : Parser(T, V)) @p = Many.new(p).assert { |arr| !arr.empty? } end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Some: #{ex.message}") end end class Exactly(T, V) < Parser(T, Array(V)) @p : Sequence(T, V) def initialize(i : Int, p : Parser(T, V)) i = i.negative? ? 0 : i @p = Sequence.new(([p] of Parser(T, V)) * i) end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Exactly: #{ex.message}") end end class AtLeast(T, V) < Parser(T, Array(V)) @p : Map(T, {Array(V), Array(V)}, Array(V)) def initialize(i : Int, p : Parser(T, V)) @p = (Exactly.new(i, p) + Many.new(p)).map do |tup| tup[0] + tup[1] end end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("AtLeast: #{ex.message}") end end class AtMost(T, V) < Parser(T, Array(V)) @p : Map(T, Array(V?), Array(V)) def initialize(i : Int, p : Parser(T, V)) @p = Exactly.new(i, Optional.new(p)).map(&.compact) end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("AtMost: #{ex.message}") end end class Between(T, V) < Parser(T, Array(V)) @p : Map(T, {Array(V), Array(V)}, Array(V)) def initialize(i : Int, j : Int, p : Parser(T, V)) lower = i < j ? i : j upper = (i - j).abs @p = (Exactly.new(lower, p) + AtMost.new(upper, p)).map do |tup| tup[0] + tup[1] end end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("Between: #{ex.message}") end end class FirstOf(T, V) < Parser(T, V) @p : Parser(T, V) def initialize(ps : Iterable(Parser(T, V))) ps_iter = ps.each p = ps_iter.next if p.is_a?(Iterator::Stop) raise ArgumentError.new("FirstOf requires atleast one parser, got none") end @p = p p = ps_iter.next until p.is_a?(Iterator::Stop) @p = @p | p p = ps_iter.next end end def parse(tokens : Tokens(T)) : Result(T, V) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("FirstOf: #{ex.message}") end end class SepBy(T, V, U) < Parser(T, Array(V)) @p : Map(T, {V, Array(V)}, Array(V)) def initialize(elem : Parser(T, V), sep : Parser(T, U)) @p = (elem + Many(T, U).new(sep >> elem)).map do |tup| [tup[0]] + tup[1] end end def parse(tokens : Tokens(T)) : Result(T, Array(V)) @p.parse(tokens) rescue ex : ParserFail raise ParserFail.new("SepBy: #{ex.message}") end end end