src/parcom.cr


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125

require "./parcom/*"

module Parcom
  # Version string of the `Parcom` module.
  VERSION = "0.2.0"

  # Exception type that signals an unsuccessful parse attempt.
  # `Parser#parse` should raise a `ParserFail` whenever it cannot parse
  # its input.
  class ParserFail < Exception; end

  # Provides a more convenient syntax for combining parsers via `Parser#and_then`.
  #
  # Arguments:
  # * `name`: a string literal used as the name of the resulting parser.
  # * `t`, `u`: the types used as the resulting parser's type arguments,
  #   i.e. the macro builds a `Parser(t, u)`.
  # * `steps`: any number of 2-tuples `{variable, parser_expression}`. Each
  #   expression should resolve to a `Parser(t.class, _)`; its success value
  #   is bound to the aforementioned variable, which is visible to all later
  #   steps and to `finally`.
  # * `finally`: a named argument holding an expression that resolves to a
  #   `Parser(t.class, u.class)`. It is placed innermost, so it may use every
  #   variable bound by the steps.
  #
  # Example:
  # ```
  # any_word = Parser(Char, Char).satisfy(&.letter?).some.map(&.join)
  # ws = Parser(Char, Array(Char)).satisfy(&.whitespace?).many
  # two_of_same_word = parser_chain "two words", Char, String,
  #   {word, any_word},
  #   {_,    ws},
  #   finally: Parser.token_sequence(word.chars).map(&.join)
  #
  # tokens = Tokens.from_string("foo   foo")
  # result = two_of_same_word.parse(tokens)
  # result.value # => "foo"
  #
  # # The above definition of `two_of_same_word`
  # # is an alternative way of doing this:
  # two_of_same_word = any_word.and_then do |word|
  #   ws.and_then do |_|
  #     Parser.token_sequence(word.chars).map(&.join)
  #   end
  # end.named("two words")
  # ```
  #
  # Expansion: the macro emits `Parser(t, u).new(name) do |tokens| ... end`.
  # Inside that block it opens one `and_then do |variable|` per step, in order,
  # emits `finally`, closes one `end` per step, and then calls `.parse(tokens)`
  # on the resulting chained parser.
  #
  # NOTE: the block parameter is literally named `tokens`, so a step or
  # `finally` expression that mentions a variable called `tokens` will refer
  # to this block parameter rather than to a variable at the call site.
  #
  # This macro is based on Haskell's do-notation.
  macro parser_chain(name, t, u, *steps, finally)
    Parser({{t}}, {{u}}).new({{name}}) do |tokens|
      {% for tup, index in steps %}
        {{tup.last}}.and_then do |{{tup.first}}|
      {% end %}
      {{finally}}
      {% for _, _ in steps %}
        end
      {% end %}
      .parse(tokens)
    end
  end

  # `Tokens` is an `Array` wrapper struct that stores the input
  # stream of one or more `Parser` objects.
  #
  # A `Tokens` can be built from any `Iterable` via `.new`, or from a
  # `String` via `.from_string`, which yields a `Tokens(Char)`.
  struct Tokens(T)
    # The wrapped array of tokens.
    getter tokens : Array(T)

    # Constructs a `Tokens(Char)` from the characters of *s*.
    def self.from_string(s : String) : Tokens(Char)
      Tokens.new(s.chars)
    end

    # Constructs a `Tokens` from an `Iterable`.
    #
    # When *ts* responds to `to_a`, that array is used as the backing store;
    # otherwise the elements are collected one by one into a fresh array.
    def initialize(ts : Iterable(T))
      if ts.responds_to?(:to_a)
        @tokens = ts.to_a
      else
        @tokens = [] of T
        ts.each { |t| @tokens << t }
      end
    end

    # Exposes `Array#[](Int)`.
    #
    # Raises `IndexError` if *index* is out of bounds.
    def [](index : Int) : T
      @tokens[index]
    end

    # Exposes `Array#[](Int, Int)`, but wraps the returned array in a new `Tokens`.
    #
    # Raises `IndexError` if *start* is out of bounds.
    def [](start : Int, count : Int) : Tokens(T)
      Tokens.new(@tokens[start, count])
    end

    # Exposes `Array#[](Range)`, but wraps the returned array in a new `Tokens`.
    #
    # Raises `IndexError` if the start of *range* is out of bounds.
    def [](range : Range) : Tokens(T)
      Tokens.new(@tokens[range])
    end

    # Like `#[](Int)`, but returns `nil` instead of raising an `IndexError`.
    #
    # Delegates to `Array#[]?` rather than rescuing an exception, so an
    # out-of-bounds lookup does not pay for raising and unwinding.
    def []?(index : Int) : T?
      @tokens[index]?
    end

    # Like `#[](Int, Int)`, but returns `nil` instead of raising an `IndexError`.
    def []?(start : Int, count : Int) : Tokens(T)?
      @tokens[start, count]?.try { |slice| Tokens.new(slice) }
    end

    # Like `#[](Range)`, but returns `nil` instead of raising an `IndexError`.
    def []?(range : Range) : Tokens(T)?
      @tokens[range]?.try { |slice| Tokens.new(slice) }
    end

    # Exposes `Array#empty?`.
    def empty? : Bool
      @tokens.empty?
    end
  end

  # A `Result` bundles a `Tokens` object together with a parsed value,
  # and effectively carries the state of a parser chain.
  #
  # A dedicated struct is used rather than a `Tuple` or `NamedTuple` because:
  #   1. It is more idiomatic than a `Tuple`.
  #   2. Crystal does not support generic named tuples.
  struct Result(T, U)
    getter tokens
    getter value

    # Creates a result holding *tokens* and the parsed *value*.
    def initialize(@tokens : Tokens(T), @value : U)
    end

    # Returns a new `Result` with the same tokens, whose value is the
    # outcome of calling the proc *f* on this result's value.
    def map(f : U -> V) : Result(T, V) forall V
      mapped = f.call(@value)
      Result.new(@tokens, mapped)
    end

    # Block form of `#map`: returns a new `Result` with the same tokens,
    # whose value is the outcome of calling the block on this result's value.
    def map(&block : U -> V) : Result(T, V) forall V
      Result.new(@tokens, block.call(@value))
    end
  end
end