diff options
Diffstat (limited to 'spec/practical/not_xml_spec.cr')
| -rw-r--r-- | spec/practical/not_xml_spec.cr | 171 |
1 files changed, 171 insertions, 0 deletions
# Practical spec: a parser for a small XML-like markup language, assembled
# from Parcom combinators, plus one example that parses a nested document.

require "../spec_helper.cr"

include Parcom

# Adapted from: https://bodil.lol/parser-combinators/

# A single `name="value"` pair attached to an element.
struct Attribute
  getter name, value

  def initialize(@name : String, @value : String)
  end

  # Writes the attribute as `name=value` (the value is written without quotes).
  def to_s(io : IO)
    io << @name << "=" << @value
  end
end

# A parsed element: its tag name, its attributes and its child elements.
struct Element
  getter name, attributes, children

  def initialize(@name : String, @attributes : Array(Attribute), @children : Array(Element))
  end

  # Writes the element in tag form: self-closing (`<name .../>`) when it has
  # no children, otherwise as an open tag, the children, and a closing tag.
  #
  # Output is streamed straight to `io` instead of going through a temporary
  # string, and the separating space is emitted per attribute so an element
  # without attributes does not get a stray space after its name.
  def to_s(io : IO)
    io << "<" << @name
    @attributes.each { |attr| io << " " << attr }
    if @children.empty?
      io << "/>"
    else
      io << ">"
      @children.each { |child| io << child }
      io << "</" << @name << ">"
    end
  end
end

# Parser that matches the exact characters of `s` and yields them re-joined
# into a String.
def match_literal(s : String) : Parser(Char, String)
  Parser(Char, Char).token_sequence(s.chars).map(&.join)
end

# Parser for a single letter.
def letter
  Parser(Char, Char).satisfy { |c| c.letter? }
end

# Parser for a single character that is a dash or alphanumeric.
def alphanum_dash
  Parser(Char, Char).satisfy { |c| c == '-' || c.alphanumeric? }
end

# Parser for an identifier: one letter followed by any run of
# `alphanum_dash` characters, yielded as a String.
def identifier
  (letter + alphanum_dash.many).map do |c, cs|
    # Put the leading letter back in front of the remaining characters.
    cs.unshift(c).join
  end
end

# Parser for the `=` sign between an attribute name and its value.
def equal
  Parser.token('=')
end

# Parser for a double-quote character.
def quote
  Parser.token('"')
end

# Parser for the text inside a string literal: every character up to (but not
# including) the next double quote, joined into a String.
def string_body
  Parser(Char, Char).satisfy { |c| c != '"' }.many.map(&.join)
end

# Parser for a double-quoted string; the surrounding quotes are matched but
# only the body is kept as the result.
def string_literal
  quote >> string_body << quote
end

# Parser for a single whitespace character.
def ws_char
  Parser(Char, Char).satisfy { |c| c.whitespace? }
end

# Wraps the parser `p` so that whitespace on either side of it is consumed
# and discarded.
def within_ws(p)
  ws_char.many >> p << ws_char.many
end

# Parser for `identifier="string"`, yielding an Attribute.
def attr_pair
  (identifier + (equal >> string_literal)).map do |tup|
    Attribute.new(tup.first, tup.last)
  end
end

# Parser for a run of attribute pairs, each one preceded by whitespace.
def attributes
  (ws_char.some >> attr_pair).many
end

# Parser for the shared beginning of a tag: `<`, the tag name, and its
# attributes. Yields the name paired with the attribute list.
def element_start
  match_literal("<") >> (identifier + attributes)
end

# Parser for a self-closing element such as `<name attr="v"/>`.
def single_element
  (element_start << match_literal("/>")).map do |tup|
    Element.new(tup.first, tup.last, [] of Element)
  end
end

# Parser for an opening tag such as `<name attr="v">`. The resulting Element
# has no children yet; `parent_element` fills them in.
def open_element
  (element_start << match_literal(">")).map do |tup|
    Element.new(tup.first, tup.last, [] of Element)
  end
end

# Parser for any element (self-closing or with children), with surrounding
# whitespace skipped.
def element
  within_ws(single_element | parent_element)
end

# Parser for the closing tag `</name>` of the element called `name`.
def close_element(name : String)
  match_literal("</") >> match_literal(name) << match_literal(">")
end

# Parser for an element with an opening tag, child elements and a matching
# closing tag.
#
# The closing tag depends on the name found in the opening tag, so the two
# stages are sequenced by hand inside a custom parser block.
# TODO: implement Parser#and_then
def parent_element
  Parser(Char, Element).new("parent element") do |tokens|
    opened = open_element.parse(tokens)
    # Keep the remaining input in its own local rather than reassigning the
    # block parameter.
    rest = opened.tokens
    base_element = opened.value

    # `element` is only invoked here, while parsing, which is what lets
    # `element` and `parent_element` refer to each other.
    parsed_children = (element.many << close_element(base_element.name)).parse(rest)

    Result.new(
      parsed_children.tokens,
      Element.new(
        base_element.name,
        base_element.attributes,
        parsed_children.value
      )
    )
  end
end

# Adapted from: https://bodil.lol/parser-combinators/

describe "markup language similar to XML" do
  # The expectation lives inside an `it` so that it is registered and
  # reported as an example instead of running while the group is defined.
  it "parses a nested document into an Element tree" do
    expected_value = Element.new(
      "top",
      [Attribute.new("label", "Top")],
      [
        Element.new(
          "semi-bottom",
          [Attribute.new("label", "Bottom")],
          [] of Element
        ),
        Element.new(
          "middle",
          [] of Attribute,
          [
            Element.new(
              "bottom",
              [Attribute.new("label", "another bottom")],
              [] of Element
            ),
          ],
        ),
      ]
    )

    xml = "
<top label=\"Top\">
  <semi-bottom label=\"Bottom\"/>
  <middle>
    <bottom label=\"another bottom\"/>
  </middle>
</top>"

    result = element.parse(Tokens.from_string(xml))
    result.value.should eq(expected_value)
  end
end
