From 988818905a8ceb2595382cc03d01903d0f1c2984 Mon Sep 17 00:00:00 2001
From: Matthew Hall
Date: Tue, 21 Mar 2023 20:26:24 +1300
Subject: Example for Xcruciating Markup Language

---
Review notes (commentary only; the payload starts at the "diff --git" line):

* This copy of the patch was recovered from a dump that had collapsed every
  line break and dropped everything that looked like an HTML tag. The line
  structure is rebuilt here so the hunk again carries the 171 added lines
  that its header announces.
* Three spots in the payload had been emptied by the tag stripping and are
  reconstructed from the surrounding code:
    - Element#to_s: the closing-tag write, str << "</" << @name << ">".
    - close_element: the leading match_literal("</") parser.
    - the `xml` fixture of the spec, rebuilt from `expected_value`. Its
      exact original spacing is a best guess; `element` is wrapped in
      within_ws, so whitespace around elements is skipped either way.
  NOTE(review): blob f0b6911 has not been re-verified against this text.
* The author's e-mail address on the From: line was lost the same way and
  cannot be recovered from this copy.

What the added spec contains:

* Attribute / Element: plain structs holding the parsed result. Element#to_s
  prints a self-closing tag when there are no children and an open/close
  pair otherwise.
* match_literal, letter, alphanum_dash, identifier, equal, quote,
  string_body, string_literal, ws_char, within_ws: small parser builders.
  An identifier is one letter followed by any number of alphanumerics or
  dashes; a string literal is everything between two double quotes.
* attr_pair / attributes: `name="value"` pairs, each preceded by at least
  one whitespace character.
* single_element / open_element: `<name attrs/>` and `<name attrs>`; both
  produce an Element with no children.
* parent_element: written by hand (see its TODO about Parser#and_then). It
  parses the opening tag first so that close_element can be built with the
  same name, then parses zero or more child elements followed by that
  closing tag.
* NOTE(review): Attribute#to_s prints name=value without quotes and
  Element#to_s always prints a space after the tag name, so to_s output is
  not input this grammar accepts. Left unchanged here.

 spec/practical/not_xml_spec.cr | 171 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 171 insertions(+)
 create mode 100644 spec/practical/not_xml_spec.cr

(limited to 'spec/practical/not_xml_spec.cr')

diff --git a/spec/practical/not_xml_spec.cr b/spec/practical/not_xml_spec.cr
new file mode 100644
index 0000000..f0b6911
--- /dev/null
+++ b/spec/practical/not_xml_spec.cr
@@ -0,0 +1,171 @@
+require "../spec_helper.cr"
+
+include Parcom
+
+# Adapted from: https://bodil.lol/parser-combinators/
+
+struct Attribute
+  getter name, value
+
+  def initialize(@name : String, @value : String)
+  end
+
+  def to_s(io : IO)
+    io << @name << "=" << @value
+  end
+end
+
+struct Element
+  getter name, attributes, children
+
+  def initialize(@name : String, @attributes : Array(Attribute), @children : Array(Element))
+  end
+
+  def to_s(io : IO)
+    io << String.build do |str|
+      str << "<" << @name << " " << @attributes.join(" ")
+      if @children.empty?
+        str << "/>"
+      else
+        str << ">"
+        @children.each { |c| str << c }
+        str << "</" << @name << ">"
+      end
+    end
+  end
+end
+
+def match_literal(s : String) : Parser(Char, String)
+  Parser(Char, Char).token_sequence(s.chars).map(&.join)
+end
+
+def letter
+  Parser(Char, Char).satisfy { |c| c.letter? }
+end
+
+def alphanum_dash
+  Parser(Char, Char).satisfy { |c| c == '-' || c.alphanumeric? }
+end
+
+def identifier
+  (letter + alphanum_dash.many).map do |c, cs|
+    cs.unshift(c).join
+  end
+end
+
+def equal
+  Parser.token('=')
+end
+
+def quote
+  Parser.token('"')
+end
+
+def string_body
+  Parser(Char, Char).satisfy { |c| c != '"' }.many.map(&.join)
+end
+
+def string_literal
+  quote >> string_body << quote
+end
+
+def ws_char
+  Parser(Char, Char).satisfy { |c| c.whitespace? }
+end
+
+def within_ws(p)
+  ws_char.many >> p << ws_char.many
+end
+
+def attr_pair
+  (identifier + (equal >> string_literal)).map do |tup|
+    Attribute.new(tup.first, tup.last)
+  end
+end
+
+def attributes
+  (ws_char.some >> attr_pair).many
+end
+
+def element_start
+  match_literal("<") >> (identifier + attributes)
+end
+
+def single_element
+  (element_start << match_literal("/>")).map do |tup|
+    Element.new(tup.first, tup.last, [] of Element)
+  end
+end
+
+def open_element
+  (element_start << match_literal(">")).map do |tup|
+    Element.new(tup.first, tup.last, [] of Element)
+  end
+end
+
+def element
+  within_ws(single_element | parent_element)
+end
+
+def close_element(name : String)
+  match_literal("</") >> match_literal(name) << match_literal(">")
+end
+
+# TODO: implement Parser#and_then
+def parent_element
+  Parser(Char, Element).new("parent element") do |tokens|
+    result_open = open_element.parse(tokens)
+    tokens = result_open.tokens
+    base_element = result_open.value
+    closing_name = base_element.name
+
+    result_children = (element.many << close_element(closing_name)).parse(tokens)
+    Result.new(
+      result_children.tokens,
+      Element.new(
+        base_element.name,
+        base_element.attributes,
+        result_children.value
+      )
+    )
+  end
+end
+
+# Adapted from: https://bodil.lol/parser-combinators/
+
+describe "markup language similar to XML" do
+  expected_value = Element.new(
+    "top",
+    [Attribute.new("label", "Top")],
+    [
+      Element.new(
+        "semi-bottom",
+        [Attribute.new("label", "Bottom")],
+        [] of Element
+      ),
+      Element.new(
+        "middle",
+        [] of Attribute,
+        [
+          Element.new(
+            "bottom",
+            [Attribute.new("label", "another bottom")],
+            [] of Element
+          )
+        ],
+      )
+    ]
+  )
+
+  xml = "
+<top label=\"Top\">
+    <semi-bottom label=\"Bottom\"/>
+    <middle>
+        <bottom label=\"another bottom\"/>
+    </middle>
+</top>"
+
+  result = element.parse(Tokens.from_string(xml))
+  result.value.should eq(expected_value)
+end
+
+-- 
cgit v1.2.1