Thu, 23 Mar 2023 15:12:30 +0000
Serialize XML in a consistent order by default
This overrides all XML serialization to emit attributes in an ordered form, so
the XML will match across multiple runs. This can be useful for comparing
different runs, or even two stanzas printed in the same run (e.g. if there is
a mismatch).
-- XML pretty-printer: parses a string with scansion.xml and re-serializes it
-- with indentation and (by default) attributes in sorted order, so the same
-- stanza always produces the same text.

require "verse"
local xml = require "scansion.xml";

local s_format, s_gsub, s_rep = string.format, string.gsub, string.rep;
local t_concat = table.concat;

local unsorted_pairs = pairs;
local sorted_pairs = require "util.iterators".sorted_pairs;

-- Entity replacements for the characters matched by xml_escape().
local escape_table = { ["'"] = "&apos;", ["\""] = "&quot;", ["<"] = "&lt;", [">"] = "&gt;", ["&"] = "&amp;" };

--- Replace the characters ' " < > & in `str` with their XML entities.
-- The outer parentheses drop gsub's second return value (the match count).
local function xml_escape(str)
	return (s_gsub(str, "['&<>\"]", escape_table));
end

-- Dummy functions compatible with util.termcolours,
-- just in case we add colour in the future
local function getstyle()
	return ""
end

-- NOTE(review): when `style` is nil/false this returns `style`, not `text`;
-- the condition may have been meant to be `not text` -- confirm. It never
-- triggers in this file because getstyle() always returns "".
local function getstring(style, text)
	if not style then
		text = style;
	end
	return text;
end

-- Values used for any option the caller does not set.
local default_config = {
	indent = 2;
	preserve_whitespace = false;
	sorted = true;
};

--- Create a pretty-printer function.
-- @param user_config optional table overriding fields of default_config:
--   `indent` (spaces added per nesting level, and the indent of the root
--   element when none is passed to the printer), `preserve_whitespace`
--   (keep whitespace-only text nodes) and `sorted` (emit attributes via
--   sorted_pairs instead of plain pairs).
-- @return function(s, ind) returning the prettified form of the XML string
--   `s`, or `s` itself when xml.parse() returns a false value for it.
local function new(user_config)
	user_config = user_config or {};

	-- Look options up lazily; fall back to the default only when the caller
	-- left the option unset, so an explicit `false` is respected.
	local config = setmetatable({}, {
		__index = function (_, k)
			local value = user_config[k];
			if value == nil then
				value = default_config[k];
			end
			return value;
		end
	});

	local style_attrk = getstyle("yellow");
	local style_attrv = getstyle("red");
	local style_tagname = getstyle("red");
	local style_punc = getstyle("magenta");

	-- Chosen once per printer, from the merged config so the default applies.
	local attr_pairs = config.sorted and sorted_pairs or unsorted_pairs;

	local attr_format = " "..getstring(style_attrk, "%s")..getstring(style_punc, "=")..getstring(style_attrv, "'%s'");
	local open_tag_format = getstring(style_punc, "<")..getstring(style_tagname, "%s").."%s"..getstring(style_punc, ">");
	local close_tag_format = getstring(style_punc, "</")..getstring(style_tagname, "%s")..getstring(style_punc, ">");
	local tag_format = open_tag_format.."%s"..close_tag_format;
	local short_close_tag_format = getstring(style_punc, "<")..getstring(style_tagname, "%s").."%s"..getstring(style_punc, "/>");

	-- Serialize element `t` (array part = children, `t.attr` = attributes,
	-- `t.name` = tag name) indented by `ind` spaces.
	local function pretty_print(t, ind)
		ind = ind or config.indent;
		local padding = s_rep(" ", ind);

		local children = {};
		for _, child in ipairs(t) do
			if type(child) == "string" then
				-- Whitespace-only text is dropped unless explicitly preserved.
				if config.preserve_whitespace or child:match("%S") then
					children[#children + 1] = "\n"..padding..xml_escape(child);
				end
			else
				children[#children + 1] = "\n"..pretty_print(child, ind + config.indent);
			end
		end
		local children_text = t_concat(children);

		local attrs = {};
		if t.attr then
			for k, v in attr_pairs(t.attr) do
				if type(k) == "string" then
					-- Values are wrapped in single quotes by attr_format, so
					-- they must be escaped to keep the output well-formed.
					attrs[#attrs + 1] = s_format(attr_format, k, xml_escape(tostring(v)));
				end
			end
		end
		local attr_string = t_concat(attrs);

		local use_tag_format = tag_format;
		if #t == 0 then
			-- No children at all: emit <name attrs/>. This format only has
			-- two %s slots, so it uses just the name and attribute string.
			use_tag_format = short_close_tag_format;
		end

		if children_text ~= "" then
			-- Put the closing tag on its own line, aligned with the opening tag.
			children_text = children_text.."\n"..padding;
		end

		return padding..s_format(use_tag_format, t.name, attr_string, children_text, t.name);
	end

	return function (s, ind)
		local doc = xml.parse(s);
		if not doc then
			return s; -- Not valid XML, don't prettify
		end
		return pretty_print(doc, ind);
	end
end

return {
	new = new;
}