scansion/pretty.lua

Thu, 23 Mar 2023 15:12:30 +0000

author
Matthew Wild <mwild1@gmail.com>
date
Thu, 23 Mar 2023 15:12:30 +0000
changeset 174
662bd8c5ae28
parent 152
ba8219ac7484
child 178
e547ddf8b64d
permissions
-rw-r--r--

Serialize XML in a consistent order by default

This overrides all XML serialization to emit attributes in an ordered form, so
the XML will match across multiple runs. This can be useful for comparing
different runs, or even two stanzas printed in the same run (e.g. if there is
a mismatch).

require "verse"
local xml = require "scansion.xml";
local s_format, s_gsub = string.format, string.gsub;

local unsorted_pairs = pairs;
local sorted_pairs = require "util.iterators".sorted_pairs;

-- Entity replacements for the five characters xml_escape() rewrites
local escape_table = {
	["&"] = "&amp;";
	["<"] = "&lt;";
	[">"] = "&gt;";
	["\""] = "&quot;";
	["'"] = "&apos;";
};

-- Returns `str` with every ', ", <, > and & replaced by the matching entity
-- from escape_table. Only the resulting string is returned; the match count
-- that gsub also produces is discarded.
local function xml_escape(str)
	local escaped = s_gsub(str, "['&<>\"]", escape_table);
	return escaped;
end

-- Placeholder functions mirroring the util.termcolours interface,
-- so colour output can be added in the future.

-- Ignores whatever style names it is given and returns the empty string.
local function getstyle()
	return "";
end
-- Colourless stand-in for a termcolours-style getstring(style, text).
--   style: value obtained from getstyle(); accepted but not used
--   text:  the string that would be decorated
-- Returns `text` unchanged, whatever `style` is. (A missing/false style
-- must not replace the text: that would make the function return nil.)
local function getstring(style, text) -- luacheck: ignore 212/style
	return text;
end

-- Option values used when the caller's config does not provide its own
local default_config = {
	-- spaces of indentation per nesting level (and the starting indent
	-- when the printer is called without one)
	indent = 2,
	-- when false, text nodes without any non-whitespace character are dropped
	preserve_whitespace = false,
	-- whether attributes should be emitted in sorted order
	sorted = true,
};

-- Creates a pretty-printer for XML strings.
--
-- user_config (table, optional) may set:
--   indent               spaces added per nesting level; also the starting
--                        indent when the printer is called without one
--   preserve_whitespace  keep text nodes that contain only whitespace
--   sorted               iterate attributes with sorted_pairs, so the
--                        output is stable across runs
-- Any option left unset (nil) falls back to default_config.
--
-- Returns a function (s, ind): parses the string `s` with xml.parse and
-- returns it re-serialized with one node per line, the root starting at an
-- indent of `ind` spaces. If xml.parse returns nothing, `s` is returned as-is.
local function new(user_config)
	user_config = user_config or {};
	local config = setmetatable({}, {
		__index = function (_, k)
			-- Test against nil (not truthiness) so an explicit `false` from
			-- the caller, e.g. sorted = false, overrides a truthy default.
			local value = user_config[k];
			if value == nil then
				value = default_config[k];
			end
			return value;
		end;
	});
	local style_attrk = getstyle("yellow");
	local style_attrv = getstyle("red");
	local style_tagname = getstyle("red");
	local style_punc = getstyle("magenta");

	-- Read through `config`, not `user_config`, so that the default
	-- (sorted = true) applies when the caller does not set the option.
	local pairs = config.sorted and sorted_pairs or unsorted_pairs;

	local attr_format = " "..getstring(style_attrk, "%s")..getstring(style_punc, "=")..getstring(style_attrv, "'%s'");
	local open_tag_format = getstring(style_punc, "<")..getstring(style_tagname, "%s").."%s"..getstring(style_punc, ">");
	local close_tag_format = getstring(style_punc, "</")..getstring(style_tagname, "%s")..getstring(style_punc, ">");
	local tag_format = open_tag_format.."%s"..close_tag_format;
	local short_close_tag_format = getstring(style_punc, "<")..getstring(style_tagname, "%s").."%s"..getstring(style_punc, "/>");

	-- Serializes element `t` (and, recursively, its children) at an indent
	-- of `ind` spaces. Child elements are indented one level deeper; text
	-- nodes are placed at the element's own indent.
	local function pretty_print(t, ind)
		ind = ind or config.indent;
		local padding = string.rep(" ", ind);

		-- Collect pieces in a buffer rather than growing a string per child
		local children = {};
		for _, child in ipairs(t) do
			if type(child) == "string" then
				if config.preserve_whitespace or child:match("%S") then
					children[#children+1] = "\n"..padding..xml_escape(child);
				end
			else
				children[#children+1] = "\n"..pretty_print(child, ind+config.indent);
			end
		end
		local children_text = table.concat(children);

		local attrs = {};
		if t.attr then
			for k, v in pairs(t.attr) do
				if type(k) == "string" then
					-- Values are escaped like text nodes; a raw ' & or <
					-- inside the quotes would make the output malformed.
					attrs[#attrs+1] = s_format(attr_format, k, xml_escape(tostring(v)));
				end
			end
		end
		local attr_string = table.concat(attrs);

		if #t == 0 then
			-- No child nodes at all: self-closing form
			return padding..s_format(short_close_tag_format, t.name, attr_string);
		end

		if children_text ~= "" then
			-- Put the closing tag on its own line, aligned with the opening tag
			children_text = children_text.."\n"..padding;
		end
		return padding..s_format(tag_format, t.name, attr_string, children_text, t.name);
	end

	return function (s, ind)
		local doc = xml.parse(s);
		if not doc then
			return s; -- Not valid XML, don't prettify
		end
		return pretty_print(doc, ind);
	end
end

-- Module interface: new(user_config) builds a pretty-printer function
return { new = new };

mercurial