Add lxp.totable by Tomás Guisasola

Fri, 23 Apr 2021 21:18:24 +0100

author
Matthew Wild <mwild1@gmail.com>
date
Fri, 23 Apr 2021 21:18:24 +0100
changeset 37
233463804681
parent 36
4a61f00ee916
child 38
1ad8000fedc0

Add lxp.totable by Tomás Guisasola

src/lxp/totable.lua file | annotate | diff | comparison | revisions
tests/test-table.lua file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/lxp/totable.lua	Fri Apr 23 21:18:24 2021 +0100
@@ -0,0 +1,117 @@
+-- See Copyright Notice in license.html
+-- Based on Luiz Henrique de Figueiredo's lxml:
+-- http://www.tecgraf.puc-rio.br/~lhf/ftp/lua/#lxml
+
+local lxp = require "lxp"
+
+local table = require"table"
+local tinsert, tremove = table.insert, table.remove
+local assert, pairs, tostring, type = assert, pairs, tostring, type
+
+-- auxiliary functions -------------------------------------------------------
+--- StartElement callback: opens a new element on the parser's stack.
+-- The element table keeps the tag name at index 0 and every attribute as a
+-- field keyed by the attribute name; children are appended later by the
+-- other callbacks.
+-- @param p    parser object whose callbacks table carries a `stack` field
+-- @param tag  name of the element being opened
+-- @param attr attribute table: positions 1..#attr hold the attribute names,
+--             and each name is also a key mapping to that attribute's value
+local function starttag (p, tag, attr)
+	local open = p:getcallbacks().stack
+	local element = {[0] = tag}
+	-- Walk the array part for the names and look each value up by name.
+	for idx = 1, #attr do
+		local name = attr[idx]
+		element[name] = attr[name]
+	end
+	-- Push: the new element becomes the innermost open element.
+	open[#open + 1] = element
+end
+
+--- EndElement callback: closes the element on top of the stack and appends
+-- it as the last child of its parent (the new top of the stack).
+-- Raises an error when `tag` differs from the name stored at index 0 of the
+-- element being closed.
+-- @param p   parser object whose callbacks table carries a `stack` field
+-- @param tag name of the element being closed
+local function endtag (p, tag)
+	local stack = p:getcallbacks().stack
+	local element = tremove(stack)
+	-- Build the diagnostic only on mismatch: handing it to assert() would
+	-- concatenate a fresh string for every closing tag, even when the tags
+	-- agree. Level 0 keeps the message free of position info, as assert did.
+	if element[0] ~= tag then
+		error("Error while closing element: table[0] should be `"..tostring(tag).."' but is `"..tostring(element[0]).."'", 0)
+	end
+	-- After the pop, the top of the stack is the parent element.
+	tinsert(stack[#stack], element)
+end
+
+--- CharacterData callback: stores a chunk of text in the innermost open
+-- element. When the element's last child is already a string the chunk is
+-- appended to it, so consecutive chunks end up as a single text child;
+-- otherwise the chunk becomes a new child.
+-- @param p   parser object whose callbacks table carries a `stack` field
+-- @param txt the text chunk
+local function text (p, txt)
+	local open = p:getcallbacks().stack
+	local current = open[#open]
+	local last = #current
+	-- Index 0 holds the tag name (a string too), hence the `last > 0` guard:
+	-- only real children at positions >= 1 may be merged into.
+	local mergeable = type(current[last]) == "string" and last > 0
+	if not mergeable then
+		tinsert(current, txt)
+		return
+	end
+	current[last] = current[last] .. txt
+end
+
+-- main function -------------------------------------------------------------
+--- Parses an XML document into a tree of tables.
+-- Each element is a table with the tag name at index 0, attributes as
+-- string-keyed fields, and children (strings or element tables) at 1..n.
+-- @param o the document: a string with the whole document, an array of
+--          string chunks that are fed to the parser in order, or an
+--          iterator function returning successive chunks (nil ends input)
+-- @return the root element table; on failure, nil followed by the error
+--         values returned by the parser (named err, line, col, pos here)
+local function parse (o)
+	local c = {
+		StartElement = starttag,
+		EndElement = endtag,
+		CharacterData = text,
+		_nonstrict = true,
+		stack = {{}}, -- bottom entry is a dummy parent that receives the root
+	}
+	local p = lxp.new(c)
+	local to = type(o)
+	if to == "string" then
+		local status, err, line, col, pos = p:parse(o)
+		if not status then return nil, err, line, col, pos end
+	else
+		local nextchunk
+		if to == "table" then
+			-- Feed the array *values* in order. The former `pairs(o)` loop
+			-- handed the parser the keys (1, 2, ...) instead of the chunks.
+			local i = 0
+			nextchunk = function ()
+				i = i + 1
+				return o[i]
+			end
+		elseif to == "function" then
+			nextchunk = o
+		else
+			error("bad argument #1 to 'parse' (string, table or iterator function expected, got "..to..")", 2)
+		end
+		for chunk in nextchunk do
+			local status, err, line, col, pos = p:parse(chunk)
+			if not status then return nil, err, line, col, pos end
+		end
+	end
+	local status, err, line, col, pos = p:parse() -- close document
+	if not status then return nil, err, line, col, pos end
+	p:close()
+	-- The root element is the only child of the dummy bottom entry.
+	return c.stack[1][1]
+end
+
+-- utility functions ---------------------------------------------------------
+--- Removes the false/nil entries found in positions 1..#t of `t`, shifting
+-- the remaining entries down so they stay contiguous and in their original
+-- order. Works in place and returns nothing.
+-- @param t the array to compact
+local function compact (t) -- remove empty entries
+	local kept = 0 -- number of entries retained so far
+	for idx = 1, #t do
+		local item = t[idx]
+		if not item then
+			t[idx] = nil
+		else
+			kept = kept + 1
+			if kept ~= idx then
+				-- Slide the entry down into the first free slot.
+				t[kept], t[idx] = item, nil
+			end
+		end
+	end
+end
+
+--- Recursively drops empty and whitespace-only strings from positions
+-- 1..#t of `t` and of every nested table found there. Works in place.
+-- @param t the element table to clean
+local function clean (t) -- remove empty strings
+	for idx = 1, #t do
+		local child = t[idx]
+		local kind = type(child)
+		if kind == "string" then
+			-- Mark blank text with false; compact() below removes the marks.
+			if child:match("^%s*$") then
+				t[idx] = false
+			end
+		elseif kind == "table" then
+			clean (child)
+		end
+	end
+	compact (t)
+end
+
+--- Turns child elements that contain a single string into plain fields of
+-- their parent: a child {[0] = name, "value"} becomes t[name] = "value".
+-- Only the string is copied; any other fields of such a child are dropped
+-- together with it. A child is left in place (and processed recursively)
+-- when it has a different shape or when t[name] is already in use.
+-- Works in place.
+-- @param t the element table to convert
+local function torecord (t) -- move 1-value subtables to table entries
+	for idx = 1, #t do
+		local child = t[idx]
+		if type(child) == "table" then
+			local name = child[0]
+			local single_text = #child == 1 and type(child[1]) == "string"
+			if single_text and t[name] == nil then
+				t[name] = child[1]
+				t[idx] = false -- marked for removal by compact() below
+			else
+				torecord (child)
+			end
+		end
+	end
+	compact (t)
+end
+
+-- Module table: the parser entry point followed by the in-place tree
+-- utilities.
+return {
+	parse = parse,
+	clean = clean,
+	torecord = torecord,
+	compact = compact,
+}
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tests/test-table.lua	Fri Apr 23 21:18:24 2021 +0100
@@ -0,0 +1,278 @@
+#!/usr/local/bin/lua
+
+local totable = require "lxp.totable"
+
+local tests = {
+	{
+		[[<abc a1="A1" a2="A2">inside tag `abc'</abc>]],
+		{
+			[0] = "abc",
+			a1 = "A1",
+			a2 = "A2",
+			"inside tag `abc'",
+		},
+	},
+	{
+		[[<qwerty q1="q1" q2="q2">
+	<asdf>some text</asdf>
+</qwerty>]],
+		{
+			[0] = "qwerty",
+			q1 = "q1",
+			q2 = "q2",
+			"\n\t",
+			{
+				[0] = "asdf",
+				"some text",
+			},
+			"\n",
+		},
+	},
+	{
+		[[
+<!-- http://www.w3schools.com/xml/simple.xml -->
+<breakfast_menu>
+	<food>
+		<name>Belgian Waffles</name>
+		<price>$5.95</price>
+		<description>Two of our famous Belgian Waffles with plenty of real maple syrup</description>
+		<calories>650</calories>
+	</food>
+	<food>
+		<name>Strawberry Belgian Waffles</name>
+		<price>$7.95</price>
+		<description>Light Belgian waffles covered with strawberries and whipped cream</description>
+		<calories>900</calories>
+	</food>
+	<food>
+		<name>Berry-Berry Belgian Waffles</name>
+		<price>$8.95</price>
+		<description>Light Belgian waffles covered with an assortment of fresh berries and whipped cream</description>
+		<calories>900</calories>
+	</food>
+	<food>
+		<name>French Toast</name>
+		<price>$4.50</price>
+		<description>Thick slices made from our homemade sourdough bread</description>
+		<calories>600</calories>
+	</food>
+	<food>
+		<name>Homestyle Breakfast</name>
+		<price>$6.95</price>
+		<description>Two eggs, bacon or sausage, toast, and our ever-popular hash browns</description>
+		<calories>950</calories>
+	</food>
+</breakfast_menu>]],
+		{
+			[0] = "breakfast_menu",
+			[1] = "\n\t",
+			[2] = {
+				[0] = "food",
+				[1] = "\n\t\t",
+				[2] = { [0] = "name", [1] = "Belgian Waffles", },
+				[3] = "\n\t\t",
+				[4] = { [0] = "price", [1] = "$5.95", },
+				[5] = "\n\t\t",
+				[6] = {
+					[0] = "description",
+					[1] = "Two of our famous Belgian Waffles with plenty of real maple syrup",
+				},
+				[7] = "\n\t\t",
+				[8] = { [0] = "calories", [1] = "650", },
+				[9] = "\n\t",
+			},
+			[3] = "\n\t",
+			[4] = {
+				[0] = "food",
+				[1] = "\n\t\t",
+				[2] = { [0] = "name", [1] = "Strawberry Belgian Waffles", },
+				[3] = "\n\t\t",
+				[4] = { [0] = "price", [1] = "$7.95", },
+				[5] = "\n\t\t",
+				[6] = {
+					[0] = "description",
+					[1] = "Light Belgian waffles covered with strawberries and whipped cream",
+				},
+				[7] = "\n\t\t",
+				[8] = { [0] = "calories", [1] = "900", },
+				[9] = "\n\t",
+			},
+			[5] = "\n\t",
+			[6] = {
+				[0] = "food",
+				[1] = "\n\t\t",
+				[2] = { [0] = "name", [1] = "Berry-Berry Belgian Waffles", },
+				[3] = "\n\t\t",
+				[4] = { [0] = "price", [1] = "$8.95", },
+				[5] = "\n\t\t",
+				[6] = {
+					[0] = "description",
+					[1] = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream",
+				},
+				[7] = "\n\t\t",
+				[8] = { [0] = "calories", [1] = "900", },
+				[9] = "\n\t",
+			},
+			[7] = "\n\t",
+			[8] = {
+				[0] = "food",
+				[1] = "\n\t\t",
+				[2] = { [0] = "name", [1] = "French Toast", },
+				[3] = "\n\t\t",
+				[4] = { [0] = "price", [1] = "$4.50", },
+				[5] = "\n\t\t",
+				[6] = {
+					[0] = "description",
+					[1] = "Thick slices made from our homemade sourdough bread",
+				},
+				[7] = "\n\t\t",
+				[8] = { [0] = "calories", [1] = "600", },
+				[9] = "\n\t",
+			},
+			[9] = "\n\t",
+			[10] = {
+				[0] = "food",
+				[1] = "\n\t\t",
+				[2] = { [0] = "name", [1] = "Homestyle Breakfast", },
+				[3] = "\n\t\t",
+				[4] = { [0] = "price", [1] = "$6.95", },
+				[5] = "\n\t\t",
+				[6] = {
+					[0] = "description",
+					[1] = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns",
+				},
+				[7] = "\n\t\t",
+				[8] = { [0] = "calories", [1] = "950", },
+				[9] = "\n\t",
+			},
+			[11] = "\n",
+		},
+		clean = {
+			[0] = "breakfast_menu",
+			[1] = {
+				[0] = "food",
+				[1] = { [0] = "name", [1] = "Belgian Waffles", },
+				[2] = { [0] = "price", [1] = "$5.95", },
+				[3] = {
+					[0] = "description",
+					[1] = "Two of our famous Belgian Waffles with plenty of real maple syrup",
+				},
+				[4] = { [0] = "calories", [1] = "650", },
+			},
+			[2] = {
+				[0] = "food",
+				[1] = { [0] = "name", [1] = "Strawberry Belgian Waffles", },
+				[2] = { [0] = "price", [1] = "$7.95", },
+				[3] = {
+					[0] = "description",
+					[1] = "Light Belgian waffles covered with strawberries and whipped cream",
+				},
+				[4] = { [0] = "calories", [1] = "900", },
+			},
+			[3] = {
+				[0] = "food",
+				[1] = { [0] = "name", [1] = "Berry-Berry Belgian Waffles", },
+				[2] = { [0] = "price", [1] = "$8.95", },
+				[3] = {
+					[0] = "description",
+					[1] = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream",
+				},
+				[4] = { [0] = "calories", [1] = "900", },
+			},
+			[4] = {
+				[0] = "food",
+				[1] = { [0] = "name", [1] = "French Toast", },
+				[2] = { [0] = "price", [1] = "$4.50", },
+				[3] = {
+					[0] = "description",
+					[1] = "Thick slices made from our homemade sourdough bread",
+				},
+				[4] = { [0] = "calories", [1] = "600", },
+			},
+			[5] = {
+				[0] = "food",
+				[1] = { [0] = "name", [1] = "Homestyle Breakfast", },
+				[2] = { [0] = "price", [1] = "$6.95", },
+				[3] = {
+					[0] = "description",
+					[1] = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns",
+				},
+				[4] = { [0] = "calories", [1] = "950", },
+			},
+		},
+		torecord = {
+			[0] = "breakfast_menu",
+			[1] = {
+				[0] = "food",
+				name = "Belgian Waffles",
+				price = "$5.95",
+				description = "Two of our famous Belgian Waffles with plenty of real maple syrup",
+				calories = "650",
+			},
+			[2] = {
+				[0] = "food",
+				name = "Strawberry Belgian Waffles",
+				price = "$7.95",
+				description = "Light Belgian waffles covered with strawberries and whipped cream",
+				calories = "900",
+			},
+			[3] = {
+				[0] = "food",
+				name = "Berry-Berry Belgian Waffles",
+				price = "$8.95",
+				description = "Light Belgian waffles covered with an assortment of fresh berries and whipped cream",
+				calories = "900",
+			},
+			[4] = {
+				[0] = "food",
+				name = "French Toast",
+				price = "$4.50",
+				description = "Thick slices made from our homemade sourdough bread",
+				calories = "600",
+			},
+			[5] = {
+				[0] = "food",
+				name = "Homestyle Breakfast",
+				price = "$6.95",
+				description = "Two eggs, bacon or sausage, toast, and our ever-popular hash browns",
+				calories = "950",
+			},
+		},
+	},
+}
+
+
+--- Deep-compares two tables (test helper).
+-- Nested tables are compared recursively; all other values with `~=`.
+-- @param t1 first table
+-- @param t2 second table
+-- @return true when both tables hold the same keys with equal values;
+--         otherwise false plus a message describing the first difference
+function table.equal (t1, t2)
+	for key, v1 in pairs (t1) do
+		local v2 = t2[key]
+		local tv = type(v1)
+		if tv == "table" then
+			if type(v2) ~= "table" then
+				return false, "Different types at entry `"..tostring(key).."': t1."..tostring(key).." is "..tv.." while t2."..tostring(key).." is "..type(v2).." ["..tostring(v2).."]"
+			end
+			local ok, msg = table.equal (v1, v2)
+			if not ok then
+				return false, "["..tostring(key).."]\t"..tostring(v1).." ~= "..tostring(v2).."; "..msg
+			end
+		elseif v1 ~= v2 then
+			return false, "["..tostring(key).."]\t"..tostring(v1).." ~= "..tostring(v2)
+		end
+	end
+	-- Reverse pass: the loop above only visits the keys of t1, so an entry
+	-- that exists only in t2 would otherwise go unnoticed (for instance an
+	-- empty t1 would compare equal to anything).
+	for key, v2 in pairs (t2) do
+		if t1[key] == nil then
+			return false, "["..tostring(key).."]\tnil ~= "..tostring(v2)
+		end
+	end
+	return true
+end
+
+
+-- XML declaration prepended to every test document.
+local prolog = [[<?xml version="1.0" encoding="ISO-8859-1"?>]]
+
+-- Every fixture must parse into its expected tree.
+for _, s in ipairs(tests) do
+	local ds = assert (totable.parse (prolog..s[1]))
+	assert(table.equal (ds, s[2]))
+end
+
+-- The third fixture also carries the expected results of clean() and
+-- torecord(), which are applied in place one after the other. The parse
+-- result is asserted so a parse failure reports the parser's message
+-- instead of failing later inside clean().
+local t = assert (totable.parse (prolog..tests[3][1]))
+totable.clean (t)
+assert (table.equal (t, tests[3].clean))
+totable.torecord (t)
+assert (table.equal (t, tests[3].torecord))
+
+print"OK"

mercurial