tests/test-lom.lua

changeset 36
4a61f00ee916
parent 0
24d141cb2d1e
--- a/tests/test-lom.lua	Fri Apr 23 21:03:34 2021 +0100
+++ b/tests/test-lom.lua	Fri Apr 23 21:15:05 2021 +0100
@@ -2,49 +2,158 @@
 
 local lom = require "lxp.lom"
 
+local u_acute_utf8 = string.char(195)..string.char(186) -- C3 BA
+local u_acute_latin1 = string.char(250) -- FA
+
 local tests = {
-	[[<abc a1="A1" a2="A2">inside tag `abc'</abc>]],
-	[[<qwerty q1="q1" q2="q2">
+	{
+		[[<abc a1="A1" a2="A2">inside tag `abc'</abc>]],
+		{
+			tag="abc",
+			attr = { "a1", "a2", a1 = "A1", a2 = "A2", },
+			"inside tag `abc'",
+		},
+	},
+	{
+		[[<qwerty q1="q1" q2="q2">
 	<asdf>some text</asdf>
 </qwerty>]],
+		{
+			tag = "qwerty",
+			attr = { "q1", "q2", q1 = "q1", q2 = "q2", },
+			"\n\t",
+			{
+				tag = "asdf",
+				attr = {},
+				"some text",
+			},
+			"\n",
+		},
+	},
+	{
+		[[<ul><li>conteudo 1</li><li>conte]]..u_acute_utf8..[[do 2</li></ul>]],
+		encoding = "UTF-8",
+		{
+			tag = "ul",
+			attr = {},
+			{
+				tag = "li",
+				attr = {},
+				"conteudo 1",
+			},
+			{
+				tag = "li",
+				attr = {},
+				"conteúdo 2",
+			},
+		},
+	},
+	{
+		[[<ul><li>Conteudo 1</li><li>Conte]]..u_acute_latin1..[[do 2</li><li>Conte&uacute;do 3</li></ul>]],
+		encoding = "ISO-8859-1",
+		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
+		{
+			tag = "ul",
+			attr = {},
+			{
+				tag = "li",
+				attr = {},
+				"Conteudo 1",
+			},
+			{
+				tag = "li",
+				attr = {},
+				"Conteúdo 2", -- Latin-1 becomes UTF-8
+			},
+			{
+				tag = "li",
+				attr = {},
+				"Conteúdo 3", -- entity becomes a UTF-8 character
+			},
+		},
+	},
+	{
+		[[<ul><li>Conte&uacute;do</li></ul>]],
+		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]], --> entities are ignored
+		--doctype = [[<!DOCTYPE html SYSTEM "about:legacy-compat">]], --> entities are ignored
+		--doctype = [[<!DOCTYPE html>]], --> undefined entity
+		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">]], --> syntax error
+		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" SYSTEM "http://www.w3.org/TR/html4/strict.dtd">]], --> syntax error
+		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1//EN//HTML">]], --> syntax error
+		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">]], --> entities are ignored
+		--doctype = [[<!DOCTYPE isolat1 PUBLIC "//W3C//ENTITIES Added Latin 1//EN//XML" "http://www.w3.org/2003/entities/2007/isolat1.ent">]], --> entities are ignored
+		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
+		encoding = "UTF-8",
+		{
+			tag = "ul",
+			attr = {},
+			{
+				tag = "li",
+				attr = {},
+				"Conteúdo", -- entity becomes a UTF-8 character
+			},
+		},
+	},
 }
 
-function table._tostring (tab, indent, spacing)
-	local s = {}
-	spacing = spacing or ""
-	indent = indent or "\t"
-    table.insert (s, "{\n")
-    for nome, val in pairs (tab) do
-        table.insert (s, spacing..indent)
-        local t = type(nome)
-		if t == "string" then
-            table.insert (s, string.format ("[%q] = ", tostring (nome)))
-		elseif t == "number" or t == "boolean" then
-            table.insert (s, string.format ("[%s] = ", tostring (nome)))
-        else
-            table.insert (s, t)
-        end
-        t = type(val)
-        if t == "string" or t == "number" then
-            table.insert (s, string.format ("%q", val))
-        elseif t == "table" then
-            table.insert (s, table._tostring (val, indent, spacing..indent))
-        else
-            table.insert (s, t)
-        end
-        table.insert (s, ",\n")
-    end
-    table.insert (s, spacing.."}")
-	return table.concat (s)
-end
-
-function table.print (tab, indent, spacing)
-	io.write (table._tostring (tab, indent, spacing))
+function table.equal (t1, t2)
+	for nome, val in pairs (t1) do
+		local tv = type(val)
+		if tv == "table" then
+			if type(t2[nome]) ~= "table" then
+				return false, "Different types at entry `"..nome.."': t1."..nome.." is "..tv.." while t2."..nome.." is "..type(t2[nome]).." ["..tostring(t2[nome]).."]"
+			else
+				local ok, msg = table.equal (val, t2[nome])
+				if not ok then
+					return false, "["..nome.."]\t"..tostring(val).." ~= "..tostring(t2[nome]).."; "..msg
+				end
+			end
+		else
+			if val ~= t2[nome] then
+				return false, "["..nome.."]\t["..tostring(val).."] ~= ["..tostring(t2[nome])..']'
+			end
+		end
+	end
+	return true
 end
 
 
 for i, s in ipairs(tests) do
-	--s = string.gsub (s, "[\n\r\t]", "")
-	local ds = assert (lom.parse ([[<?xml version="1.0" encoding="ISO-8859-1"?>]]..s))
-	print(table._tostring(ds))
+	io.write'.'
+	local encoding = s.encoding or "ISO-8859-1"
+	local header = [[<?xml version="1.0" encoding="]]..encoding..[["?>]]..(s.doctype or '')
+	local doc = header..s[1]
+
+	local o1 = assert (lom.parse (doc))
+	assert(table.equal (o1, s[2]))
+
+	local o2 = assert (lom.parse (string.gmatch(doc, ".-%>")))
+	assert(table.equal (o2, s[2]))
 end
+
+local o = assert (lom.parse ([[
+<?xml version="1.0"?>
+<a1>
+	<b1>
+		<c1>t111</c1>
+		<c2>t112</c2>
+	</b1>
+	<b2>
+		<c1>t121</c1>
+		<c2>t122</c2>
+	</b2>
+</a1>]]))
+assert (o.tag == "a1")
+assert (o[1] == "\n\t")
+assert (o[2].tag == "b1")
+assert (o[2][2].tag == "c1")
+local c1 = lom.find_elem (o, "c1")
+assert (type(c1) == "table")
+assert (c1.tag == "c1")
+assert (c1[1] == "t111")
+local next_child = lom.list_children (o)
+assert (next_child().tag == "b1")
+assert (next_child().tag == "b2")
+assert (next_child() == nil)
+
+print"OK"

mercurial