tests/test-lom.lua

changeset 36
4a61f00ee916
parent 0
24d141cb2d1e
equal deleted inserted replaced
35:d2d0bc06eac2 36:4a61f00ee916
1 #!/usr/local/bin/lua 1 #!/usr/local/bin/lua
2 2
3 local lom = require "lxp.lom" 3 local lom = require "lxp.lom"
4 4
-- U+00FA (LATIN SMALL LETTER U WITH ACUTE) spelled as raw bytes in both
-- encodings, so the fixtures do not depend on how this file itself is saved.
local u_acute_utf8 = string.char(195)..string.char(186) -- C3 BA
local u_acute_latin1 = string.char(250) -- FA

-- Test cases consumed by the loop below.  Each entry is a table holding:
--   [1]       the XML body (the XML declaration is prepended by the loop)
--   [2]       the expected LOM tree for that body
--   encoding  optional encoding name written into the XML declaration
--             (the loop falls back to "ISO-8859-1" when absent)
--   doctype   optional DOCTYPE text placed right after the XML declaration
-- Whitespace that the expected trees depend on is written with explicit
-- escapes ("\n", "\t") instead of literal characters inside long strings,
-- so editors that convert tabs cannot silently break the expectations.
local tests = {
	{
		[[<abc a1="A1" a2="A2">inside tag `abc'</abc>]],
		{
			tag = "abc",
			attr = { "a1", "a2", a1 = "A1", a2 = "A2", },
			"inside tag `abc'",
		},
	},
	{
		"<qwerty q1=\"q1\" q2=\"q2\">\n\t<asdf>some text</asdf>\n</qwerty>",
		{
			tag = "qwerty",
			attr = { "q1", "q2", q1 = "q1", q2 = "q2", },
			"\n\t",
			{
				tag = "asdf",
				attr = {},
				"some text",
			},
			"\n",
		},
	},
	{
		[[<ul><li>conteudo 1</li><li>conte]]..u_acute_utf8..[[do 2</li></ul>]],
		encoding = "UTF-8",
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"conteudo 1",
			},
			{
				tag = "li",
				attr = {},
				"conte"..u_acute_utf8.."do 2",
			},
		},
	},
	{
		[[<ul><li>Conteudo 1</li><li>Conte]]..u_acute_latin1..[[do 2</li><li>Conte&uacute;do 3</li></ul>]],
		encoding = "ISO-8859-1",
		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"Conteudo 1",
			},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do 2", -- Latin-1 becomes UTF-8
			},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do 3", -- entity becomes a UTF-8 character
			},
		},
	},
	{
		[[<ul><li>Conte&uacute;do</li></ul>]],
		-- DOCTYPE variants that were tried, with the outcome noted for each:
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]], --> entities are ignored
		--doctype = [[<!DOCTYPE html SYSTEM "about:legacy-compat">]], --> entities are ignored
		--doctype = [[<!DOCTYPE html>]], --> undefined entity
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">]], --> syntax error
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" SYSTEM "http://www.w3.org/TR/html4/strict.dtd">]], --> syntax error
		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1//EN//HTML">]], --> syntax error
		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">]], --> entities are ignored
		--doctype = [[<!DOCTYPE isolat1 PUBLIC "//W3C//ENTITIES Added Latin 1//EN//XML" "http://www.w3.org/2003/entities/2007/isolat1.ent">]], --> entities are ignored
		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
		encoding = "UTF-8",
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do", -- entity becomes a UTF-8 character
			},
		},
	},
}
11 98
--- Deep structural comparison of two tables.
-- Entries are compared key by key; values that are tables are compared
-- recursively, every other value is compared with `~=`.
-- Keys present in only one of the two tables make the comparison fail,
-- whichever side they are on.
-- NOTE: this is installed in the standard `table` namespace because the
-- rest of this file calls it as `table.equal`.
-- @param t1 first table
-- @param t2 second table
-- @return true when both tables hold the same entries; otherwise false
--         followed by a message describing the first difference found
function table.equal (t1, t2)
	for nome, val in pairs (t1) do
		local other = t2[nome]
		-- Keys may be of any type, so stringify before concatenating.
		local key = tostring(nome)
		local tv = type(val)
		if tv == "table" then
			if type(other) ~= "table" then
				return false, "Different types at entry `"..key.."': t1."..key.." is "..tv.." while t2."..key.." is "..type(other).." ["..tostring(other).."]"
			end
			local ok, msg = table.equal (val, other)
			if not ok then
				return false, "["..key.."]\t"..tostring(val).." ~= "..tostring(other).."; "..msg
			end
		elseif val ~= other then
			return false, "["..key.."]\t["..tostring(val).."] ~= ["..tostring(other)..']'
		end
	end
	-- Everything in t1 matched; now reject entries that exist only in t2.
	for nome, val in pairs (t2) do
		if t1[nome] == nil then
			return false, "["..tostring(nome).."]\t[nil] ~= ["..tostring(val)..']'
		end
	end
	return true
end
44 119
45 120
-- Run every case in `tests`: build the full document (XML declaration,
-- optional DOCTYPE, body), parse it, and compare the result with the
-- expected tree.  Each document is parsed twice, once handed over as a
-- single string and once through an iterator, and both results must match.
for i, s in ipairs(tests) do
	io.write'.'
	local encoding = s.encoding or "ISO-8859-1"
	local header = [[<?xml version="1.0" encoding="]]..encoding..[["?>]]..(s.doctype or '')
	local doc = header..s[1]
	local expected = s[2]

	-- Parse `input` and compare against `expected`; failures report the
	-- case index and the input mode so the broken case is easy to locate.
	local function check (mode, input)
		local tree, err = lom.parse (input)
		assert (tree, string.format ("test #%d (%s): parse failed: %s", i, mode, tostring (err)))
		local ok, msg = table.equal (tree, expected)
		assert (ok, string.format ("test #%d (%s): %s", i, mode, tostring (msg)))
	end

	check ("string", doc)
	-- Same document fed piecewise: the iterator yields the shortest runs
	-- of text that end in '>'.
	check ("iterator", string.gmatch(doc, ".-%>"))
end
133
-- Checks for lom.find_elem and lom.list_children on a small nested document.
-- The document is assembled line by line with explicit "\t" escapes because
-- the assertions below depend on the exact whitespace between elements
-- (e.g. o[1] must be "\n\t"); literal tabs inside a long string would be
-- invisible and easy to lose.
local o = assert (lom.parse (table.concat ({
	'<?xml version="1.0"?>',
	'<a1>',
	'\t<b1>',
	'\t\t<c1>t111</c1>',
	'\t\t<c2>t112</c2>',
	'\t</b1>',
	'\t<b2>',
	'\t\t<c1>t121</c1>',
	'\t\t<c2>t122</c2>',
	'\t</b2>',
	'</a1>',
}, "\n")))
assert (o.tag == "a1")
-- Whitespace between tags is kept as text nodes, so element children sit
-- at the even positions.
assert (o[1] == "\n\t")
assert (o[2].tag == "b1")
assert (o[2][2].tag == "c1")

-- find_elem is expected to hand back the first <c1>, the one holding "t111".
local c1 = lom.find_elem (o, "c1")
assert (type(c1) == "table")
assert (c1.tag == "c1")
assert (c1[1] == "t111")

-- list_children is expected to yield only the element children, in order,
-- and then nil.
local next_child = lom.list_children (o)
assert (next_child().tag == "b1")
assert (next_child().tag == "b2")
assert (next_child() == nil)

print"OK"

mercurial