#!/usr/local/bin/lua

local lom = require "lxp.lom"

-- The letter "u with acute accent" (U+00FA) as raw bytes in the two encodings
-- used by the test documents below.
local u_acute_utf8 = string.char(195)..string.char(186) -- C3 BA
local u_acute_latin1 = string.char(250) -- FA

-- Test cases consumed by the loop further down. Each entry is a table with:
--   [1]       the XML document body (without the XML declaration),
--   [2]       the table that lom.parse is expected to build for it,
--   encoding  optional encoding name placed in the XML declaration,
--   doctype   optional DOCTYPE declaration inserted before the body.
local tests = {
	{
		[[<abc a1="A1" a2="A2">inside tag `abc'</abc>]],
		{
			tag="abc",
			attr = { "a1", "a2", a1 = "A1", a2 = "A2", },
			"inside tag `abc'",
		},
	},
	{
		-- Written with escapes so the newline/tab text nodes expected below
		-- do not depend on how this file's whitespace is stored.
		"<qwerty q1=\"q1\" q2=\"q2\">\n\t<asdf>some text</asdf>\n</qwerty>",
		{
			tag = "qwerty",
			attr = { "q1", "q2", q1 = "q1", q2 = "q2", },
			"\n\t",
			{
				tag = "asdf",
				attr = {},
				"some text",
			},
			"\n",
		},
	},
	{
		[[<ul><li>conteudo 1</li><li>conte]]..u_acute_utf8..[[do 2</li></ul>]],
		encoding = "UTF-8",
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"conteudo 1",
			},
			{
				tag = "li",
				attr = {},
				"conte"..u_acute_utf8.."do 2",
			},
		},
	},
	{
		[[<ul><li>Conteudo 1</li><li>Conte]]..u_acute_latin1..[[do 2</li><li>Conte&uacute;do 3</li></ul>]],
		encoding = "ISO-8859-1",
		-- The entity value is a numeric character reference so that it does
		-- not depend on the document encoding.
		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"Conteudo 1",
			},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do 2", -- Latin-1 becomes UTF-8
			},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do 3", -- entity becomes a UTF-8 character
			},
		},
	},
	{
		[[<ul><li>Conte&uacute;do</li></ul>]],
		-- Other DOCTYPE declarations that were tried, with the result noted:
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">]], --> entities are ignored
		--doctype = [[<!DOCTYPE html SYSTEM "about:legacy-compat">]], --> entities are ignored
		--doctype = [[<!DOCTYPE html>]], --> undefined entity
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN">]], --> syntax error
		--doctype = [[<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01//EN" SYSTEM "http://www.w3.org/TR/html4/strict.dtd">]], --> syntax error
		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1//EN//HTML">]], --> syntax error
		--doctype = [[<!DOCTYPE HTMLlat1 PUBLIC "-//W3C//ENTITIES Latin 1 for XHTML//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml-lat1.ent">]], --> entities are ignored
		--doctype = [[<!DOCTYPE isolat1 PUBLIC "//W3C//ENTITIES Added Latin 1//EN//XML" "http://www.w3.org/2003/entities/2007/isolat1.ent">]], --> entities are ignored
		doctype = [[<!DOCTYPE test [<!ENTITY uacute "&#250;">]>]], -- Ok!
		encoding = "UTF-8",
		{
			tag = "ul",
			attr = {},
			{
				tag = "li",
				attr = {},
				"Conte"..u_acute_utf8.."do", -- entity becomes a UTF-8 character
			},
		},
	},
}

--- Render a table as a multi-line, brace-delimited string (debugging aid).
-- String keys are written quoted, number and boolean keys unquoted; for any
-- other key type only the type name is written. String and number values are
-- written with %q, nested tables recursively, and any other value as its
-- type name. Entries come out in pairs() order.
-- @param tab table to render
-- @param indent string added per nesting level (defaults to a tab)
-- @param spacing string prefixed to every line of this level (defaults to "")
-- @return the rendered string
function table._tostring (tab, indent, spacing)
	spacing = spacing or ""
	indent = indent or "\t"
	local out = { "{\n" }
	for key, val in pairs (tab) do
		out[#out+1] = spacing..indent
		local kt = type(key)
		if kt == "string" then
			out[#out+1] = string.format ("[%q] = ", tostring (key))
		elseif kt == "number" or kt == "boolean" then
			out[#out+1] = string.format ("[%s] = ", tostring (key))
		else
			out[#out+1] = kt
		end
		local vt = type(val)
		if vt == "string" or vt == "number" then
			out[#out+1] = string.format ("%q", val)
		elseif vt == "table" then
			out[#out+1] = table._tostring (val, indent, spacing..indent)
		else
			out[#out+1] = vt
		end
		out[#out+1] = ",\n"
	end
	out[#out+1] = spacing.."}"
	return table.concat (out)
end

--- Write the rendering produced by table._tostring to standard output.
-- Takes the same arguments as table._tostring.
function table.print (tab, indent, spacing)
	io.write (table._tostring (tab, indent, spacing))
end

--- Deep comparison of two tables.
-- Nested tables are compared recursively; every other value is compared
-- with the ~= operator. Keys that exist in only one of the two tables make
-- the comparison fail.
-- @param t1 first table
-- @param t2 second table
-- @return true when both tables hold the same entries; otherwise false plus
--         a message describing the first difference found
function table.equal (t1, t2)
	for key, val in pairs (t1) do
		local other = t2[key]
		-- tostring() keeps message building safe for boolean or table keys,
		-- which cannot be concatenated directly.
		local name = tostring (key)
		local tv = type(val)
		if tv == "table" then
			if type(other) ~= "table" then
				return false, "Different types at entry `"..name.."': t1."..name.." is "..tv.." while t2."..name.." is "..type(other).." ["..tostring(other).."]"
			end
			local ok, msg = table.equal (val, other)
			if not ok then
				return false, "["..name.."]\t"..tostring(val).." ~= "..tostring(other).."; "..msg
			end
		elseif val ~= other then
			return false, "["..name.."]\t["..tostring(val).."] ~= ["..tostring(other)..']'
		end
	end
	-- The loop above only visits keys of t1; also reject keys that exist
	-- solely in t2.
	for key, val in pairs (t2) do
		if t1[key] == nil then
			return false, "["..tostring(key).."]\t[nil] ~= ["..tostring(val)..']'
		end
	end
	return true
end


-- Run every entry of `tests`: build the full document (XML declaration,
-- optional DOCTYPE, body), parse it and compare the result with the
-- expected table.
for _, s in ipairs(tests) do
	io.write'.' -- progress mark, one per test case
	local encoding = s.encoding or "ISO-8859-1"
	local header = [[<?xml version="1.0" encoding="]]..encoding..[["?>]]..(s.doctype or '')
	local doc = header..s[1]

	-- Parse the document given as a single string.
	local o1 = assert (lom.parse (doc))
	assert(table.equal (o1, s[2]))

	-- Parse the same document again, this time handing lom.parse the
	-- string.gmatch iterator, which yields the text in pieces that each end
	-- at a '>'.
	local o2 = assert (lom.parse (string.gmatch(doc, ".-%>")))
	assert(table.equal (o2, s[2]))
end

-- Checks on a small nested document: direct indexing of the parsed tree,
-- lom.find_elem and lom.list_children.
-- The document is assembled with explicit "\n" and "\t" so that the
-- whitespace text nodes asserted below do not depend on how this file's
-- indentation is stored.
local o = assert (lom.parse (table.concat ({
	'<?xml version="1.0"?>',
	'<a1>',
	'\t<b1>',
	'\t\t<c1>t111</c1>',
	'\t\t<c2>t112</c2>',
	'\t</b1>',
	'\t<b2>',
	'\t\t<c1>t121</c1>',
	'\t\t<c2>t122</c2>',
	'\t</b2>',
	'</a1>',
}, "\n")))
assert (o.tag == "a1")
assert (o[1] == "\n\t") -- whitespace between <a1> and <b1> is kept as a text node
assert (o[2].tag == "b1")
assert (o[2][2].tag == "c1") -- o[2][1] is the whitespace before <c1>

-- find_elem is expected to return the first <c1>, i.e. the one inside <b1>.
local c1 = lom.find_elem (o, "c1")
assert (type(c1) == "table")
assert (c1.tag == "c1")
assert (c1[1] == "t111")

-- list_children returns an iterator that is expected to yield the element
-- children in order (<b1>, then <b2>) and nil afterwards.
local next_child = lom.list_children (o)
assert (next_child().tag == "b1")
assert (next_child().tag == "b2")
assert (next_child() == nil)

print"OK"