# HG changeset patch # User Matthew Wild # Date 1619208905 -3600 # Node ID 4a61f00ee916b099fefb39e372a5d36561fac756 # Parent d2d0bc06eac22bf5483fe9778028488c3ac68774 New implementation of lxp.lom by Tom?s Guisasola diff -r d2d0bc06eac2 -r 4a61f00ee916 src/lxp/lom.lua --- a/src/lxp/lom.lua Fri Apr 23 21:03:34 2021 +0100 +++ b/src/lxp/lom.lua Fri Apr 23 21:15:05 2021 +0100 @@ -1,12 +1,13 @@ -- See Copyright Notice in license.html --- $Id: lom.lua,v 1.6 2005/06/09 19:18:40 tuler Exp $ local lxp = require "lxp" +local table = require"table" local tinsert, tremove = table.insert, table.remove -local assert, type, print = assert, type, print +local assert, pairs, type = assert, pairs, type +-- auxiliary functions ------------------------------------------------------- local function starttag (p, tag, attr) local stack = p:getcallbacks().stack local newelement = {tag = tag, attr = attr} @@ -32,28 +33,76 @@ end end +-- main function ------------------------------------------------------------- local function parse (o) - local c = { StartElement = starttag, - EndElement = endtag, - CharacterData = text, - _nonstrict = true, - stack = {{}} - } - local p = lxp.new(c) - local status, err - if type(o) == "string" then - status, err = p:parse(o) - if not status then return nil, err end - else - for l in pairs(o) do - status, err = p:parse(l) - if not status then return nil, err end - end - end - status, err = p:parse() - if not status then return nil, err end - p:close() - return c.stack[1][1] + local c = { StartElement = starttag, + EndElement = endtag, + CharacterData = text, + _nonstrict = true, + stack = {{}} + } + local p = lxp.new(c) + local to = type(o) + if to == "string" then + local status, err, line, col, pos = p:parse(o) + if not status then return nil, err, line, col, pos end + else + local iter, state, init + if to == "table" then + iter, state, init = pairs(o) + elseif to == "function" then + iter = o + elseif to == "userdata" and o.read then + iter, state = o.read, o + else + error ("Bad argument #1 to parse: expected a string, a table, a function or a file, but got "..to, 2) + end + for l in iter, state, init do + local status, err, line, col, pos = p:parse(l) + if not status then return nil, err, line, col, pos end + end + end + local status, err, line, col, pos = p:parse() -- close document + if not status then return nil, err, line, col, pos end + p:close() + return c.stack[1][1] end -return { parse = parse } +-- utility functions --------------------------------------------------------- +local function find_elem (self, tag) + if self.tag == tag then + return self + end + for i = 1, #self do + local v = self[i] + if type(v) == "table" then + local found = find_elem (v, tag) + if found then + return found + end + end + end + return nil +end + +local function list_children (self, tag) + local i = 0 + return function () + i = i+1 + local v = self[i] + while v do + if type (v) == "table" and (tag == nil or tag == v.tag) then + return v + end + i = i+1 + v = self[i] + end + return nil + end +end + +return { + find_elem = find_elem, + list_children = list_children, + parse = parse, +} diff -r d2d0bc06eac2 -r 4a61f00ee916 tests/test-lom.lua --- a/tests/test-lom.lua Fri Apr 23 21:03:34 2021 +0100 +++ b/tests/test-lom.lua Fri Apr 23 21:15:05 2021 +0100 @@ -2,49 +2,158 @@ local lom = require "lxp.lom" +local u_acute_utf8 = string.char(195)..string.char(186) -- C3 BA +local u_acute_latin1 = string.char(250) -- FA + local tests = { - [[inside tag `abc']], - [[ + { + [[inside tag `abc']], + { + tag="abc", + attr = { "a1", "a2", a1 = "A1", a2 = "A2", }, + "inside tag `abc'", + }, + }, + { + [[ some text ]], + { + tag = "qwerty", + attr = { "q1", "q2", q1 = "q1", q2 = "q2", }, + "\n\t", + { + tag = "asdf", + attr = {}, + "some text", + }, + "\n", + }, + }, + { + [[]], + encoding = "UTF-8", + { + tag = "ul", + attr = {}, + { + tag = "li", + attr = {}, + "conteudo 1", + }, + { + tag = "li", + attr = {}, + "conteúdo 2", + }, + }, + }, + { + [[]], + encoding = "ISO-8859-1", + doctype = [[]>]], -- Ok! + { + tag = "ul", + attr = {}, + { + tag = "li", + attr = {}, + "Conteudo 1", + }, + { + tag = "li", + attr = {}, + "Conteúdo 2", -- Latin-1 becomes UTF-8 + }, + { + tag = "li", + attr = {}, + "Conteúdo 3", -- entity becomes a UTF-8 character + }, + }, + }, + { + [[]], + --doctype = [[]], --> ignora as entidades + --doctype = [[]], --> ignora as entidades + --doctype = [[]], --> undefined entity + --doctype = [[]], --> sintax error + --doctype = [[]], --> syntax error + --doctype = [[]], --> syntax error + --doctype = [[]], --> ignora entidades + --doctype = [[]], --> ignora entidades + doctype = [[]>]], -- Ok! + encoding = "UTF-8", + { + tag = "ul", + attr = {}, + { + tag = "li", + attr = {}, + "Conteúdo", -- entity becomes a UTF-8 character + }, + }, + }, } -function table._tostring (tab, indent, spacing) - local s = {} - spacing = spacing or "" - indent = indent or "\t" - table.insert (s, "{\n") - for nome, val in pairs (tab) do - table.insert (s, spacing..indent) - local t = type(nome) - if t == "string" then - table.insert (s, string.format ("[%q] = ", tostring (nome))) - elseif t == "number" or t == "boolean" then - table.insert (s, string.format ("[%s] = ", tostring (nome))) - else - table.insert (s, t) - end - t = type(val) - if t == "string" or t == "number" then - table.insert (s, string.format ("%q", val)) - elseif t == "table" then - table.insert (s, table._tostring (val, indent, spacing..indent)) - else - table.insert (s, t) - end - table.insert (s, ",\n") - end - table.insert (s, spacing.."}") - return table.concat (s) -end - -function table.print (tab, indent, spacing) - io.write (table._tostring (tab, indent, spacing)) +function table.equal (t1, t2) + for nome, val in pairs (t1) do + local tv = type(val) + if tv == "table" then + if type(t2[nome]) ~= "table" then + return false, "Different types at entry `"..nome.."': t1."..nome.." is "..tv.." while t2."..nome.." is "..type(t2[nome]).." ["..tostring(t2[nome]).."]" + else + local ok, msg = table.equal (val, t2[nome]) + if not ok then + return false, "["..nome.."]\t"..tostring(val).." ~= "..tostring(t2[nome]).."; "..msg + end + end + else + if val ~= t2[nome] then + return false, "["..nome.."]\t["..tostring(val).."] ~= ["..tostring(t2[nome])..']' + end + end + end + return true end for i, s in ipairs(tests) do - --s = string.gsub (s, "[\n\r\t]", "") - local ds = assert (lom.parse ([[]]..s)) - print(table._tostring(ds)) + io.write'.' + local encoding = s.encoding or "ISO-8859-1" + local header = [[]]..(s.doctype or '') + local doc = header..s[1] + + local o1 = assert (lom.parse (doc)) + assert(table.equal (o1, s[2])) + + local o2 = assert (lom.parse (string.gmatch(doc, ".-%>"))) + assert(table.equal (o2, s[2])) end + +local o = assert (lom.parse ([[ + + + + t111 + t112 + + + t121 + t122 + +]])) +assert (o.tag == "a1") +assert (o[1] == "\n\t") +assert (o[2].tag == "b1") +assert (o[2][2].tag == "c1") +local c1 = lom.find_elem (o, "c1") +assert (type(c1) == "table") +assert (c1.tag == "c1") +assert (c1[1] == "t111") +local next_child = lom.list_children (o) +assert (next_child().tag == "b1") +assert (next_child().tag == "b2") +assert (next_child() == nil) + +print"OK"