|
1 -- See Copyright Notice in license.html |
|
2 -- Based on Luiz Henrique de Figueiredo's lxml: |
|
3 -- http://www.tecgraf.puc-rio.br/~lhf/ftp/lua/#lxml |
|
4 |
|
5 local lxp = require "lxp" |
|
6 |
|
7 local table = require"table" |
|
8 local tinsert, tremove = table.insert, table.remove |
|
9 local assert, pairs, tostring, type = assert, pairs, tostring, type |
|
10 |
|
11 -- auxiliary functions ------------------------------------------------------- |
|
-- StartElement callback.
-- Creates a table for the element that has just been opened, keeping the tag
-- name at index 0 and every attribute as a named field, then pushes that
-- table on the stack held in the parser's callback table.
--   p    parser object; only p:getcallbacks().stack is used
--   tag  element name
--   attr attribute table: positions 1..#attr hold the attribute names, and
--        each name is also a key whose value is the attribute value
local function starttag (p, tag, attr)
  local open = p:getcallbacks().stack
  local node = {[0] = tag}
  local count = #attr
  for idx = 1, count do
    local key = attr[idx]
    node[key] = attr[key]
  end
  tinsert(open, node)
end
|
22 |
|
-- EndElement callback.
-- Pops the element on top of the stack, checks that its stored tag name
-- (index 0) is the one being closed, and appends it as a child of the
-- element that is now on top of the stack.
--   p    parser object; only p:getcallbacks().stack is used
--   tag  name of the element being closed
-- Raises an error (through assert) when the names do not match.
local function endtag (p, tag)
  local open = p:getcallbacks().stack
  local closed = tremove(open)
  local matches = closed[0] == tag
  local expected, actual = tostring(tag), tostring(closed[0])
  assert(matches, "Error while closing element: table[0] should be `"..expected.."' but is `"..actual.."'")
  -- after the pop, the last entry of the stack is the parent element
  local parent = open[#open]
  tinsert(parent, closed)
end
|
30 |
|
-- CharacterData callback.
-- Adds a piece of character data to the element on top of the stack. When
-- the element's last child is already a string the new piece is concatenated
-- to it, so consecutive pieces end up as a single string child.
--   p    parser object; only p:getcallbacks().stack is used
--   txt  the character data
local function text (p, txt)
  local open = p:getcallbacks().stack
  local current = open[#open]
  local last = #current
  -- last == 0 means "no children yet"; index 0 holds the tag name and must
  -- never be treated as text to append to
  if last == 0 or type(current[last]) ~= "string" then
    tinsert(current, txt)
  else
    current[last] = current[last] .. txt
  end
end
|
41 |
|
42 -- main function ------------------------------------------------------------- |
|
-- Parses an XML document into a tree of tables built by the starttag,
-- endtag and text callbacks.
--
--   o  the document source, one of:
--        * a string holding the whole document;
--        * a table whose entries 1..n are string chunks, fed in order;
--        * an iterator function returning the next chunk on every call and
--          nil when the input is exhausted;
--        * a file-like userdata with a `read` method (e.g. an io file),
--          which is read in fixed-size chunks until `read` returns nil.
--
-- Returns the table of the root element on success.
-- Returns nil, err, line, col, pos (as reported by p:parse) when the parser
-- rejects the input.
-- Raises an error when `o` is of none of the supported types.
local function parse (o)
  local c = {
    StartElement = starttag,
    EndElement = endtag,
    CharacterData = text,
    -- NOTE(review): presumably tells lxp to tolerate the extra `stack` field
    -- in the callback table -- confirm against the lxp documentation
    _nonstrict = true,
    -- stack[1] is a holder table; the finished root element is appended to it
    stack = {{}},
  }
  local p = lxp.new(c)
  local to = type(o)
  if to == "string" then
    local status, err, line, col, pos = p:parse(o)
    if not status then return nil, err, line, col, pos end
  else
    -- Build an iterator that yields the chunks themselves. Iterating with
    -- pairs() would hand the table's keys to the parser, in unspecified
    -- order, and cannot traverse a function or a file handle at all.
    local iter
    if to == "table" then
      local i = 0
      iter = function ()
        i = i + 1
        return o[i]
      end
    elseif to == "function" then
      iter = o
    elseif to == "userdata" and o.read then
      -- read by size rather than by line so line breaks stay in the data
      iter = function () return o:read(4096) end
    else
      error("Bad argument #1 to parse: expected a string, a table, a function or a file, but got "..to, 2)
    end
    for chunk in iter do
      local status, err, line, col, pos = p:parse(chunk)
      if not status then return nil, err, line, col, pos end
    end
  end
  local status, err, line, col, pos = p:parse() -- close document
  if not status then return nil, err, line, col, pos end
  p:close()
  return c.stack[1][1]
end
|
66 |
|
67 -- utility functions --------------------------------------------------------- |
|
-- Removes the false entries from positions 1..#t of a table, in place,
-- shifting the remaining values down so that they stay contiguous and in
-- their original order. Returns nothing.
local function compact (t)
  local kept = 0
  for idx = 1, #t do
    local item = t[idx]
    -- empty the slot first; a surviving value is written back at its new
    -- (possibly identical) position just below
    t[idx] = nil
    if item then
      kept = kept + 1
      t[kept] = item
    end
  end
end
|
83 |
|
-- Removes, in place and recursively, every string child that is empty or
-- consists only of whitespace. Table children are cleaned first; the
-- discarded strings are marked with false and then squeezed out by compact.
local function clean (t)
  for idx = 1, #t do
    local child = t[idx]
    local kind = type(child)
    if kind == "table" then
      clean(child)
    elseif kind == "string" then
      if child:find("^%s*$") then
        t[idx] = false -- placeholder removed by compact below
      end
    end
  end
  compact(t)
end
|
96 |
|
-- Turns simple child elements into named fields, in place and recursively.
-- A table child whose only entry is one string is replaced by a field of the
-- parent: the key is the child's index-0 value (its tag name) and the value
-- is that string. This happens only when the parent has no field under that
-- key yet; any other table child is processed recursively instead.
local function torecord (t)
  for idx = 1, #t do
    local child = t[idx]
    if type(child) == "table" then
      local name, only = child[0], child[1]
      local foldable = #child == 1
        and type(only) == "string"
        and t[name] == nil
      if foldable then
        t[name] = only
        t[idx] = false -- placeholder removed by compact below
      else
        torecord(child)
      end
    end
  end
  compact(t)
end
|
111 |
|
-- Public interface of the module.
local exports = {}
exports.parse = parse
exports.clean = clean
exports.compact = compact
exports.torecord = torecord
return exports