|
1 |
|
local coroutine = coroutine;
local tonumber = tonumber;
local string = string;
local setmetatable, getmetatable = setmetatable, getmetatable;
local pairs = pairs;
local error = error;
|
7 |
|
-- A coroutine that has already run to completion. new() swaps it in for a
-- parser's own coroutine once that parser has hit EOF or an error, so that
-- any later resume fails immediately instead of re-entering the parser.
local function finished() end
local deadroutine = coroutine.create(finished);
coroutine.resume(deadroutine); -- run the empty body so the status becomes "dead"
|
10 |
|
11 module("lxp") |
|
12 |
|
--- Lookup table from an entity name (the text between "&" and ";") to its
-- replacement text.
--
-- The five predefined XML entities are stored directly. Numeric character
-- references ("#65", "#x41") are resolved on demand by the __index handler and
-- returned UTF-8 encoded. Any other key, and any malformed or out-of-range
-- numeric reference, yields nil; when the table is used as a gsub replacement
-- this leaves the original "&...;" text untouched instead of raising.
local entity_map;
do
	local char = string.char;

	-- Encode a Unicode code point as a UTF-8 byte string.
	-- Returns nil for anything that is not an integer in 0..0x10FFFF or that
	-- falls in the UTF-16 surrogate range (which has no valid UTF-8 form).
	local function codepoint_to_utf8(cp)
		if not cp or cp % 1 ~= 0 or cp < 0 or cp > 0x10FFFF
		or (cp >= 0xD800 and cp <= 0xDFFF) then
			return nil;
		end
		if cp < 0x80 then
			return char(cp);
		end
		-- Peel off six payload bits at a time; (x - x % 64) / 64 is an integer
		-- division that needs nothing from the math library.
		local b4 = cp % 64; cp = (cp - b4) / 64;
		if cp < 0x20 then -- code point was below 0x800: two bytes
			return char(0xC0 + cp, 0x80 + b4);
		end
		local b3 = cp % 64; cp = (cp - b3) / 64;
		if cp < 0x10 then -- code point was below 0x10000: three bytes
			return char(0xE0 + cp, 0x80 + b3, 0x80 + b4);
		end
		local b2 = cp % 64; cp = (cp - b2) / 64;
		return char(0xF0 + cp, 0x80 + b2, 0x80 + b3, 0x80 + b4);
	end

	entity_map = setmetatable({
		["amp"] = "&";
		["gt"] = ">";
		["lt"] = "<";
		["apos"] = "'";
		["quot"] = "\"";
	}, {
		__index = function(_, s)
			if s:sub(1, 1) ~= "#" then
				return nil; -- unknown named entity
			end
			local cp;
			if s:sub(2, 2) == "x" then
				cp = tonumber(s:sub(3), 16); -- hexadecimal: &#x...;
			else
				cp = tonumber(s:sub(2)); -- decimal: &#...;
			end
			return codepoint_to_utf8(cp);
		end
	});
end
|
--- Expand entity and character references in `str`.
-- Each "&name;" sequence is looked up in entity_map by its name; sequences the
-- map has no value for are left as they are.
-- @param str string to unescape
-- @return the unescaped string (only the string, not gsub's match count)
local function xml_unescape(str)
	local unescaped = str:gsub("&(.-);", entity_map);
	return unescaped;
end
|
--- Split the inside of a start tag into its name and attributes.
-- @param s tag contents without the surrounding "<" and ">" (and without a
--          trailing "/" for empty-element tags)
-- @return name  the first whitespace-delimited token (nil if `s` has none)
-- @return attr  table mapping attribute name to its entity-unescaped value;
--               if a name is repeated the last occurrence wins
local function parse_tag(s)
	local name, sattr = s:match("(%S+)(.*)");
	local attr = {};
	-- The value must be closed by the same quote character that opened it
	-- (%2), so a value such as "it's" is not cut short at the apostrophe.
	-- Whitespace around "=" is permitted by XML and is skipped.
	for key, _, value in (sattr or ""):gmatch("([^=%s]+)%s*=%s*([\"'])(.-)%2") do
		attr[key] = xml_unescape(value);
	end
	return name, attr;
end
|
38 |
|
--- Coroutine body of the incremental XML parser.
--
-- Runs forever: it consumes `data`, invokes the handler callbacks, and calls
-- coroutine.yield() whenever it needs more input; the value the coroutine is
-- resumed with is appended to the buffer as the next chunk.
--
-- @param data         first chunk of input
-- @param handlers     object whose StartElement(name, attr), EndElement(name)
--                     and CharacterData(text) methods are called
-- @param ns_separator string placed between a namespace URI and the local
--                     name in reported element/attribute names; it is
--                     concatenated directly, so it must be a string whenever a
--                     namespace applies
--
-- Comments and processing instructions are skipped, CDATA sections are
-- reported through CharacterData verbatim, and "<!...>" declarations are
-- ignored. Raises an error on an end tag that has no matching start tag.
-- Known limitation: a literal ">" inside an attribute value or inside a
-- DOCTYPE internal subset still terminates the tag early.
local function parser(data, handlers, ns_separator)
	-- Yield for more input until `str` occurs in the buffer; returns the
	-- index at which it starts.
	local function find_terminator(str)
		local pos = data:find(str, 1, true);
		while not pos do
			-- Only the tail can contain a new match: restart just far enough
			-- back to catch a terminator split across two chunks.
			local from = #data - #str + 2;
			if from < 1 then from = 1; end
			data = data..coroutine.yield();
			pos = data:find(str, from, true);
		end
		return pos;
	end
	-- Consume and return everything up to and including `str`.
	local function read_until(str)
		local last = find_terminator(str) + #str - 1;
		local r = data:sub(1, last);
		data = data:sub(last + 1);
		return r;
	end
	-- Consume and return everything before `str`, leaving `str` in the buffer.
	local function read_before(str)
		local pos = find_terminator(str);
		local r = data:sub(1, pos - 1);
		data = data:sub(pos);
		return r;
	end
	-- Return the next character without consuming it.
	local function peek()
		while #data == 0 do data = coroutine.yield(); end
		return data:sub(1, 1);
	end
	-- Does the buffer begin with `prefix`? Waits for more input only while
	-- the buffer is too short to decide.
	local function starts_with(prefix)
		while #data < #prefix and data == prefix:sub(1, #data) do
			data = data..coroutine.yield();
		end
		return data:sub(1, #prefix) == prefix;
	end

	-- Namespace scopes form a chain: each element's table has its parent's
	-- table as metatable (and __index), so prefix lookups fall through to
	-- enclosing elements. Slot [0] holds the element's resolved name.
	local ns = { xml = "http://www.w3.org/XML/1998/namespace" };
	ns.__index = ns;
	-- Resolve a possibly prefixed name against the current scope. The default
	-- namespace is applied only when `dodefault` is set (elements, not
	-- attributes). Names with an undeclared prefix are returned unchanged.
	local function apply_ns(name, dodefault)
		local prefix, n = name:match("^([^:]*):(.*)$");
		if prefix and ns[prefix] then
			return ns[prefix]..ns_separator..n;
		end
		if dodefault and ns[""] then
			return ns[""]..ns_separator..name;
		end
		return name;
	end
	-- Open a new scope for an element: record and strip its xmlns
	-- declarations, then resolve the tag and attribute names. The returned
	-- attribute table maps resolved name -> value and also lists the resolved
	-- names at indices 1..n.
	local function push(tag, attr)
		ns = setmetatable({}, ns);
		for k, v in pairs(attr) do
			local xmlns = k == "xmlns" and "" or k:match("^xmlns:(.*)$");
			if xmlns then
				ns[xmlns] = v;
				attr[k] = nil; -- clearing an existing key during pairs() is allowed
			end
		end
		local newattr, n = {}, 0;
		for k, v in pairs(attr) do
			n = n + 1;
			k = apply_ns(k);
			newattr[n] = k;
			newattr[k] = v;
		end
		tag = apply_ns(tag, true);
		ns[0] = tag;
		ns.__index = ns;
		return tag, newattr;
	end
	-- Close the current scope and return the resolved name of its element.
	local function pop()
		local tag = ns[0];
		ns = getmetatable(ns);
		return tag;
	end

	while true do
		if peek() ~= "<" then
			local text = read_before("<");
			handlers:CharacterData(xml_unescape(text));
		elseif starts_with("<!--") then
			read_until("-->"); -- comment: may itself contain ">"
		elseif starts_with("<![CDATA[") then
			-- Strip the 9-byte opener and 3-byte closer; the content is
			-- literal text, so no entity expansion is applied.
			local cdata = read_until("]]>"):sub(10, -4);
			if #cdata > 0 then
				handlers:CharacterData(cdata);
			end
		elseif starts_with("<?") then
			read_until("?>"); -- processing instruction / XML declaration
		else
			local elem = read_until(">"):sub(2, -2);
			local first = elem:sub(1, 1);
			if first == "!" then
				-- DOCTYPE and other declarations are ignored
			elseif first == "/" then -- end tag
				local raw_name = elem:match("^/(%S*)");
				-- The root scope has no metatable, so there is no open element.
				if getmetatable(ns) == nil then
					error("unexpected end tag </"..raw_name..">", 0);
				end
				if apply_ns(raw_name, true) ~= ns[0] then
					error("end tag </"..raw_name.."> does not match the open element", 0);
				end
				handlers:EndElement(pop());
			elseif elem:sub(-1, -1) == "/" then -- empty-element tag
				local name, attr = parse_tag(elem:sub(1, -2));
				name, attr = push(name, attr);
				handlers:StartElement(name, attr);
				handlers:EndElement(pop());
			else -- start tag
				local name, attr = parse_tag(elem);
				name, attr = push(name, attr);
				handlers:StartElement(name, attr);
			end
		end
	end
end
|
130 |
|
--- Create a streaming parser.
-- @param handlers     callback object passed through to the parser coroutine
-- @param ns_separator separator passed through to the parser coroutine
-- @return a table with a single method:
--   parse(self, data)
--     data == nil/false : signals end of input; the parser is retired and
--                         true is returned
--     otherwise         : feeds `data` to the parser; returns true on success
--                         or nil plus the error value on failure
--   After EOF or an error the parser is replaced by a dead coroutine, so every
--   further parse(data) call returns nil plus an error message.
function new(handlers, ns_separator)
	local co = coroutine.create(parser);
	return {
		parse = function(self, data)
			if not data then
				co = deadroutine;
				return true; -- eof
			end
			-- On the first resume these become parser's arguments; on later
			-- resumes only the first value (the chunk) is used by the yield.
			local success, result = coroutine.resume(co, data, handlers, ns_separator);
			-- Decide on the status flag, not only on the error value, so an
			-- error raised with a nil/false value is not reported as success.
			if not success or result then
				co = deadroutine;
				return nil, result or "unknown parser error"; -- error
			end
			return true; -- success
		end;
	};
end
|
148 |
|
149 return _M; |