util/xmllex.lua

changeset 4002
2b53b4b5d46e
parent 3995
7214dc7a5642
child 4003
cb6ddda1cb5f
equal deleted inserted replaced
4001:ed90aace9add 4002:2b53b4b5d46e
1 local assert, ipairs , pairs , setmetatable , rawget , rawset , tostring = 1 local assert , ipairs , pairs , setmetatable , rawget , rawset , tostring =
2 assert, ipairs , pairs , setmetatable , rawget , rawset , tostring 2 assert , ipairs , pairs , setmetatable , rawget , rawset , tostring
3 local strsub = string.sub 3 local strsub , strmatch = string.sub , string.match
4 local tblconcat = table.concat 4 local tblconcat = table.concat
5 local tblinsert = table.insert 5 local tblinsert = table.insert
6
7 local stanza_methods = require "util.stanza".stanza_mt;
6 8
7 local function getstring ( msgs , startpos , finishpos ) 9 local function getstring ( msgs , startpos , finishpos )
8 if #msgs == 1 then --All originated in same string 10 if #msgs == 1 then --All originated in same string
9 return strsub ( msgs[1] , startpos , finishpos ) 11 return strsub ( msgs[1] , startpos , finishpos )
10 else -- Over multiple source strings 12 else -- Over multiple source strings
59 r.state = "inside" 61 r.state = "inside"
60 return true 62 return true
61 end 63 end
62 64
63 local function handleinside ( str, r , initial ) 65 local function handleinside ( str, r , initial )
64 local c , d , selfclosing = str:find ( "(/?)>" , initial ) 66 local c , d , selfclosing = str:find ( "([/?]?)>" , initial )
65 if not c then 67 if not c then
66 r.state = "inside" 68 r.state = "inside"
67 return false 69 return false
68 end 70 end
69 71
70 local m = r[#r] 72 local m = r[#r]
71 m.finish = d 73 m.finish = d
72 m.finishs = c - 1 74 m.finishs = c - 1
73 if selfclosing == "/" then 75 if selfclosing == "/" or selfclosing == "?" then
74 m.type = "selfclosing" 76 m.type = "selfclosing"
75 r.depth = r.depth - 1 77 r.depth = r.depth - 1
76 end 78 end
77 79
78 local m = setmetatable ( { 80 local m = setmetatable ( {
115 end 117 end
116 118
117 return r 119 return r
118 end 120 end
119 121
120 local function process_starttag ( starttag ) 122 local function get_name ( str )
121 local str = tostring ( starttag ) 123 return strmatch ( str , "^<([^%s>/]+)" )
124 end
125
126 local function get_attr ( str )
122 local attr = { } 127 local attr = { }
123 128 for name , quote, attvalue in str:gmatch ( [=[([^%s=/<]+)%s*=%s*(["'])([^'"]*)%2]=] ) do
124 local elem = str:match ( "[^%s=></]+" )
125 for name , quote, attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])([^"]*)%2]=] ) do
126 attr [ name ] = attvalue 129 attr [ name ] = attvalue
127 end 130 end
128 return elem , attr 131 return attr
129 end 132 end
133
--- Resolve the namespace declarations and prefixed attributes of one element.
--
-- Scans `attr` for `xmlns` / `xmlns:prefix` declarations and for
-- `prefix:name` attributes. For every prefixed attribute an additional key
-- `<namespace URI> .. "\1" .. <local name>` is written into `attr`; the
-- original `prefix:name` key is left in place. Unprefixed attributes and
-- attributes using the `xml` prefix are not touched.
--
-- Declared `local` so the function no longer leaks into the global table;
-- it is still reachable through the module's export table.
--
-- @param attr table of raw attribute name -> value; modified in place
-- @return table of declared prefix -> namespace URI. A plain `xmlns`
--   declaration is stored under the empty-string key.
-- Raises "already resolved" when a key cannot be split (e.g. it already
-- contains the "\1" separator), and "unbound prefix: <prefix>" when a
-- prefixed attribute has no matching `xmlns:<prefix>` entry in `attr`.
local function resolve_attr_namespaces ( attr )
	local namespace = { }
	local prefixattr = { }

	for k , attr_value in pairs ( attr ) do
		local attr_prefix , attr_name = k:match ( "^([^:\1]+):?([^\1]-)$" )

		if attr_prefix == nil then
			error ( "already resolved" )
		elseif attr_prefix == "xmlns" then
			namespace [ attr_name ] = attr_value
		elseif #attr_name ~= 0 and attr_prefix ~= "xml" then
			-- Group prefixed attributes by prefix; they are rewritten below,
			-- once every declaration in this attribute set has been seen.
			local t = prefixattr [ attr_prefix ]
			if not t then
				t = { }
				prefixattr [ attr_prefix ] = t
			end
			t [ attr_name ] = attr_value
		end
	end

	-- `attr` is only written to here, after the traversal of `attr` itself
	-- has finished, so no keys are added to a table that is being iterated.
	for prefix , attrs in pairs ( prefixattr ) do
		local uri = namespace [ prefix ]
		if uri == nil then
			-- Previously this surfaced as "attempt to concatenate a nil value".
			error ( "unbound prefix: " .. prefix )
		end
		for name , value in pairs ( attrs ) do
			attr [ uri .. "\1" .. name ] = value
		end
	end

	return namespace
end
161
-- Key under which each namespace table stores the namespace of the element
-- it belongs to.
local currentindex = 1

--- Build the prefix -> namespace table for `element`.
--
-- The element's own declarations come from resolve_attr_namespaces on its
-- attributes; lookups that miss fall through to `element.parent.namespace`.
-- The element's own namespace is then stored at `currentindex`, chosen as:
--   1. the binding of the tag-name prefix, if the tag name has one
--      (raises "unbound prefix: <prefix>" when there is no binding);
--   2. otherwise the element's own raw `xmlns` attribute;
--   3. otherwise the parent's `attr.xmlns`;
--   4. otherwise the parent's namespace at `currentindex`.
--
-- @param element table with `parent`, `str` and `attr` fields
-- @return the namespace table
local function resolve_namespace ( element )
	local parent = element.parent

	local tag_prefix = get_name ( element.str ):match ( "^([^:]+):" )

	local declared = resolve_attr_namespaces ( element.attr )
	local namespace = setmetatable ( declared , { __index = parent.namespace } )

	local own_ns
	if tag_prefix then
		own_ns = namespace [ tag_prefix ]
		if not own_ns then
			error ("unbound prefix: "..tag_prefix)
		end
	else
		own_ns = rawget(element.attr, "xmlns")
			or parent.attr.xmlns
			or parent.namespace [ currentindex ]
	end
	namespace [ currentindex ] = own_ns

	return namespace
end
179
-- Table of property name -> function computing that property's value for a
-- stanza table. Each function receives the stanza as its first argument.
local dynamic_properties = { }

-- Tag name with any `prefix:` part removed.
function dynamic_properties.name ( stanza )
	local qualified = get_name ( stanza.str )
	return qualified:match("[^:]+$")
end

-- Attribute table parsed from the open tag text. The first lookup of a key
-- that is not present replaces the table's metatable with one that supplies
-- `xmlns` from the stanza's namespace table, then repeats the lookup.
function dynamic_properties.attr ( stanza )
	local parsed = get_attr ( stanza.str )

	local first_miss_mt = { }
	function first_miss_mt.__index ( attr_table , key )
		-- Deliberate standalone read of the namespace property before the
		-- metatable is swapped; keep this statement (the original author
		-- marked it as not to be optimised away).
		local _ = stanza.namespace
		local own_ns = stanza.namespace[currentindex]
		setmetatable ( attr_table , { __index = { xmlns = own_ns } } )
		return attr_table [ key ]
	end

	return setmetatable ( parsed , first_miss_mt )
end

-- Text of the open tag.
function dynamic_properties.str ( stanza )
	return tostring ( stanza.opentag )
end

-- Namespace table, computed by resolve_namespace.
dynamic_properties.namespace = resolve_namespace
130 196
131 local stanza_mt = { 197 local stanza_mt = {
132 __index = function ( t , k ) 198 __index = function ( t , k )
133 if k == "name" or k == "attr" then 199 local f = dynamic_properties [ k ]
134 local elem , attr = process_starttag ( t.opentag ) 200 if f then
135 rawset ( t , "name" , elem ) 201 local v = f ( t )
136 rawset ( t , "attr" , attr ) 202
137 return rawget ( t , k ) 203 rawset ( t , k , v )
204 return v
138 else 205 else
139 print("METHOD",k) 206 return stanza_methods[k]
140 return stanza_methods [ k ]
141 end 207 end
142 end ; 208 end ;
209
143 __tostring = function ( t ) 210 __tostring = function ( t )
211 if t.modified then
212 return stanza_methods.__tostring ( t )
213 end
144 local opentag = t.opentag 214 local opentag = t.opentag
145 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag ) 215 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag )
146 return getstring ( opentag.msgs , opentag.start , endtag.finish ) 216 return getstring ( opentag.msgs , opentag.start , endtag.finish )
147 end ; 217 end ;
218
219 __newindex = function ( t , k , v )
220 rawset ( t , "modified", true )
221 rawset ( t , k , v )
222 end ;
148 } 223 }
149 224
150 local function new_stanza ( ) 225 local function new_stanza ( parent )
151 return setmetatable ( { tags = { } } , stanza_mt ) 226 return setmetatable ( { tags = { } , parent = parent } , stanza_mt )
152 end 227 end
153 228
154 local function tagindex_to_tree(indices) 229 local function tagindex_to_tree(indices, start, finish,root)
155 if not start then 230 if not start then
156 start = 1 231 start = 1
157 finish = #indices 232 finish = #indices
158 end 233 end
159 234
160 local root = { tags = { } } 235 root = root or { attr = { } }
236 root.namespace = resolve_attr_namespaces ( root.attr )
237 root.root = true
238 root.tags = { }
161 local leaf = root 239 local leaf = root
162 240
163 for i = start , finish do 241 for i = start , finish do
164 local v = indices [ i ] 242 local v = indices [ i ]
165 243
166 if v.type == "selfclosing" then 244 if v.type == "selfclosing" then
167 local newleaf = new_stanza ( ) 245 local newleaf = new_stanza ( leaf )
168 newleaf.opentag = v 246 newleaf.opentag = v
169 newleaf.selfclosing = true 247 newleaf.selfclosing = true
170 newleaf.parent = leaf
171 248
172 tblinsert ( leaf , newleaf ) 249 tblinsert ( leaf , newleaf )
173 tblinsert ( leaf.tags , newleaf ) 250 tblinsert ( leaf.tags , newleaf )
174 elseif v.type == "close" then -- Close tag 251 elseif v.type == "close" then -- Close tag
175 leaf.endtag = v 252 leaf.endtag = v
176 leaf = leaf.parent 253 leaf = leaf.parent
177 elseif v.type == "text" then 254 elseif v.type == "text" then
178 tblinsert ( leaf, v ) 255 tblinsert ( leaf, v )
179 else -- Open tag 256 else -- Open tag
180 local newleaf = new_stanza ( ) 257 local newleaf = new_stanza ( leaf )
181 newleaf.opentag = v 258 newleaf.opentag = v
182 newleaf.parent = leaf
183 259
184 tblinsert ( leaf , newleaf ) 260 tblinsert ( leaf , newleaf )
185 tblinsert ( leaf.tags , newleaf ) 261 tblinsert ( leaf.tags , newleaf )
186 262
187 leaf = newleaf 263 leaf = newleaf
193 end 269 end
194 270
195 return { 271 return {
196 index = index ; 272 index = index ;
197 tagindex_to_tree = tagindex_to_tree ; 273 tagindex_to_tree = tagindex_to_tree ;
198 process_starttag = process_starttag ; 274 get_name = get_name ;
275 get_attr = get_attr ;
276 resolve_attr_namespaces = resolve_attr_namespaces ;
199 }; 277 };

mercurial