util/xmllex.lua

changeset 4003
cb6ddda1cb5f
parent 4002
2b53b4b5d46e
equal deleted inserted replaced
4002:2b53b4b5d46e 4003:cb6ddda1cb5f
1 local assert , ipairs , pairs , setmetatable , rawget , rawset , tostring = 1 local assert , ipairs , pairs , setmetatable , rawget , rawset , tostring =
2 assert , ipairs , pairs , setmetatable , rawget , rawset , tostring 2 assert , ipairs , pairs , setmetatable , rawget , rawset , tostring
3 local strsub , strmatch = string.sub , string.match 3 local strchar , strgmatch , strgsub , strsub , strmatch = string.char , string.gmatch , string.gsub , string.sub , string.match
4 local tblconcat = table.concat 4 local tblconcat = table.concat
5 local tblinsert = table.insert 5 local tblinsert = table.insert
6 6
7 local stanza_methods = require "util.stanza".stanza_mt; 7 local stanza_methods = require "util.stanza".stanza_mt;
8
-- Encode a Unicode code point as a UTF-8 byte string.
-- Returns nil for values that are not valid XML character references
-- (zero, surrogates, or anything above U+10FFFF).
-- Uses only arithmetic so it works on Lua 5.1 (no bit library, no utf8.char).
local function codepoint_to_utf8 ( cp )
	if cp < 1 or cp > 0x10FFFF or ( cp >= 0xD800 and cp <= 0xDFFF ) then
		return nil
	elseif cp < 0x80 then
		return strchar ( cp )
	end
	local b1 = cp % 0x40 ; cp = ( cp - b1 ) / 0x40
	if cp < 0x20 then
		return strchar ( 0xC0 + cp , 0x80 + b1 )
	end
	local b2 = cp % 0x40 ; cp = ( cp - b2 ) / 0x40
	if cp < 0x10 then
		return strchar ( 0xE0 + cp , 0x80 + b2 , 0x80 + b1 )
	end
	local b3 = cp % 0x40 ; cp = ( cp - b3 ) / 0x40
	return strchar ( 0xF0 + cp , 0x80 + b3 , 0x80 + b2 , 0x80 + b1 )
end

-- Entity name -> replacement text. The five predefined XML entities are
-- stored directly; numeric character references ("#65", "#x41") are resolved
-- on demand by __index. Anything else raises "invalid entity <name>".
local entities = setmetatable ( {
	amp = "&" ;
	lt = "<" ;
	gt = ">" ;
	apos = "'" ;
	quot = '"' ;
} , { __index = function ( _ , entity ) -- __index receives (table, key); the key is the entity name
	local cp
	local dec = entity:match ( "^#(%d+)$" )
	if dec then
		cp = tonumber ( dec )
	else
		local hex = entity:match ( "^#x(%x+)$" )
		if hex then
			cp = tonumber ( hex , 16 )
		end
	end
	local char = cp and codepoint_to_utf8 ( cp )
	return char or error ( "invalid entity " .. entity )
end } )

-- Replace every "&name;" in `str` with the text it stands for.
-- Returns the unescaped string only; the extra parentheses drop the
-- substitution count that gsub returns as a second value.
-- Raises an error if an entity is neither predefined nor a valid numeric
-- character reference.
local function xml_unescape ( str )
	return ( strgsub ( str , "&([^;]*);" , entities ) )
end
8 21
9 local function getstring ( msgs , startpos , finishpos ) 22 local function getstring ( msgs , startpos , finishpos )
10 if #msgs == 1 then --All originated in same string 23 if #msgs == 1 then --All originated in same string
11 return strsub ( msgs[1] , startpos , finishpos ) 24 return strsub ( msgs[1] , startpos , finishpos )
12 else -- Over multiple source strings 25 else -- Over multiple source strings
21 local str = v.stringform 34 local str = v.stringform
22 if str then 35 if str then
23 return str 36 return str
24 else 37 else
25 str = getstring ( v.msgs , v.start , v.finish ) 38 str = getstring ( v.msgs , v.start , v.finish )
39 if v.type == "text" then
40 str = xml_unescape ( str )
41 end
26 v.stringform = str 42 v.stringform = str
27 return str 43 return str
28 end 44 end
29 end 45 end
30 } 46 }
123 return strmatch ( str , "^<([^%s>/]+)" ) 139 return strmatch ( str , "^<([^%s>/]+)" )
124 end 140 end
125 141
-- Extract the attributes from the text of a start tag.
-- `str` is the raw tag text; each name="value" or name='value' pair found
-- becomes an entry in the returned table, with entity references in the
-- value unescaped. A later duplicate name overwrites an earlier one.
local function get_attr ( str )
	local attr = { }
	-- The value is matched lazily up to the same quote character that opened
	-- it (%2), so a double-quoted value may contain apostrophes and vice versa.
	for name , quote , attvalue in strgmatch ( str , [=[([^%s=/<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = xml_unescape ( attvalue )
	end
	return attr
end
133 149
134 function resolve_attr_namespaces ( attr ) 150 function resolve_attr_namespaces ( attr )

mercurial