Mon, 03 Jan 2011 18:31:08 +0000
Tree structure now similar to prosody stanza format
util/xmllex.lua | file | annotate | diff | comparison | revisions |
--- a/util/xmllex.lua Mon Jan 03 16:47:06 2011 +0000 +++ b/util/xmllex.lua Mon Jan 03 18:31:08 2011 +0000 @@ -1,32 +1,7 @@ -a=[[ -<iq type='set' - from='francisco@denmark.lit/barracks' - to='pubsub.shakespeare.lit' - id='sub1'> - <pubsub xmlns='http://jabber.org/protocol/pubsub'> - <subscribe - node='princely_musings' - jid='francisco@denmark.lit'/> - </pubsub> -</iq> -]] -b=[[ -<message> - <body>Wow, I'm green with envy!</body> - <html xmlns='http://jabber.org/protocol/xhtml-im'> - <body xmlns='http://www.w3.org/1999/xhtml'> - <p style='font-size:large'> - <em>Wow</em>, I'm <span style='color:green'>green</span> - with <strong>envy</strong>! - </p> - </body> - </html> -</message> -]] - local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable local strsub = string.sub local tblconcat = table.concat +local tblinsert = table.insert local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos ) if startmsg == finishmsg then --All originated in same string @@ -40,7 +15,14 @@ local m_mt = { __tostring = function ( v ) - return getstring ( v.stringindex , v.startmsg , v.starte + 1 , v.finishmsg , v.finishs - 1 ) + local str = v.stringform + if str then + return str + else + str = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish ) + rawset ( v , "stringform" , str ) + return str + end end } @@ -108,40 +90,10 @@ return r end -local function tagindex_to_tree(indices) - local root = {} - local leaf = root - local stringindex = indices.stringindex - for k ,v in ipairs ( indices ) do - if v.selfclosed then - print("selfclosed",v) - leaf [ #leaf + 1 ] = { - parent = leaf ; - selfclosing = v ; - } - elseif v.closed then - print("close",v) - leaf.endtag = v - leaf = leaf.parent - else - print("open",v) - - local newleaf = { - parent = leaf ; - starttag = v ; - } - leaf [ #leaf + 1 ] = newleaf - leaf = newleaf - end - end - assert ( leaf == root , "Mismatched opening/closing tags" ) - return root; -end - -local function getattributes ( starttag ) +local function process_starttag ( starttag ) local str = tostring ( starttag ) + local attr = { } - local attr = { } local elem = str:match ( "[^%s=></]+" ) for name , quote, attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])([^"]*)%2]=] ) do attr [ name ] = attvalue @@ -149,9 +101,77 @@ return elem , attr end +local stanza_mt = { + __index = function ( t , k ) + if k == "name" or k == "attr" then + local elem , attr = process_starttag ( t.opentag ) + rawset ( t , "name" , elem ) + rawset ( t , "attr" , attr ) + return rawget ( t , k ) + else + return stanza_methods [ k ] + end + end ; + __tostring = function ( t ) + local opentag = t.opentag + local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag ) + return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish ) + end ; +} + +local function new_stanza ( ) + return setmetatable ( { tags = { } } , stanza_mt ) +end + +local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start ) + if last_finishmsg ~= new_startmsg and last_finish + 1 ~= new_start - 1 then -- Don't bother with obviously zero length strings + tblinsert ( leaf , setmetatable ( { + stringindex = stringindex ; + startmsg = last_finishmsg ; start = last_finish + 1 ; + finishmsg = new_startmsg ; finish = new_start - 1 ; + } , m_mt ) ) + end +end + +local function tagindex_to_tree(indices) + local root = { tags = { } } + local leaf = root + local stringindex = indices.stringindex + + local last_finishmsg , last_finish = 1 , 1 + + for k ,v in ipairs ( indices ) do + add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start ) + last_finishmsg = v.finishmsg + last_finish = v.finish + + if v.selfclosed then + local newleaf = new_stanza ( ) + newleaf.opentag = v + newleaf.selfclosing = true + newleaf.parent = leaf + tblinsert ( leaf , newleaf ) + tblinsert ( leaf.tags , newleaf ) + elseif v.closed then -- Close tag + leaf.endtag = v + leaf = leaf.parent + else -- Open tag + local newleaf = new_stanza ( ) + newleaf.opentag = v + newleaf.parent = leaf + tblinsert ( leaf , newleaf ) + tblinsert ( leaf.tags , newleaf ) + + leaf = newleaf + end + end + add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , #stringindex , -1 ) + + assert ( leaf == root , "Mismatched opening/closing tags" ) + return root; +end + return { index = index; tagindex_to_tree = tagindex_to_tree; - getattributes = getattributes; - getstring = getstring; };