Mon, 03 Jan 2011 18:31:08 +0000
The tree structure is now similar to the Prosody stanza format.
local ipairs, pairs, setmetatable = ipairs, pairs, setmetatable
local assert, rawget, rawset, tostring = assert, rawget, rawset, tostring
local strsub = string.sub
local tblconcat = table.concat
local tblinsert = table.insert

--- Extract a span of text that may cross several source chunks.
-- @param stringindex array of source chunks, in arrival order
-- @param startmsg    index of the chunk holding the first character
-- @param startpos    position of the first character inside that chunk
-- @param finishmsg   index of the chunk holding the last character
-- @param finishpos   position of the last character inside that chunk
-- @return the concatenated text
local function getstring(stringindex, startmsg, startpos, finishmsg, finishpos)
	if startmsg == finishmsg then
		-- Everything originated in the same chunk.
		return strsub(stringindex[startmsg], startpos, finishpos)
	end
	-- Tail of the first chunk, every whole chunk in between, head of the last.
	return strsub(stringindex[startmsg], startpos, -1)
		.. tblconcat(stringindex, "", startmsg + 1, finishmsg - 1)
		.. strsub(stringindex[finishmsg], 1, finishpos)
end

-- Metatable shared by every span record (tags and text runs). tostring()
-- materialises the text lazily and caches it in the `stringform` field.
local m_mt = {
	__tostring = function(v)
		local str = v.stringform
		if not str then
			str = getstring(v.stringindex, v.startmsg, v.start, v.finishmsg, v.finish)
			rawset(v, "stringform", str)
		end
		return str
	end,
}

--- Scan one chunk of markup and record where each tag starts and ends.
-- Can be called repeatedly, passing the previous result as `r`, to index a
-- document that arrives in pieces; a tag left open at the end of one chunk is
-- completed by the next call.
-- NOTE: a "</" or "/>" token that is itself split across two chunks is not
-- recognised as a close / self-closing marker.
-- @param str chunk of text to scan
-- @param r   (optional) index table returned by an earlier call
-- @return the index table on success; `false, index` when the chunk ends
--         inside a tag (feed more data and call again)
local function index(str, r)
	local stringindex
	local curstr, nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Keep the chunk list reachable from the index table so that later
			-- calls and tagindex_to_tree see the same chunks.
			stringindex = {}
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex[curstr] = str
		nexti = #r
	else
		stringindex = { str }
		curstr = 1
		r = { stringindex = stringindex }
		nexti = 0
	end

	-- Resume the last tag if it is still waiting for its '>'; otherwise start
	-- a fresh record in the next slot.
	local m
	do
		local t = r[nexti]
		if t and not t.finish then
			m = t
		else
			m = setmetatable({ stringindex = stringindex }, m_mt)
			nexti = nexti + 1
		end
	end

	local d = 1
	while true do
		local a, b, c, close, selfclosing
		if not m.start then
			a, b, close = str:find("<(/?)", d)
			if not a then break end
			m.startmsg = curstr
			m.start = a
			m.starte = b
			m.closed = (close == "/")
		end
		r[nexti] = m
		nexti = nexti + 1
		-- `b` is nil when resuming a tag opened in an earlier chunk; in that
		-- case the search for '>' starts at the beginning of this chunk.
		c, d, selfclosing = str:find("(/?)>", b or 1)
		if not c then return false, r end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = (selfclosing == "/")
		m = setmetatable({ stringindex = stringindex }, m_mt)
	end
	return r
end

--- Parse the element name and attributes out of an opening tag.
-- @param starttag span record (or string) holding the tag text
-- @return element name, table mapping attribute names to raw values
local function process_starttag(starttag)
	local str = tostring(starttag)
	local attr = {}
	local elem = str:match("[^%s=></]+")
	-- The value is matched lazily up to the same quote character that opened
	-- it, so single- and double-quoted attributes are both delimited correctly.
	for name, _, attvalue in str:gmatch([=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=]) do
		attr[name] = attvalue
	end
	return elem, attr
end

-- Fallback lookup table for stanza fields that are not set on the instance.
local stanza_methods = {}

-- Stanza metatable: `name` and `attr` are parsed from the opening tag on first
-- access and cached; tostring() yields the original markup for the element.
local stanza_mt = {
	__index = function(t, k)
		if k == "name" or k == "attr" then
			local elem, attr = process_starttag(t.opentag)
			rawset(t, "name", elem)
			rawset(t, "attr", attr)
			return rawget(t, k)
		end
		return stanza_methods[k]
	end,
	__tostring = function(t)
		local opentag = t.opentag
		-- A self-closing element ends where its own (only) tag ends.
		local endtag = assert(rawget(t, "endtag") or rawget(t, "selfclosing") and t.opentag)
		return getstring(opentag.stringindex, opentag.startmsg, opentag.start, endtag.finishmsg, endtag.finish)
	end,
}

local function new_stanza()
	return setmetatable({ tags = {} }, stanza_mt)
end

--- Append the text lying between two tags to `leaf` as a lazy span record.
-- @param stringindex    array of source chunks
-- @param leaf           node receiving the text child
-- @param last_finishmsg chunk index where the previous tag ended
-- @param last_finish    position of the previous tag's last character
-- @param new_startmsg   chunk index where the next tag starts
-- @param new_start      position of the next tag's first character
local function add_inbetween(stringindex, leaf, last_finishmsg, last_finish, new_startmsg, new_start)
	local first, last = last_finish + 1, new_start - 1
	-- Don't bother with obviously zero length strings: inside one chunk the
	-- gap is empty exactly when it starts after it ends. Gaps that cross chunk
	-- boundaries are always recorded (they may still turn out to be empty).
	if last_finishmsg == new_startmsg and first > last then
		return
	end
	tblinsert(leaf, setmetatable({
		stringindex = stringindex,
		startmsg = last_finishmsg,
		start = first,
		finishmsg = new_startmsg,
		finish = last,
	}, m_mt))
end

--- Build a tree of stanzas from the table produced by `index`.
-- Every node keeps its children (elements and text spans) in its array part
-- and its element children additionally in `tags`.
-- @param indices index table returned by `index`
-- @return root node containing the top-level children
-- Raises an error when tags are incomplete or do not nest properly.
local function tagindex_to_tree(indices)
	local root = { tags = {} }
	local leaf = root
	local stringindex = indices.stringindex
	-- "Just before the first character of the first chunk".
	local last_finishmsg, last_finish = 1, 0

	for _, v in ipairs(indices) do
		assert(v.finish, "Incomplete tag in index")
		add_inbetween(stringindex, leaf, last_finishmsg, last_finish, v.startmsg, v.start)
		last_finishmsg = v.finishmsg
		last_finish = v.finish

		if v.selfclosed then
			local newleaf = new_stanza()
			newleaf.opentag = v
			newleaf.selfclosing = true
			newleaf.parent = leaf
			tblinsert(leaf, newleaf)
			tblinsert(leaf.tags, newleaf)
		elseif v.closed then
			-- Close tag: there must be an open element to close.
			assert(leaf ~= root, "Mismatched opening/closing tags")
			leaf.endtag = v
			leaf = leaf.parent
		else
			-- Open tag: descend into the new element.
			local newleaf = new_stanza()
			newleaf.opentag = v
			newleaf.parent = leaf
			tblinsert(leaf, newleaf)
			tblinsert(leaf.tags, newleaf)
			leaf = newleaf
		end
	end

	-- Trailing text runs up to the very end of the last chunk.
	local lastmsg = #stringindex
	if lastmsg > 0 then
		add_inbetween(stringindex, leaf, last_finishmsg, last_finish, lastmsg, #stringindex[lastmsg] + 1)
	end

	assert(leaf == root, "Mismatched opening/closing tags")
	return root
end

return {
	index = index;
	tagindex_to_tree = tagindex_to_tree;
};