util/xmllex.lua

Mon, 03 Jan 2011 18:31:08 +0000

author
Daurnimator <quae@daurnimator.com>
date
Mon, 03 Jan 2011 18:31:08 +0000
changeset 3991
7a2856c8ab7a
parent 3990
783004a12224
child 3992
de77ec2b49bc
permissions
-rw-r--r--

Tree structure now similar to prosody stanza format

local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
local strsub = string.sub
local tblconcat = table.concat
local tblinsert = table.insert

--- Extracts the text lying between two positions of a list of source strings.
-- @param stringindex array of source strings
-- @param startmsg    index (into stringindex) of the string holding the first character
-- @param startpos    position of the first character inside that string
-- @param finishmsg   index (into stringindex) of the string holding the last character
-- @param finishpos   position of the last character inside that string
-- @return the selected text as a single string
local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
	local first = stringindex [ startmsg ]

	-- Everything lives in one source string: a plain substring is enough
	if startmsg == finishmsg then
		return strsub ( first , startpos , finishpos )
	end

	-- Otherwise glue together: the rest of the first string, every string
	-- strictly in between, and the leading part of the last string
	local head = strsub ( first , startpos , -1 )
	local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
	local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
	return head .. middle .. tail
end

-- Metatable for lexed spans (tags and text nodes).
local m_mt = { }

-- tostring ( span ) yields the source text the span covers. The text is
-- assembled with getstring on first use and then cached on the span itself
-- under the "stringform" key, so later calls return the cached value.
function m_mt.__tostring ( v )
	local text = v.stringform
	if not text then
		text = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish )
		rawset ( v , "stringform" , text )
	end
	return text
end

--- Lexes the tags found in one chunk of text and appends them to a tag index.
--
-- The index is an array of tag records. Each record carries:
--   stringindex           the shared array of all source strings seen so far
--   startmsg , start      source string number and position of the tag's "<"
--   starte                position where the "<" or "</" opener ends
--   closed                true when the tag opened with "</"
--   finishmsg , finish    source string number and position of the tag's ">"
--   finishs               position where the ">" or "/>" closer starts
--   selfclosed            true when the tag ended with "/>"
-- The index itself also carries the shared `stringindex` array.
--
-- A tag whose ">" has not been seen yet is stored without `finish`; passing the
-- same index to a later call continues that tag in the new string.
-- Note that "</" is only recognised when both characters are in the same chunk.
--
-- @param str the next chunk of source text
-- @param r   (optional) index returned by an earlier call, to be extended
-- @return the index when every tag in it is complete;
--         otherwise `false` followed by the index (last tag still unfinished)
local function index ( str , r )
	local stringindex
	local curstr , nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Caller supplied a bare table: attach the string store to it, otherwise
			-- the source strings would be unreachable from the returned index
			stringindex = { }
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex [ curstr ] = str
		
		nexti = #r
	else
		stringindex = { str }
		curstr = 1
		
		r = { stringindex = stringindex }
		
		nexti = 0
	end
	
	-- Resume the trailing unfinished tag if there is one, else start a fresh record
	local m
	do
		local t = r [ nexti ]
		if t and not t.finish then
			m = t
		else
			m = setmetatable ( { stringindex = stringindex } , m_mt )
			nexti = nexti + 1
		end
	end
	
	-- Position from which the next "<" is searched; after a tag it is that tag's ">"
	local d = 0
	while true do
		local a , b , c , close , selfclosing

		if not m.start then
			a , b , close = str:find ( "<(/?)" , d )
			
			if not a then break end -- No more tags in this chunk
			m.startmsg = curstr
			m.start = a
			m.starte = b
			if close == "/" then
				m.closed = true
			else
				m.closed = false
			end
		end
		
		-- Store the record before looking for its end, so an unfinished tag is kept
		r [ nexti ] = m
		nexti = nexti + 1
		
		-- For a resumed tag `b` is nil and the search covers the whole chunk
		c , d , selfclosing = str:find ( "(/?)>" , b )
		if not c then
			return false , r
		end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = selfclosing == "/"
		
		m = setmetatable ( { stringindex = stringindex } , m_mt )
	end
	return r
end

--- Extracts the element name and the attributes from a start tag.
-- @param starttag the tag; anything tostring() turns into the tag's source text
-- @return the element name (first run of characters that are not whitespace, "=", ">", "<" or "/")
-- @return a table mapping attribute names to their (still escaped) values
local function process_starttag ( starttag )
	local str = tostring ( starttag )
	local attr = { }
	
	local elem = str:match ( "[^%s=></]+" )
	-- The value is the shortest run ending at the same quote character that opened it,
	-- so 'single' and "double" quoted values both work and one value can never
	-- swallow the attributes that follow it
	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end

-- Methods shared by all stanzas; consulted by the __index fallback below.
-- It has to exist: without it every read of an absent stanza field would raise
-- an "attempt to index a nil value" error instead of yielding nil.
local stanza_methods = { }

-- Metatable for element nodes of the tree.
local stanza_mt = {
	-- "name" and "attr" are parsed out of the open tag on first access and then
	-- stored on the stanza; any other missing key is looked up in stanza_methods
	__index = function ( t , k )
		if k == "name" or k == "attr" then
			local elem , attr = process_starttag ( t.opentag )
			rawset ( t , "name" , elem )
			rawset ( t , "attr" , attr )
			return rawget ( t , k )
		else
			return stanza_methods [ k ]
		end
	end ;
	-- The source text from the start of the open tag to the end of the end tag
	-- (for a self-closing stanza the open tag is also the end)
	__tostring = function ( t )
		local opentag = t.opentag
		local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag , "Stanza has no closing tag" )
		return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish )
	end ;
}

-- Creates an element node: a table with an empty `tags` list and the stanza metatable.
local function new_stanza ( )
	local stanza = { tags = { } }
	setmetatable ( stanza , stanza_mt )
	return stanza
end

-- Reports whether the span from ( startmsg , start ) to ( finishmsg , finish )
-- selects no characters at all.
local function span_is_empty ( stringindex , startmsg , start , finishmsg , finish )
	if startmsg == finishmsg then
		return start > finish
	end
	-- Spread over several source strings: empty only when nothing is left of the
	-- first string, nothing is taken from the last, and all strings in between are empty
	if start <= #stringindex [ startmsg ] or finish >= 1 then
		return false
	end
	for i = startmsg + 1 , finishmsg - 1 do
		if #stringindex [ i ] > 0 then
			return false
		end
	end
	return true
end

-- Appends to `leaf` a text node covering the characters strictly between the
-- end of the previous tag and the start of the next one. Nothing is added when
-- there are no such characters.
-- @param stringindex    array of source strings
-- @param leaf           node receiving the text
-- @param last_finishmsg source string number holding the previous tag's last character
-- @param last_finish    position of that character (0: nothing consumed yet)
-- @param new_startmsg   source string number holding the next tag's first character
-- @param new_start      position of that character (length + 1: end of the string)
local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start )
	local start , finish = last_finish + 1 , new_start - 1
	if span_is_empty ( stringindex , last_finishmsg , start , new_startmsg , finish ) then
		return
	end
	tblinsert ( leaf , setmetatable ( {
		stringindex = stringindex ;
		startmsg = last_finishmsg ; start = start ;
		finishmsg = new_startmsg ; finish = finish ;
		} , m_mt ) )
end

--- Builds a tree out of a tag index.
-- Every node lists its children (text nodes and element nodes, in document
-- order) in its array part and its element children alone in `tags`. Element
-- nodes additionally carry `opentag`, `parent` and either `endtag` or
-- `selfclosing = true`. The returned root is a plain table of the same shape.
-- @param indices array of finished tag records with a `stringindex` field
-- @return the root node
-- Raises an error when a tag is unfinished or open/close tags do not pair up.
local function tagindex_to_tree(indices)
	local root = { tags = { } }
	local leaf = root
	local stringindex = indices.stringindex
	
	-- End of the text consumed so far; position 0 of string 1 means "nothing yet",
	-- so text in front of the first tag keeps its first character
	local last_finishmsg , last_finish = 1 , 0
	
	for _ , v in ipairs ( indices ) do
		assert ( v.finish , "Unfinished tag in index" )
		add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start )
		last_finishmsg = v.finishmsg
		last_finish = v.finish
		
		if v.selfclosed then
			local newleaf = new_stanza ( )
			newleaf.opentag = v
			newleaf.selfclosing = true
			newleaf.parent = leaf
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )
		elseif v.closed then -- Close tag
			-- A close tag with nothing open would otherwise walk off the top of the tree
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		else -- Open tag
			local newleaf = new_stanza ( )
			newleaf.opentag = v
			newleaf.parent = leaf
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )
			
			leaf = newleaf
		end
	end
	
	-- Text after the last tag, up to and including the last character of the last string
	local nstrings = #stringindex
	if nstrings >= last_finishmsg then
		add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , nstrings , #stringindex [ nstrings ] + 1 )
	end
	
	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end

-- Public interface of the module: the lexer and the tree builder.
local exports = { }
exports.index = index
exports.tagindex_to_tree = tagindex_to_tree
return exports

mercurial