util/xmllex.lua

Mon, 03 Jan 2011 16:47:06 +0000

author
Matthew Wild <mwild1@gmail.com>
date
Mon, 03 Jan 2011 16:47:06 +0000
changeset 3990
783004a12224
child 3991
7a2856c8ab7a
permissions
-rw-r--r--

util.xmllex: Add

-- Sample XMPP stanza: an <iq type='set'> carrying a pubsub <subscribe/> request.
-- Assigned to the global `a`; nothing else in the visible source reads it.
-- NOTE(review): presumably a leftover test fixture for the lexer below - confirm
-- before relying on it or removing it.
a=[[
<iq type='set'
    from='francisco@denmark.lit/barracks'
    to='pubsub.shakespeare.lit'
    id='sub1'>
  <pubsub xmlns='http://jabber.org/protocol/pubsub'>
    <subscribe
        node='princely_musings'
        jid='francisco@denmark.lit'/>
  </pubsub>
</iq>
]]
-- Sample XMPP stanza: a <message> with a plain <body> and an XHTML-IM <html> body
-- (nested elements, attributes and &apos; entities).
-- Assigned to the global `b`; nothing else in the visible source reads it.
-- NOTE(review): presumably a leftover test fixture as well - confirm.
b=[[
<message>
  <body>Wow, I&apos;m green with envy!</body>
  <html xmlns='http://jabber.org/protocol/xhtml-im'>
    <body xmlns='http://www.w3.org/1999/xhtml'>
      <p style='font-size:large'>
        <em>Wow</em>, I&apos;m <span style='color:green'>green</span>
        with <strong>envy</strong>!
      </p>
    </body>
  </html>
</message>
]]

local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
local strsub = string.sub
local tblconcat = table.concat

--- Extracts a span of text that may cross several source strings.
-- @param stringindex  array of source strings
-- @param startmsg     index (into stringindex) of the string where the span begins
-- @param startpos     position of the first wanted character inside that string
-- @param finishmsg    index (into stringindex) of the string where the span ends
-- @param finishpos    position of the last wanted character inside that string
-- @return the extracted text as a single string
local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
	local first = stringindex [ startmsg ]
	if startmsg == finishmsg then
		-- The whole span lives in one source string
		return strsub ( first , startpos , finishpos )
	end
	-- Tail of the first string, every string strictly in between, head of the last
	local head = strsub ( first , startpos , -1 )
	local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
	local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
	return head .. middle .. tail
end

-- Metatable given to every tag record produced by index().
local m_mt = { }

-- tostring(tag) yields the text strictly between the tag's opening marker
-- (ending at `starte`) and its closing marker (beginning at `finishs`),
-- fetched from the record's source strings via getstring().
function m_mt.__tostring ( tag )
	local first = tag.starte + 1
	local last = tag.finishs - 1
	return getstring ( tag.stringindex , tag.startmsg , first , tag.finishmsg , last )
end

--- Scans `str` for XML tags and records where each one starts and finishes.
--
-- Every tag becomes a record (created with `m_mt` as its metatable) stored in
-- the array part of the result. A record carries:
--   stringindex        the shared list of all source strings indexed so far
--   startmsg , start   which source string holds the "<", and its position there
--   starte             position of the last character of the "<" or "</" marker
--   closed             true when the tag began with "</"
--   finishmsg , finish which source string holds the ">", and its position there
--   finishs            position of the first character of the ">" or "/>" marker
--   selfclosed         true when the tag ended with "/>"
--
-- @param str  the next chunk of XML text to scan
-- @param r    optional table returned by an earlier call. `str` is appended to
--             its `stringindex`, and a trailing record with no `finish` yet is
--             completed from `str` instead of starting a new one.
-- @return `r` when `str` did not end inside a tag; `false , r` when `str` ran
--         out before the ">" of the last tag (pass `r` back in with more text).
local function index ( str , r )
	local stringindex
	local curstr , nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Keep the list on r: otherwise the next call would build another
			-- list and restart numbering at 1, leaving records that span calls
			-- with startmsg/finishmsg that no longer address the same list.
			stringindex = { }
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex [ curstr ] = str
		
		nexti = #r
	else
		stringindex = { str }
		curstr = 1
		
		r = { stringindex = stringindex }
		
		nexti = 0
	end
	
	local m
	do
		local t = r [ nexti ]
		if t and not t.finish then
			-- Last record is still open: resume it, reusing its slot
			m = t
		else
			m = setmetatable ( { stringindex = stringindex } , m_mt )
			nexti = nexti + 1
		end
	end
	
	local d = 0
	while true do
		local a , b , c , close , selfclosing

		if not m.start then
			a , b , close = str:find ( "<(/?)" , d )
			
			if not a then break end -- no further tags in this string
			m.startmsg = curstr
			m.start = a
			m.starte = b
			m.closed = ( close == "/" )
		end
		
		r [ nexti ] = m
		nexti = nexti + 1
		
		-- For a resumed record b is nil, so the search starts at the beginning of str
		c , d , selfclosing = str:find ( "(/?)>" , b )
		if not c then
			-- Tag is unfinished; it stays in r without a `finish` field
			return false , r
		end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = selfclosing == "/"
		
		m = setmetatable ( { stringindex = stringindex } , m_mt )
	end
	return r
end

--- Turns the flat tag list produced by index() into a nested tree.
--
-- Each node is a table whose array part holds its child nodes, plus:
--   parent       the enclosing node (absent on the root)
--   starttag     the opening tag record (nodes created by an opening tag)
--   endtag       the closing tag record (set when the matching close is seen)
--   selfclosing  the tag record (nodes created by a self-closing tag)
--
-- Only nesting depth is checked; tag names are not compared.
--
-- @param indices  array of tag records with `selfclosed` / `closed` fields
-- @return the root node
-- Raises "Mismatched opening/closing tags" when a closing tag appears with no
-- element open, or when elements are still open at the end of the list.
local function tagindex_to_tree(indices)
	local root = {}
	local leaf = root
	for _ , v in ipairs ( indices ) do
		if v.selfclosed then
			leaf [ #leaf + 1 ] = {
				parent = leaf ;
				selfclosing = v ;
			}
		elseif v.closed then
			-- Without this check a stray close would set leaf to nil and the
			-- next tag would fail with an unrelated "nil value" error.
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		else
			local newleaf = {
				parent = leaf ;
				starttag = v ;
			}
			leaf [ #leaf + 1 ] = newleaf
			leaf = newleaf
		end
	end
	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end

--- Splits the text of a start tag into its element name and attributes.
-- @param starttag  a tag record (converted with tostring) or the tag text itself,
--                  e.g. "iq type='set' id='sub1'"
-- @return the element name (first run of characters that are not whitespace,
--         "=", ">", "<" or "/"), or nil if there is none
-- @return a table mapping attribute names to their values. Values are returned
--         exactly as written between the quotes; entities such as &apos; are
--         not decoded.
local function getattributes ( starttag )
	local str = tostring ( starttag )
	
	local attr = { }
	local elem = str:match ( "[^%s=></]+" )
	-- The value is matched lazily up to the next occurrence of the SAME quote
	-- character that opened it (%2), so single- and double-quoted values both
	-- stop at their own closing quote and may contain the other quote kind.
	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end

-- Module exports: the lexer, the tree builder and the two string helpers.
local exports = { }
exports.index = index
exports.tagindex_to_tree = tagindex_to_tree
exports.getattributes = getattributes
exports.getstring = getstring
return exports

mercurial