util/xmllex.lua

Wed, 05 Jan 2011 05:14:02 +0000

author
daurnimator <quae@daurnimator.com>
date
Wed, 05 Jan 2011 05:14:02 +0000
changeset 4002
2b53b4b5d46e
parent 3995
7214dc7a5642
child 4003
cb6ddda1cb5f
permissions
-rw-r--r--

util.xmllex, util.xmppstream: It runs

local assert , ipairs , pairs , setmetatable , rawget , rawset , tostring =
	assert , ipairs , pairs , setmetatable , rawget , rawset , tostring
local strsub , strmatch = string.sub , string.match
local tblconcat = table.concat
local tblinsert = table.insert

local stanza_methods = require "util.stanza".stanza_mt;

-- Reassemble the text of a lexed item from the source chunk(s) it spans.
--   msgs      : sequence of source strings the item was read from
--   startpos  : offset of the item's first byte inside msgs[1]
--   finishpos : offset of the item's last byte inside msgs[#msgs]
-- Returns the covered text as a single string.
local function getstring ( msgs , startpos , finishpos )
	local count = #msgs
	if count ~= 1 then
		-- Spread over several chunks: tail of the first chunk, every
		-- chunk in between in full, then the head of the last chunk.
		local head = strsub ( msgs[1] , startpos , -1 )
		local middle = tblconcat ( msgs , "" , 2 , count - 1 )
		local tail = strsub ( msgs[count] , 1 , finishpos )
		return head .. middle .. tail
	end
	-- Everything came from one chunk
	return strsub ( msgs[1] , startpos , finishpos )
end

-- Metatable shared by every lexer item (text runs and tags).
-- tostring(item) yields the item's source text; the result is built once
-- with getstring and then memoised in item.stringform.
local m_mt = {
	__tostring = function ( v )
		local cached = v.stringform
		if not cached then
			cached = getstring ( v.msgs , v.start , v.finish )
			v.stringform = cached
		end
		return cached
	end
}

-- Lexer step for the "outside a tag" state: look for the next "<" (or "</")
-- in str, starting at offset initial.
--   str     : chunk currently being scanned
--   r       : lexer result/state table (array of items plus depth and state)
--   initial : offset in str at which to start searching
-- Returns false when no tag start is found in the rest of the chunk (state
-- stays "outside"); otherwise closes the pending text item, appends a new tag
-- item, switches the state to "inside" and returns true.
local function handleoutside ( str , r , initial )
	local tagstart , tagend , slash = str:find ( "<(/?)" , initial )

	if not tagstart then
		r.state = "outside"
		return false
	end

	-- The text item that was being collected ends just before the "<"
	local text = r[#r]
	text.finish = tagstart - 1
	text.type = "text"

	-- starte is the offset right after "<" or "</"
	local tag = setmetatable ( {
		msgs = { str } ;
		start = tagstart ;
		starte = tagend + 1 ;
	} , m_mt )

	if slash == "/" then
		r.depth = r.depth - 1
		tag.type = "close"
	else
		r.depth = r.depth + 1
		tag.type = "open"
	end

	tblinsert ( r , tag )

	r.state = "inside"
	return true
end

-- Lexer step for the "inside a tag" state: look for the ">" that ends the
-- tag currently being read, starting at offset initial of str.
--   str     : chunk currently being scanned
--   r       : lexer result/state table (array of items plus depth and state)
--   initial : offset in str at which to start searching
-- Returns false when the tag does not end in this chunk (state stays
-- "inside"); otherwise completes the tag item, opens a fresh text item right
-- after it, switches the state to "outside" and returns true.
local function handleinside ( str, r , initial )
	local markstart , markend , marker = str:find ( "([/?]?)>" , initial )
	if not markstart then
		r.state = "inside"
		return false
	end

	-- finish is the offset of ">", finishs the offset just before "/>", "?>" or ">"
	local tag = r[#r]
	tag.finish = markend
	tag.finishs = markstart - 1
	if marker == "/" or marker == "?" then
		-- "/>" and "?>" end the element immediately, undoing the depth
		-- increase made when the tag was opened
		tag.type = "selfclosing"
		r.depth = r.depth - 1
	end

	-- Whatever follows the tag is text until the next "<" is seen
	local text = setmetatable ( {
		msgs = { str } ;
		start = markend + 1 ;
		type = "text" ;
	} , m_mt )
	tblinsert ( r , text )

	r.state = "outside"
	return true
end

-- Lex one chunk of XML text into a flat list of items.
--   str : the next chunk of input
--   r   : result table returned by a previous call, to continue lexing a
--         stream; omit it to start from scratch
-- Returns r: an array of text/tag items plus the fields depth (current
-- element nesting) and state ("outside" or "inside" a tag).
local function index ( str , r )
	r = r or { depth = 0, state = "outside" }

	local pending = r[#r]
	if pending then
		-- The item left unfinished by the previous chunk continues in this one
		tblinsert ( pending.msgs , str )
	else
		r[1] = setmetatable ( {
			msgs = { str } ;
			type = "text" ;
			start = 1 ;
		} , m_mt )
	end

	local initial = 1
	repeat
		local handler = ( r.state == "outside" ) and handleoutside or handleinside
		local progressed = handler ( str , r , initial )
		if progressed then
			-- Resume scanning where the newest item begins
			initial = r[#r].start
		end
	until not progressed

	return r
end

-- Extract the (possibly prefixed) element name from the text of an opening
-- tag, e.g. "<stream:stream to='x'>" gives "stream:stream".
-- Returns nil when str does not begin with "<" followed by a name character
-- (a closing tag such as "</a>" therefore yields nil).
local function get_name ( str )
	local name = strmatch ( str , "^<([^%s>/]+)" )
	return name
end

-- Parse the attributes out of the text of a tag.
--   str : source text of the tag, e.g. [[<a href="x" id='1'>]]
-- Returns a table mapping each attribute name (prefix included, exactly as
-- written) to its raw value. Entity references in values are not decoded.
--
-- A value runs up to the next occurrence of the quote character that opened
-- it, so a value may contain the other quote character (to="it's").
local function get_attr ( str  )
	local attr = { }
	-- The second capture is the opening quote; it is only needed so that %2 can
	-- require the same character as the closing delimiter.
	for name , _ , attvalue in str:gmatch ( [=[([^%s=/<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return attr
end

-- Process the namespace-related attributes of a single element.
--   attr      : table of raw attributes (name as written -> value); it is
--               modified in place
--   inherited : optional table of prefix -> namespace URI bindings that are in
--               scope from enclosing elements; consulted for prefixes that
--               this element does not declare itself
-- Returns a new table holding the declarations made by this element:
-- "xmlns:p" is stored under key "p", a plain "xmlns" under the key "".
-- For every prefixed attribute "p:name" (other than xml:* and xmlns:*) an
-- extra key URI .. "\1" .. name is added to attr; the original key is kept.
-- Raises "already resolved" if a key of attr does not match the
-- prefix/name pattern (as the "\1" keys written by an earlier call do) and
-- "unbound prefix: p" if a prefix has no binding.
--
-- Declared local: it used to leak into the global environment.
local function resolve_attr_namespaces ( attr , inherited )
	local namespace = { }
	local prefixattr = { }
	for k , attr_value in pairs ( attr ) do
		local attr_prefix , attr_name = k:match ( "^([^:\1]+):?([^\1]-)$" )
		
		if attr_prefix == nil then
			error ( "already resolved" )
		elseif attr_prefix == "xmlns" then
			namespace [ attr_name ] = attr_value
		elseif #attr_name ~= 0 and attr_prefix ~= "xml" then
			-- Collected first and written back afterwards, so that attr is
			-- never given new keys while it is being traversed
			local t = prefixattr [ attr_prefix ]
			if not t then
				t = { }
				prefixattr [ attr_prefix ] = t
			end
			t [ attr_name ] = attr_value
		end
	end

	for prefix , attrs in pairs ( prefixattr ) do
		-- A prefix may be bound on this element or on any ancestor
		local uri = namespace [ prefix ] or ( inherited and inherited [ prefix ] )
		if not uri then
			error ( "unbound prefix: " .. prefix )
		end
		for name , value in pairs ( attrs ) do
			attr [ uri .. "\1" .. name ] = value
		end
	end
	return namespace
end

-- Key under which every namespace table stores the namespace URI of the
-- element it belongs to (next to the string-keyed prefix bindings).
local currentindex = 1

-- Compute the namespace table of a stanza (used as the lazy "namespace"
-- property).
--   element : stanza with a parent whose namespace table is available
-- Returns the element's own declarations, falling back to the parent's table
-- through __index, with [currentindex] set to the element's namespace URI.
-- Raises "unbound prefix: p" when the element name or one of its attributes
-- uses a prefix that is not bound.
local function resolve_namespace ( element )
	local parent = element.parent

	local prefix = get_name ( element.str ):match ( "^([^:]+):" )

	-- Bindings of the enclosing elements are needed twice: to resolve prefixed
	-- attributes now, and as the fallback for later prefix lookups
	local inherited = parent.namespace
	local namespace = setmetatable ( resolve_attr_namespaces ( element.attr , inherited ) , { __index = inherited } )
	
	-- A prefixed element takes the URI bound to its prefix; otherwise its own
	-- xmlns attribute wins, then the parent's default, then the parent's namespace
	local current_namespace = prefix and ( namespace [ prefix ] or error ("unbound prefix: "..prefix) )
		or rawget(element.attr, "xmlns")
		or parent.attr.xmlns
		or parent.namespace [ currentindex ]
	namespace [ currentindex ] = current_namespace
	
	return namespace
end

-- Lazily computed stanza fields. Each entry maps a field name to a function
-- that derives the value from the stanza; stanza_mt.__index calls it with the
-- stanza as its only argument (so the k parameters below are always nil) and
-- stores the result on the stanza with rawset.
local dynamic_properties = {
	-- Local part of the element name: whatever follows the last ":" of the tag name
	name = function ( t )
		return get_name ( t.str ):match("[^:]+$")
	end;
	-- Attribute table parsed from the opening tag. Namespace processing is
	-- deferred until a key is looked up that the raw table does not contain.
	attr = function ( t , k )
		return setmetatable ( get_attr ( t.str ) , { __index = function ( attr_table , attr )
				-- Reading t.namespace runs resolve_namespace on first access, which
				-- adds the namespace-qualified keys to this very table; the value is
				-- not needed here, only that side effect.
				local _ = t.namespace -- DO NOT OPTIMISE AWAY WAQAS
				-- Swap this one-shot handler for a static fallback that only supplies xmlns
				setmetatable ( attr_table , { __index = { xmlns = t.namespace[currentindex] } } )
				-- Retry the lookup now that the table has been completed
				return attr_table [ attr ]
			end } )
	end;
	-- Source text of the opening tag
	str = function ( t , k )
		return tostring ( t.opentag )
	end;
	-- Namespace table of the element, see resolve_namespace
	namespace = resolve_namespace ;
}

-- Metatable of the stanzas built by tagindex_to_tree.
local stanza_mt = {
	-- Missing fields are either computed on demand (and then stored on the
	-- stanza) or looked up among the regular stanza methods.
	__index = function ( t , k )
		local compute = dynamic_properties [ k ]
		if not compute then
			return stanza_methods[k]
		end

		local value = compute ( t )
		rawset ( t , k , value )
		return value
	end ;
	
	-- A stanza flagged as modified is serialised by the regular stanza
	-- serialiser; otherwise the original source text from the start of the
	-- opening tag to the end of the closing tag is returned.
	-- NOTE(review): only opentag.msgs is consulted, which looks wrong when the
	-- end tag lies in a later chunk than the opening tag - confirm.
	__tostring = function ( t )
		if t.modified then
			return stanza_methods.__tostring ( t )
		end
		local first = t.opentag
		-- A self-closing element ends with its own opening tag
		local last = rawget ( t , "endtag" )
		if not last then
			last = rawget ( t , "selfclosing" ) and t.opentag
		end
		assert ( last )
		return getstring ( first.msgs , first.start , last.finish )
	end ;
	
	-- Creating any new field flags the stanza as modified.
	-- NOTE(review): this also fires for the opentag/selfclosing/endtag
	-- assignments made by tagindex_to_tree, so stanzas built there are already
	-- flagged - confirm whether that is intended.
	__newindex = function ( t , k , v )
		rawset ( t , "modified", true )
		rawset ( t , k , v )
	end ;
}

-- Create an empty stanza attached to parent. The new table starts with an
-- empty tags list and uses stanza_mt as its metatable.
local function new_stanza ( parent )
	local stanza = {
		tags = { } ;
		parent = parent ;
	}
	return setmetatable ( stanza , stanza_mt )
end

-- Build a stanza tree out of a slice of lexer items.
--   indices : array of lexer items; the type field of each one is compared
--             against "selfclosing", "close" and "text", anything else is
--             treated as an opening tag
--   start   : first item to consume; when omitted the whole array is used
--             (and finish is ignored)
--   finish  : last item to consume; defaults to #indices
--   root    : optional table to use as the root of the tree; it must have an
--             attr table. Its namespace, root and tags fields are overwritten.
-- Returns root, with child stanzas and text items appended in document order
-- and child stanzas additionally listed in root.tags.
-- Raises "Mismatched opening/closing tags" if the slice closes more elements
-- than it opens, or leaves an element open.
--
-- NOTE(review): the opentag/selfclosing/endtag assignments below go through
-- stanza_mt.__newindex, which also sets the stanza's modified flag.
local function tagindex_to_tree(indices, start, finish,root)
	if not start then
		start = 1
		finish = #indices
	elseif not finish then
		-- Only the lower bound was given: run to the end of the index
		finish = #indices
	end
	
	root = root or { attr = { } }
	root.namespace = resolve_attr_namespaces ( root.attr )
	root.root = true
	root.tags = { }
	local leaf = root
	
	for i = start , finish do
		local v = indices [ i ]
		
		if v.type == "selfclosing" then
			local newleaf = new_stanza ( leaf )
			newleaf.opentag = v
			newleaf.selfclosing = true
			
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )
		elseif v.type == "close" then -- Close tag
			-- A close tag with nothing open would otherwise step above the
			-- root and fail later with an unrelated nil-index error
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		elseif v.type == "text" then
			tblinsert ( leaf, v )
		else -- Open tag
			local newleaf = new_stanza ( leaf )
			newleaf.opentag = v
			
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )
			
			-- Subsequent items become children of the newly opened element
			leaf = newleaf
		end
	end
	
	-- Every element opened inside the slice must have been closed again
	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end

return {
	index = index ;
	tagindex_to_tree = tagindex_to_tree ;
	get_name = get_name ;
	get_attr = get_attr ;
	resolve_attr_namespaces = resolve_attr_namespaces ;
};

mercurial