util.xmllex: Add

Mon, 03 Jan 2011 16:47:06 +0000

author
Matthew Wild <mwild1@gmail.com>
date
Mon, 03 Jan 2011 16:47:06 +0000
changeset 3990
783004a12224
parent 3989
692d221ef9bd
child 3991
7a2856c8ab7a

util.xmllex: Add

util/xmllex.lua file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/util/xmllex.lua	Mon Jan 03 16:47:06 2011 +0000
@@ -0,0 +1,157 @@
+a=[[
+<iq type='set'
+    from='francisco@denmark.lit/barracks'
+    to='pubsub.shakespeare.lit'
+    id='sub1'>
+  <pubsub xmlns='http://jabber.org/protocol/pubsub'>
+    <subscribe
+        node='princely_musings'
+        jid='francisco@denmark.lit'/>
+  </pubsub>
+</iq>
+]] -- sample pubsub-subscribe <iq/> stanza; assigned to the global `a`, which no function below reads
+b=[[
+<message>
+  <body>Wow, I&apos;m green with envy!</body>
+  <html xmlns='http://jabber.org/protocol/xhtml-im'>
+    <body xmlns='http://www.w3.org/1999/xhtml'>
+      <p style='font-size:large'>
+        <em>Wow</em>, I&apos;m <span style='color:green'>green</span>
+        with <strong>envy</strong>!
+      </p>
+    </body>
+  </html>
+</message>
+]] -- sample XHTML-IM <message/> stanza; assigned to the global `b`, which no function below reads
+
+local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
+local strsub = string.sub
+local tblconcat = table.concat
+
+--- Extract a span of text from a list of source strings.
+-- The span starts at byte `startpos` of `stringindex[startmsg]` and ends at
+-- byte `finishpos` of `stringindex[finishmsg]`, both inclusive.
+-- @param stringindex array of source strings
+-- @param startmsg index of the string the span starts in
+-- @param startpos first byte of the span within that string
+-- @param finishmsg index of the string the span ends in
+-- @param finishpos last byte of the span within that string
+-- @return the extracted text
+local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
+	local first = stringindex [ startmsg ]
+
+	-- Simple case: the whole span lives inside a single source string
+	if startmsg == finishmsg then
+		return strsub ( first , startpos , finishpos )
+	end
+
+	-- Otherwise stitch together: rest of the first string, every string
+	-- strictly in between, and the leading part of the last string
+	local head = strsub ( first , startpos , -1 )
+	local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
+	local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
+	return head .. middle .. tail
+end
+
+-- Metatable attached to every tag record so that a record can be passed
+-- straight to tostring().
+local m_mt = { }
+
+--- Render a tag record as the text found between its opener and its closer.
+-- The span begins one byte after the end of the opener (`starte`) and stops
+-- one byte before the start of the closer (`finishs`).
+function m_mt.__tostring ( tag )
+	local from = tag.starte + 1
+	local to = tag.finishs - 1
+	return getstring ( tag.stringindex , tag.startmsg , from , tag.finishmsg , to )
+end
+
+--- Lex one chunk of XML text into a flat list of tag records.
+--
+-- Can be called repeatedly to feed a document in pieces: pass the table
+-- obtained from the previous call as `r` and the new records are appended to
+-- it. A tag left unfinished by the previous chunk is completed first.
+--
+-- The result table holds the records in its array part plus a `stringindex`
+-- field listing every chunk fed so far. Each record carries:
+--   stringindex           the same shared list of chunks
+--   startmsg              index of the chunk containing the opener
+--   start, starte         first and last byte of the "<" or "</" opener
+--   finishmsg             index of the chunk containing the closer
+--   finishs, finish       first and last byte of the ">" or "/>" closer
+--   closed                true when the opener was "</"
+--   selfclosed            true when the closer was "/>"
+--
+-- NOTE: each pattern is matched against the current chunk only, so a "</" or
+-- "/>" pair that is split across two chunks is not recognised as a pair.
+--
+-- @param str the next chunk of XML text
+-- @param r (optional) result table from an earlier call to continue from
+-- @return the result table, when no tag is left open at the end of `str`
+-- @return false followed by the result table, when `str` ended inside a tag
+local function index ( str , r )
+	local stringindex
+	local curstr , nexti
+	if r then
+		stringindex = r.stringindex or { }
+		-- Store the list back: a caller-supplied table that had no
+		-- stringindex yet would otherwise be returned without one.
+		r.stringindex = stringindex
+		curstr = #stringindex + 1
+		stringindex [ curstr ] = str
+
+		nexti = #r
+	else
+		stringindex = { str }
+		curstr = 1
+
+		r = { stringindex = stringindex }
+
+		nexti = 0
+	end
+
+	-- Pick the record to work on: resume the last one if it never saw its
+	-- closer, otherwise start a fresh record in the next free slot.
+	local m
+	do
+		local t = r [ nexti ]
+		if t and not t.finish then
+			m = t
+		else
+			m = setmetatable ( { stringindex = stringindex } , m_mt )
+			nexti = nexti + 1
+		end
+	end
+
+	local d = 0
+	while true do
+		local a , b , c , close , selfclosing
+
+		-- A resumed record already has its opener; only look for "<" otherwise
+		if not m.start then
+			a , b , close = str:find ( "<(/?)" , d )
+
+			if not a then break end -- no more tags in this chunk
+			m.startmsg = curstr
+			m.start = a
+			m.starte = b
+			if close == "/" then
+				m.closed = true
+			else
+				m.closed = false
+			end
+		end
+
+		r [ nexti ] = m
+		nexti = nexti + 1
+
+		-- For a resumed record `b` is nil, so the search starts at byte 1
+		c , d , selfclosing = str:find ( "(/?)>" , b )
+		if not c then
+			-- Chunk ended inside the tag; the caller should feed more text
+			return false , r
+		end
+		m.finishmsg = curstr
+		m.finish = d
+		m.finishs = c
+		m.selfclosed = selfclosing == "/"
+
+		m = setmetatable ( { stringindex = stringindex } , m_mt )
+	end
+	return r
+end
+
+--- Turn the flat tag list produced by index() into a nested tree.
+--
+-- Each opening tag becomes a node `{ parent = ..., starttag = record }` whose
+-- children are stored in its array part; the matching closing tag is stored
+-- in the node's `endtag` field. A self-closing tag becomes a childless node
+-- `{ parent = ..., selfclosing = record }`. Closing tags are paired purely by
+-- nesting depth; tag names are not compared.
+--
+-- NOTE: the print() calls trace every tag to standard output; they are kept
+-- so the function's observable output is unchanged.
+--
+-- @param indices table whose array part holds the tag records
+-- @return the root node, whose array part holds the top-level elements
+-- Raises "Mismatched opening/closing tags" when a closing tag has no open
+-- element to close, or when elements are still open at the end of the list.
+local function tagindex_to_tree(indices)
+	local root = {}
+	local leaf = root
+	for _ , v in ipairs ( indices ) do
+		if v.selfclosed then
+			print("selfclosed",v)
+			leaf [ #leaf + 1 ] = {
+				parent = leaf ;
+				selfclosing = v ;
+			}
+		elseif v.closed then
+			print("close",v)
+			-- The root has no parent: closing it would leave `leaf` nil and
+			-- make the next tag fail with an unrelated nil-index error.
+			assert ( leaf ~= root , "Mismatched opening/closing tags" )
+			leaf.endtag = v
+			leaf = leaf.parent
+		else
+			print("open",v)
+
+			local newleaf = {
+				parent = leaf ;
+				starttag = v ;
+			}
+			leaf [ #leaf + 1 ] = newleaf
+			leaf = newleaf
+		end
+	end
+	assert ( leaf == root , "Mismatched opening/closing tags" )
+	return root;
+end
+
+--- Split the text of a tag into its element name and its attributes.
+--
+-- @param starttag a tag record, or any value whose tostring() form is the
+--        text found inside the angle brackets, e.g. "iq type='set'"
+-- @return the element name: the first run of characters that are not
+--         whitespace, "=", "<", ">" or "/" (nil when there is none)
+-- @return a table mapping each attribute name to its value; values are
+--         returned exactly as written, without the surrounding quotes
+local function getattributes ( starttag )
+	local str = tostring ( starttag )
+
+	local attr = { }
+	local elem = str:match ( "[^%s=></]+" )
+	-- The value is matched lazily up to the first quote identical to the
+	-- opening one (%2). A greedy match would run on to the LAST such quote in
+	-- the tag and swallow every attribute in between.
+	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
+		attr [ name ] = attvalue
+	end
+	return elem , attr
+end
+
+return { -- functions exported by this module
+	index = index; -- lex a chunk of text into a list of tag records
+	tagindex_to_tree = tagindex_to_tree; -- nest a list of tag records into a tree
+	getattributes = getattributes; -- element name and attribute table of one tag
+	getstring = getstring; -- extract a span from a list of source strings
+};

mercurial