util/xmllex.lua

changeset 3991
7a2856c8ab7a
parent 3990
783004a12224
child 3992
de77ec2b49bc
equal deleted inserted replaced
3990:783004a12224 3991:7a2856c8ab7a
-- Sample XML stanzas assigned to the globals `a` and `b`.
-- These lines carry line numbers from the replaced revision only, and no code
-- visible in this file references either global.
-- `a`: an <iq type='set'> carrying a pubsub <subscribe/> (self-closing) element.
1 a=[[
2 <iq type='set'
3 from='francisco@denmark.lit/barracks'
4 to='pubsub.shakespeare.lit'
5 id='sub1'>
6 <pubsub xmlns='http://jabber.org/protocol/pubsub'>
7 <subscribe
8 node='princely_musings'
9 jid='francisco@denmark.lit'/>
10 </pubsub>
11 </iq>
12 ]]
-- `b`: a <message> with a plain <body> and an XHTML <html> alternative that
-- nests inline elements and contains the &apos; entity.
13 b=[[
14 <message>
15 <body>Wow, I&apos;m green with envy!</body>
16 <html xmlns='http://jabber.org/protocol/xhtml-im'>
17 <body xmlns='http://www.w3.org/1999/xhtml'>
18 <p style='font-size:large'>
19 <em>Wow</em>, I&apos;m <span style='color:green'>green</span>
20 with <strong>envy</strong>!
21 </p>
22 </body>
23 </html>
24 </message>
25 ]]
26
27 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable 1 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
28 local strsub = string.sub 2 local strsub = string.sub
29 local tblconcat = table.concat 3 local tblconcat = table.concat
4 local tblinsert = table.insert
30 5
31 local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos ) 6 local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
32 if startmsg == finishmsg then --All originated in same string 7 if startmsg == finishmsg then --All originated in same string
33 return strsub ( stringindex [ startmsg ] , startpos , finishpos ) 8 return strsub ( stringindex [ startmsg ] , startpos , finishpos )
34 else -- Over multiple source strings 9 else -- Over multiple source strings
38 end 13 end
39 end 14 end
40 15
-- Metatable attached to the records built with `setmetatable ( ... , m_mt )`
-- in this file; it only supplies __tostring.
-- NOTE(review): this span is a side-by-side diff. Each line shows the replaced
-- revision first and the inserted revision second.
41 local m_mt = { 16 local m_mt = {
42 __tostring = function ( v ) 17 __tostring = function ( v )
-- Replaced revision: slices v.starte + 1 .. v.finishs - 1 through getstring on
-- every call.
-- Inserted revision: returns the cached v.stringform when it is set; otherwise
-- slices v.start .. v.finish through getstring, stores the result on the record
-- with rawset under "stringform", and returns it.
43 return getstring ( v.stringindex , v.startmsg , v.starte + 1 , v.finishmsg , v.finishs - 1 ) 18 local str = v.stringform
19 if str then
20 return str
21 else
22 str = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish )
23 rawset ( v , "stringform" , str )
24 return str
25 end
44 end 26 end
45 } 27 }
46 28
47 local function index ( str , r ) 29 local function index ( str , r )
48 local stringindex 30 local stringindex
106 m = setmetatable ( { stringindex = stringindex } , m_mt ) 88 m = setmetatable ( { stringindex = stringindex } , m_mt )
107 end 89 end
108 return r 90 return r
109 end 91 end
110 92
--- Split a start tag into its element name and attribute table.
-- @param starttag  anything whose tostring() yields the raw tag text,
--                  e.g. "<iq type='set' id=\"x\">"
-- @return elem  the first run of characters that are not whitespace, '=',
--               '<', '>' or '/' (the element name), or nil if there is none
-- @return attr  table mapping attribute name -> raw (still entity-escaped)
--               attribute value
local function process_starttag ( starttag )
	local str = tostring ( starttag )
	local attr = { }

	local elem = str:match ( "[^%s=></]+" )
	-- The value is matched lazily up to the *same* quote character that opened
	-- it (%2). The previous "[^\"]*" was greedy and only excluded double
	-- quotes, so with single-quoted attributes the first value swallowed every
	-- following attribute up to the last "'" in the tag.
	for name , quote , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end

-- Name this function had in the replaced revision; kept as an alias so
-- references to the old name still resolve.
local getattributes = process_starttag
151 103
-- Fallback table consulted by stanza_mt.__index for keys that are not stored
-- on the stanza itself. Nothing visible in this file defines it, so reuse a
-- global of that name if one exists and otherwise start with an empty table;
-- without this every missing-key lookup raised "attempt to index a nil value".
local stanza_methods = stanza_methods or { }

-- Metatable for element nodes created by new_stanza.
local stanza_mt = {
	-- `name` and `attr` are parsed from the open tag on first access and then
	-- cached on the stanza with rawset, so the parse runs at most once.
	-- Any other missing key is looked up in stanza_methods.
	__index = function ( t , k )
		if k == "name" or k == "attr" then
			local opentag = rawget ( t , "opentag" )
			if opentag == nil then
				-- No open tag recorded: report "absent" instead of parsing
				-- tostring(nil) and returning an element called "nil".
				return nil
			end
			local elem , attr = process_starttag ( opentag )
			rawset ( t , "name" , elem )
			rawset ( t , "attr" , attr )
			return rawget ( t , k )
		else
			return stanza_methods [ k ]
		end
	end ;
	-- Source text from the first character of the open tag through the last
	-- character of the end tag; a self-closing stanza uses its open tag as
	-- both ends. Raises if the stanza has neither.
	__tostring = function ( t )
		local opentag = t.opentag
		local endtag = rawget ( t , "endtag" ) or ( rawget ( t , "selfclosing" ) and opentag )
		assert ( endtag , "stanza has no end tag and is not self-closing" )
		return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish )
	end ;
}
121
-- Build an empty stanza node: a table holding an empty `tags` list, with
-- stanza_mt installed as its metatable.
local function new_stanza ( )
	local stanza = { tags = { } }
	return setmetatable ( stanza , stanza_mt )
end
125
--- Append the text lying between two tags to `leaf` as a lazy string node.
-- @param stringindex     array of source strings
-- @param leaf            table the text node is appended to
-- @param last_finishmsg  index of the source string where the previous tag ended
-- @param last_finish     position of the previous tag's last character
-- @param new_startmsg    index of the source string where the next tag starts
-- @param new_start       position of the next tag's first character; a value
--                        <= 0 gives an end-relative finish (0 -> finish -1,
--                        i.e. "up to the end of that string")
-- Nothing is appended when the span is provably empty. The old guard required
-- the two ends to be in *different* source strings, so text between two tags
-- in the same string was always dropped, and it also treated a one-character
-- span as empty.
local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start )
	local start , finish = last_finish + 1 , new_start - 1

	if last_finishmsg == new_startmsg then
		-- Same source string: emptiness can be decided exactly.
		local span_end = finish
		local src = stringindex [ new_startmsg ]
		if span_end < 0 and src then
			-- Resolve an end-relative position the way string.sub would.
			span_end = #src + span_end + 1
		end
		if start > span_end then
			return
		end
	end
	-- Spans crossing source strings are always recorded; their length is not
	-- checked here.

	tblinsert ( leaf , setmetatable ( {
		stringindex = stringindex ;
		startmsg = last_finishmsg ; start = start ;
		finishmsg = new_startmsg ; finish = finish ;
	} , m_mt ) )
end
135
--- Turn the flat list of tag records in `indices` into a tree.
-- @param indices  array of tag records (fields read here: startmsg, start,
--                 finishmsg, finish, selfclosed, closed) plus a `stringindex`
--                 field holding the source strings
-- @return root    table whose array part holds child stanzas and text nodes in
--                 document order, and whose `tags` list holds only the child
--                 stanzas; every stanza has the same layout
-- Raises "Mismatched opening/closing tags" when a close tag has no open
-- element to close, or when elements are still open at the end.
local function tagindex_to_tree(indices)
	local root = { tags = { } }
	local leaf = root
	local stringindex = indices.stringindex

	-- (source string, position) of the last character consumed so far.
	-- Position 0 means "nothing consumed yet", so text before the first tag
	-- starts at character 1; starting from 1 skipped that first character.
	local last_finishmsg , last_finish = 1 , 0

	for _ , v in ipairs ( indices ) do
		-- Text between the previous tag and this one belongs to the element
		-- that is currently open.
		add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start )
		last_finishmsg = v.finishmsg
		last_finish = v.finish

		if v.selfclosed then
			local newleaf = new_stanza ( )
			newleaf.opentag = v
			newleaf.selfclosing = true
			newleaf.parent = leaf
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )
		elseif v.closed then -- Close tag
			-- A close tag at root level has nothing to close. Fail here with
			-- the real reason instead of a nil-index error one step later.
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		else -- Open tag
			local newleaf = new_stanza ( )
			newleaf.opentag = v
			newleaf.parent = leaf
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )

			leaf = newleaf
		end
	end
	-- Trailing text after the last tag. add_inbetween stores new_start - 1 as
	-- the finish position, so 0 yields -1, which string.sub reads as "through
	-- the last character"; the former -1 yielded -2 and cut one character off.
	add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , #stringindex , 0 )

	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end
173
-- Module export table.
-- NOTE(review): side-by-side diff. The replaced revision exports index,
-- tagindex_to_tree, getattributes and getstring; the inserted revision exports
-- only index and tagindex_to_tree.
152 return { 174 return {
153 index = index; 175 index = index;
154 tagindex_to_tree = tagindex_to_tree; 176 tagindex_to_tree = tagindex_to_tree;
155 getattributes = getattributes;
156 getstring = getstring;
157 }; 177 };

mercurial