util/xmllex.lua

changeset 3990
783004a12224
child 3991
7a2856c8ab7a
equal deleted inserted replaced
3989:692d221ef9bd 3990:783004a12224
-- Two sample XML stanzas held in long strings: `a` is an <iq> carrying a
-- pubsub <subscribe/> request, `b` is a <message> with a plain <body> and an
-- XHTML <html> alternative.
-- NOTE(review): both are assigned without `local`, so loading this file sets
-- the globals `a` and `b`; nothing in the visible code reads them -- confirm
-- whether they are leftover test input.
a=[[
<iq type='set'
from='francisco@denmark.lit/barracks'
to='pubsub.shakespeare.lit'
id='sub1'>
<pubsub xmlns='http://jabber.org/protocol/pubsub'>
<subscribe
node='princely_musings'
jid='francisco@denmark.lit'/>
</pubsub>
</iq>
]]
b=[[
<message>
<body>Wow, I&apos;m green with envy!</body>
<html xmlns='http://jabber.org/protocol/xhtml-im'>
<body xmlns='http://www.w3.org/1999/xhtml'>
<p style='font-size:large'>
<em>Wow</em>, I&apos;m <span style='color:green'>green</span>
with <strong>envy</strong>!
</p>
</body>
</html>
</message>
]]
26
27 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
28 local strsub = string.sub
29 local tblconcat = table.concat
30
-- Return the text that runs from position `startpos` of chunk `startmsg` up to
-- and including position `finishpos` of chunk `finishmsg`.
--   stringindex  array of source strings (chunks)
--   startmsg     index into stringindex of the chunk where the span begins
--   startpos     character position inside that first chunk
--   finishmsg    index into stringindex of the chunk where the span ends
--   finishpos    character position inside that last chunk
local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
	if startmsg ~= finishmsg then
		-- The span crosses chunk boundaries: tail of the first chunk, every
		-- chunk strictly in between, then the head of the last chunk.
		local head = strsub ( stringindex [ startmsg ] , startpos , -1 )
		local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
		local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
		return head .. middle .. tail
	end

	-- The whole span lives inside a single chunk.
	return strsub ( stringindex [ startmsg ] , startpos , finishpos )
end
40
-- Metatable attached by `index` to every tag record it creates.
local m_mt = {
	-- tostring(tag) yields the text strictly between the end of the opening
	-- "<" / "</" match (starte) and the start of the closing ">" / "/>" match
	-- (finishs), possibly spanning several source chunks.
	__tostring = function ( tag )
		local first = tag.starte + 1
		local last = tag.finishs - 1
		return getstring ( tag.stringindex , tag.startmsg , first , tag.finishmsg , last )
	end ;
}
46
-- Scan one chunk of text for tags and record where each one starts and ends.
--
-- Parameters:
--   str  the chunk of source text to scan.
--   r    (optional) the index table from a previous call. Passing it back lets
--        a tag that was cut off at the end of the previous chunk be completed
--        by this one. When omitted a fresh index is created.
--
-- Returns:
--   r           when the chunk did not end in the middle of a tag.
--   false , r   when the chunk ended after a "<" but before the matching ">";
--               call again with the next chunk and the same r to continue.
--
-- r.stringindex is the array of every chunk seen so far. r[1..n] are tag
-- records (metatable m_mt) with the fields:
--   stringindex           the shared chunk array
--   startmsg , finishmsg  chunk numbers in which the tag opens / closes
--   start , starte        first and last position of the "<" or "</" match
--   finishs , finish      first and last position of the ">" or "/>" match
--   closed                true when the tag opened with "</"
--   selfclosed            true when the tag ended with "/>"
--
-- Limitation: the first ">" after "<" ends the tag, even inside a quoted
-- attribute value.
local function index ( str , r )
	local stringindex
	local curstr , nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Store the chunk list on r. Without this, every later call would
			-- build a different list and a tag continued across calls would
			-- carry chunk numbers that do not match the list it references.
			stringindex = { }
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex [ curstr ] = str

		nexti = #r
	else
		stringindex = { str }
		curstr = 1

		r = { stringindex = stringindex }

		nexti = 0
	end

	-- Resume the last record if it is still waiting for its ">", otherwise
	-- start a new one in the next free slot.
	local m
	do
		local t = r [ nexti ]
		if t and not t.finish then
			m = t
		else
			m = setmetatable ( { stringindex = stringindex } , m_mt )
			nexti = nexti + 1
		end
	end

	local d = 1 -- position from which to look for the next "<"
	while true do
		local a , b , c , close , selfclosing

		if not m.start then
			a , b , close = str:find ( "<(/?)" , d )

			if not a then break end -- no more tags in this chunk
			m.startmsg = curstr
			m.start = a
			m.starte = b
			m.closed = close == "/"
		end

		r [ nexti ] = m
		nexti = nexti + 1

		-- b is nil only for a record resumed from an earlier chunk; its ">"
		-- may be anywhere in this chunk, so search from the beginning.
		c , d , selfclosing = str:find ( "(/?)>" , b or 1 )
		if not c then
			-- The chunk ended inside the tag; the record stays unfinished in r.
			return false , r
		end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = selfclosing == "/"

		m = setmetatable ( { stringindex = stringindex } , m_mt )
	end
	return r
end
110
-- Turn the flat list of tag records in `indices` into a nested tree.
--
-- Parameters:
--   indices  array of tag records; only the fields `selfclosed` and `closed`
--            of each record are inspected.
--
-- Returns the root node. Every node is a table whose array part holds its
-- children in order:
--   element node       { parent = <node> , starttag = <record> , endtag = <record> , ...children }
--   self-closing node  { parent = <node> , selfclosing = <record> }
-- The root node has no parent and no starttag.
--
-- Raises "Mismatched opening/closing tags" when a closing tag appears with no
-- element open, or when elements are still open at the end of the list. Tag
-- names are not compared; only nesting depth is checked.
local function tagindex_to_tree(indices)
	local root = {}
	local leaf = root -- the element currently being filled
	for _ , v in ipairs ( indices ) do
		if v.selfclosed then
			leaf [ #leaf + 1 ] = {
				parent = leaf ;
				selfclosing = v ;
			}
		elseif v.closed then
			-- A closing tag at root level has nothing to close. Fail here with
			-- the documented message rather than letting `leaf` become nil and
			-- crash on the next record with an unrelated error.
			if leaf == root then
				error ( "Mismatched opening/closing tags" , 0 )
			end
			leaf.endtag = v
			leaf = leaf.parent
		else
			local newleaf = {
				parent = leaf ;
				starttag = v ;
			}
			leaf [ #leaf + 1 ] = newleaf
			leaf = newleaf
		end
	end
	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end
140
-- Split the inside of a start tag into its element name and attributes.
--
-- Parameters:
--   starttag  anything whose tostring() is the text of a tag without the
--             surrounding "<" and ">", e.g. a tag record or a plain string
--             such as "iq type='set' id='sub1'".
--
-- Returns:
--   elem  the first run of characters that are not whitespace, "=", ">", "<"
--         or "/" (nil when there is none).
--   attr  table mapping attribute name to its raw value. Values are not
--         entity-decoded.
local function getattributes ( starttag )
	local str = tostring ( starttag )

	local attr = { }
	local elem = str:match ( "[^%s=></]+" )
	-- The value is matched lazily up to the same quote character that opened
	-- it (%2). A greedy match that only excludes '"' would let a single-quoted
	-- value run on to the last "'" in the tag and swallow the attributes
	-- that follow it.
	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end
151
-- Public interface of the module: the lexer entry point, the tree builder,
-- the attribute parser and the span-extraction helper.
local exports = { }
exports.index = index
exports.tagindex_to_tree = tagindex_to_tree
exports.getattributes = getattributes
exports.getstring = getstring
return exports

mercurial