Mon, 03 Jan 2011 16:47:06 +0000
util.xmllex: Add
-- Sample XMPP stanzas assigned to the globals `a` and `b`.
-- Nothing else in the visible code refers to them - presumably leftover test
-- input; confirm before removing.

-- `a`: an <iq type='set'> stanza carrying a pubsub <subscribe/> request.
a = [[
<iq type='set'
from='francisco@denmark.lit/barracks'
to='pubsub.shakespeare.lit'
id='sub1'>
<pubsub xmlns='http://jabber.org/protocol/pubsub'>
<subscribe
node='princely_musings'
jid='francisco@denmark.lit'/>
</pubsub>
</iq>
]]

-- `b`: a <message/> stanza with a plain body and an XHTML-IM body.
b = [[
<message>
<body>Wow, I'm green with envy!</body>
<html xmlns='http://jabber.org/protocol/xhtml-im'>
<body xmlns='http://www.w3.org/1999/xhtml'>
<p style='font-size:large'>
<em>Wow</em>, I'm <span style='color:green'>green</span>
with <strong>envy</strong>!
</p>
</body>
</html>
</message>
]]
26 | ||
27 | local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable | |
28 | local strsub = string.sub | |
29 | local tblconcat = table.concat | |
30 | ||
--- Extract a run of bytes that may span several buffered source strings.
-- @param stringindex array of source strings
-- @param startmsg    index in `stringindex` of the string holding the first byte
-- @param startpos    position of the first byte inside that string
-- @param finishmsg   index in `stringindex` of the string holding the last byte
-- @param finishpos   position of the last byte inside that string
-- @return the bytes from (startmsg, startpos) through (finishmsg, finishpos), inclusive
local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
	local first = stringindex [ startmsg ]

	-- Simple case: the whole range lives in a single source string.
	if startmsg == finishmsg then
		return strsub ( first , startpos , finishpos )
	end

	-- Otherwise stitch together: the tail of the first string, every string
	-- strictly in between, and the head of the last string.
	local head = strsub ( first , startpos , -1 )
	local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
	local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
	return head .. middle .. tail
end
40 | ||
-- Metatable attached to every tag record created by `index`.
local m_mt = { }

-- tostring(tag) returns the text between the tag's opening marker (ending at
-- `starte`) and its closing marker (beginning at `finishs`), both exclusive.
function m_mt.__tostring ( tag )
	local first = tag.starte + 1 -- first byte after the opening marker
	local last = tag.finishs - 1 -- last byte before the closing marker
	return getstring ( tag.stringindex , tag.startmsg , first , tag.finishmsg , last )
end
46 | ||
--- Scan one chunk of XML text and record where every tag begins and ends.
--
-- The function can be fed a document piece by piece: pass the table returned
-- by an earlier call as `r` and the next chunk as `str`. A tag that was left
-- unterminated by the previous chunk is completed by the new one.
--
-- Each entry in the array part of the result is a tag record with the fields:
--   stringindex  array of every chunk fed so far (shared with `r.stringindex`)
--   startmsg     number of the chunk containing the opening marker
--   start/starte first/last position of the opening marker ("<" or "</")
--   closed       true when the opening marker is "</"
--   finishmsg    number of the chunk containing the closing marker
--   finishs/finish first/last position of the closing marker (">" or "/>")
--   selfclosed   true when the closing marker is "/>"
--
-- @param str chunk of text to scan
-- @param r   (optional) result table of an earlier call to continue from
-- @return `r` when the chunk did not end inside a tag;
--         `false , r` when the last tag is still waiting for its ">"
local function index ( str , r )
	local stringindex
	local curstr , nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Store the buffer list on `r`; otherwise the next call would start
			-- a fresh, unrelated list and tags spanning chunks would break.
			stringindex = { }
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex [ curstr ] = str

		nexti = #r
	else
		stringindex = { str }
		curstr = 1

		r = { stringindex = stringindex }

		nexti = 0
	end

	-- Either resume the unfinished last tag (it keeps its slot `nexti`), or
	-- start a new record destined for the next free slot.
	local m
	do
		local t = r [ nexti ]
		if t and not t.finish then
			m = t
		else
			m = setmetatable ( { stringindex = stringindex } , m_mt )
			nexti = nexti + 1
		end
	end

	local d = 0 -- position where the previous tag ended in this chunk
	while true do
		local a , b , c , close , selfclosing

		if not m.start then
			a , b , close = str:find ( "<(/?)" , d )

			if not a then break end
			m.startmsg = curstr
			m.start = a
			m.starte = b
			m.closed = ( close == "/" )
		end

		r [ nexti ] = m
		nexti = nexti + 1

		-- `b` is nil only when resuming a tag opened in an earlier chunk; its
		-- terminator is then searched for from the start of this chunk.
		c , d , selfclosing = str:find ( "(/?)>" , b or 1 )
		if not c then
			-- Chunk ended inside the tag; the record stays in `r` unfinished.
			return false , r
		end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = ( selfclosing == "/" )

		m = setmetatable ( { stringindex = stringindex } , m_mt )
	end
	return r
end
110 | ||
--- Turn the flat list of tag records into a nested tree.
--
-- Every node is a table whose array part holds its children and which has a
-- `parent` field. A node made from an opening tag has `starttag` and, once its
-- closing tag is seen, `endtag`; a node made from a self-closing tag has
-- `selfclosing` instead. The returned root has none of these fields.
--
-- @param indices array of tag records (fields `selfclosed` / `closed` are read)
-- @return the root node
-- Raises "Mismatched opening/closing tags" when a closing tag has no open
-- element to close, or when elements are still open at the end of the list.
local function tagindex_to_tree(indices)
	local root = {}
	local leaf = root -- node whose children are currently being collected
	for _ , v in ipairs ( indices ) do
		if v.selfclosed then
			leaf [ #leaf + 1 ] = {
				parent = leaf ;
				selfclosing = v ;
			}
		elseif v.closed then
			-- A closing tag at root level has nothing to close; fail here with
			-- the proper message instead of crashing on a nil `leaf` later.
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		else
			local newleaf = {
				parent = leaf ;
				starttag = v ;
			}
			leaf [ #leaf + 1 ] = newleaf
			leaf = newleaf -- descend: following tags are children of this one
		end
	end
	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end
140 | ||
--- Split the text of a tag into its element name and its attributes.
--
-- @param starttag a tag record, or any value whose `tostring` is the text
--                 found between the tag's angle brackets
-- @return elem the first run of characters that are not whitespace, "=", ">",
--              "<" or "/" (nil when there is none)
-- @return attr table mapping attribute name to its raw value (the text between
--              the quotes, stored exactly as matched)
local function getattributes ( starttag )
	local str = tostring ( starttag )

	local attr = { }
	local elem = str:match ( "[^%s=></]+" )
	-- name = 'value' or name = "value". The value is matched non-greedily up to
	-- the next occurrence of the SAME quote character that opened it, so that
	-- consecutive single-quoted attributes are not merged into one value and a
	-- value may contain the other kind of quote.
	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end
151 | ||
-- Module exports.
return {
	index = index ,
	tagindex_to_tree = tagindex_to_tree ,
	getattributes = getattributes ,
	getstring = getstring ,
}