util/xmllex.lua

Mon, 03 Jan 2011 16:47:06 +0000

author
Matthew Wild <mwild1@gmail.com>
date
Mon, 03 Jan 2011 16:47:06 +0000
changeset 3990
783004a12224
child 3991
7a2856c8ab7a
permissions
-rw-r--r--

util.xmllex: Add

3990
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
1 a=[[
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
2 <iq type='set'
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
3 from='francisco@denmark.lit/barracks'
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
4 to='pubsub.shakespeare.lit'
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
5 id='sub1'>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
6 <pubsub xmlns='http://jabber.org/protocol/pubsub'>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
7 <subscribe
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
8 node='princely_musings'
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
9 jid='francisco@denmark.lit'/>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
10 </pubsub>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
11 </iq>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
12 ]]
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
13 b=[[
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
14 <message>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
15 <body>Wow, I&apos;m green with envy!</body>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
16 <html xmlns='http://jabber.org/protocol/xhtml-im'>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
17 <body xmlns='http://www.w3.org/1999/xhtml'>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
18 <p style='font-size:large'>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
19 <em>Wow</em>, I&apos;m <span style='color:green'>green</span>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
20 with <strong>envy</strong>!
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
21 </p>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
22 </body>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
23 </html>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
24 </message>
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
25 ]]
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
26
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
27 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
28 local strsub = string.sub
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
29 local tblconcat = table.concat
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
30
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
31 local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
32 if startmsg == finishmsg then --All originated in same string
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
33 return strsub ( stringindex [ startmsg ] , startpos , finishpos )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
34 else -- Over multiple source strings
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
35 return strsub ( stringindex [ startmsg ] , startpos , -1 )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
36 .. tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
37 .. strsub ( stringindex [ finishmsg ] , 1 , finishpos )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
38 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
39 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
40
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
41 local m_mt = {
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
42 __tostring = function ( v )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
43 return getstring ( v.stringindex , v.startmsg , v.starte + 1 , v.finishmsg , v.finishs - 1 )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
44 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
45 }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
46
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
47 local function index ( str , r )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
48 local stringindex
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
49 local curstr , nexti
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
50 if r then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
51 stringindex = r.stringindex or { }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
52 curstr = #stringindex + 1
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
53 stringindex [ curstr ] = str
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
54
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
55 nexti = #r
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
56 else
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
57 stringindex = { str }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
58 curstr = 1
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
59
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
60 r = { stringindex = stringindex }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
61
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
62 nexti = 0
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
63 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
64
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
65 local m
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
66 do
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
67 local t = r [ nexti ]
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
68 if t and not t.finish then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
69 m = t
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
70 else
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
71 m = setmetatable ( { stringindex = stringindex } , m_mt )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
72 nexti = nexti + 1
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
73 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
74 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
75
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
76 local d = 0
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
77 while true do
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
78 local a , b , c , close , selfclosing
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
79
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
80 if not m.start then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
81 a , b , close = str:find ( "<(/?)" , d )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
82
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
83 if not a then break end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
84 m.startmsg = curstr
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
85 m.start = a
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
86 m.starte = b
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
87 if close == "/" then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
88 m.closed = true
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
89 else
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
90 m.closed = false
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
91 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
92 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
93
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
94 r [ nexti ] = m
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
95 nexti = nexti + 1
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
96
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
97 c , d , selfclosing = str:find ( "(/?)>" , b )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
98 if not c then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
99 return false , r
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
100 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
101 m.finishmsg = curstr
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
102 m.finish = d
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
103 m.finishs = c
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
104 m.selfclosed = selfclosing == "/"
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
105
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
106 m = setmetatable ( { stringindex = stringindex } , m_mt )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
107 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
108 return r
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
109 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
110
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
111 local function tagindex_to_tree(indices)
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
112 local root = {}
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
113 local leaf = root
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
114 local stringindex = indices.stringindex
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
115 for k ,v in ipairs ( indices ) do
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
116 if v.selfclosed then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
117 print("selfclosed",v)
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
118 leaf [ #leaf + 1 ] = {
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
119 parent = leaf ;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
120 selfclosing = v ;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
121 }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
122 elseif v.closed then
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
123 print("close",v)
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
124 leaf.endtag = v
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
125 leaf = leaf.parent
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
126 else
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
127 print("open",v)
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
128
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
129 local newleaf = {
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
130 parent = leaf ;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
131 starttag = v ;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
132 }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
133 leaf [ #leaf + 1 ] = newleaf
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
134 leaf = newleaf
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
135 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
136 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
137 assert ( leaf == root , "Mismatched opening/closing tags" )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
138 return root;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
139 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
140
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
141 local function getattributes ( starttag )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
142 local str = tostring ( starttag )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
143
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
144 local attr = { }
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
145 local elem = str:match ( "[^%s=></]+" )
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
146 for name , quote, attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])([^"]*)%2]=] ) do
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
147 attr [ name ] = attvalue
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
148 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
149 return elem , attr
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
150 end
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
151
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
152 return {
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
153 index = index;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
154 tagindex_to_tree = tagindex_to_tree;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
155 getattributes = getattributes;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
156 getstring = getstring;
783004a12224 util.xmllex: Add
Matthew Wild <mwild1@gmail.com>
parents:
diff changeset
157 };

mercurial