util/xmllex.lua

changeset 3992
de77ec2b49bc
parent 3991
7a2856c8ab7a
child 3995
7214dc7a5642
equal deleted inserted replaced
3991:7a2856c8ab7a 3992:de77ec2b49bc
1 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable 1 local assert, ipairs , pairs , setmetatable , rawget , rawset , tostring =
2 assert, ipairs , pairs , setmetatable , rawget , rawset , tostring
2 local strsub = string.sub 3 local strsub = string.sub
3 local tblconcat = table.concat 4 local tblconcat = table.concat
4 local tblinsert = table.insert 5 local tblinsert = table.insert
5 6
6 local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos ) 7 local function getstring ( msgs , startpos , finishpos )
7 if startmsg == finishmsg then --All originated in same string 8 if #msgs == 1 then --All originated in same string
8 return strsub ( stringindex [ startmsg ] , startpos , finishpos ) 9 return strsub ( msgs[1] , startpos , finishpos )
9 else -- Over multiple source strings 10 else -- Over multiple source strings
10 return strsub ( stringindex [ startmsg ] , startpos , -1 ) 11 return strsub ( msgs[1] , startpos , -1 )
11 .. tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 ) 12 .. tblconcat ( msgs , "" , 2 , #msgs - 1 )
12 .. strsub ( stringindex [ finishmsg ] , 1 , finishpos ) 13 .. strsub ( msgs[#msgs] , 1 , finishpos )
13 end 14 end
14 end 15 end
15 16
16 local m_mt = { 17 local m_mt = {
17 __tostring = function ( v ) 18 __tostring = function ( v )
18 local str = v.stringform 19 local str = v.stringform
19 if str then 20 if str then
20 return str 21 return str
21 else 22 else
22 str = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish ) 23 str = getstring ( v.msgs , v.start , v.finish )
23 rawset ( v , "stringform" , str ) 24 v.stringform = str
24 return str 25 return str
25 end 26 end
26 end 27 end
27 } 28 }
28 29
29 local function index ( str , r ) 30 local function handleoutside ( str , r , initial )
30 local stringindex 31 local a , b , close = str:find ( "<(/?)" , initial )
31 local curstr , nexti 32 if not a then
32 if r then 33 r.state = "outside"
33 stringindex = r.stringindex or { } 34 return false
34 curstr = #stringindex + 1
35 stringindex [ curstr ] = str
36
37 nexti = #r
38 else
39 stringindex = { str }
40 curstr = 1
41
42 r = { stringindex = stringindex }
43
44 nexti = 0
45 end 35 end
46 36
47 local m 37 --Finalise text object
48 do 38 local m = r[#r]
49 local t = r [ nexti ] 39 m.finish = a - 1
50 if t and not t.finish then 40 m.type = "text"
51 m = t 41
52 else 42 local m = setmetatable ( {
53 m = setmetatable ( { stringindex = stringindex } , m_mt ) 43 msgs = { str } ;
54 nexti = nexti + 1 44 start = a ;
55 end 45 starte = b + 1 ;
46 } , m_mt )
47
48 if close ~= "/" then
49 r.depth = r.depth + 1
50 m.type = "open"
51 else
52 r.depth = r.depth - 1
53 m.type = "close"
56 end 54 end
57 55
58 local d = 0 56 tblinsert ( r , m )
59 while true do
60 local a , b , c , close , selfclosing
61 57
62 if not m.start then 58 r.state = "inside"
63 a , b , close = str:find ( "<(/?)" , d ) 59 return true
64 60 end
65 if not a then break end 61
66 m.startmsg = curstr 62 local function handleinside ( str, r , initial )
67 m.start = a 63 local c , d , selfclosing = str:find ( "(/?)>" , initial )
68 m.starte = b 64 if not c then
69 if close == "/" then 65 r.state = "inside"
70 m.closed = true 66 return false
71 else 67 end
72 m.closed = false 68
69 local m = r[#r]
70 m.finish = d
71 m.finishs = c - 1
72 if selfclosing == "/" then
73 m.type = "selfclosing"
74 r.depth = r.depth - 1
75 end
76
77 local m = setmetatable ( {
78 msgs = { str } ;
79 start = d + 1 ;
80 type = "text" ;
81 } , m_mt )
82 tblinsert ( r , m )
83
84 r.state = "outside"
85 return true
86 end
87
88 local function index ( str , r )
89 r = r or { depth = 0, state = "outside" }
90
91 if not r[#r] then
92 r[1] = setmetatable ( {
93 msgs = { str } ;
94 type = "text" ;
95 start = 1 ;
96 } , m_mt )
97 else
98 tblinsert ( r[#r].msgs , str )
99 end
100
101 repeat
102 if r.state == "outside" then
103 if not handleoutside ( str , r , r[#r].start ) then
104 break
105 end
106 else
107 if not handleinside ( str , r , r[#r].start ) then
108 break
73 end 109 end
74 end 110 end
75 111 until false
76 r [ nexti ] = m 112
77 nexti = nexti + 1
78
79 c , d , selfclosing = str:find ( "(/?)>" , b )
80 if not c then
81 return false , r
82 end
83 m.finishmsg = curstr
84 m.finish = d
85 m.finishs = c
86 m.selfclosed = selfclosing == "/"
87
88 m = setmetatable ( { stringindex = stringindex } , m_mt )
89 end
90 return r 113 return r
91 end 114 end
92 115
93 local function process_starttag ( starttag ) 116 local function process_starttag ( starttag )
94 local str = tostring ( starttag ) 117 local str = tostring ( starttag )
107 local elem , attr = process_starttag ( t.opentag ) 130 local elem , attr = process_starttag ( t.opentag )
108 rawset ( t , "name" , elem ) 131 rawset ( t , "name" , elem )
109 rawset ( t , "attr" , attr ) 132 rawset ( t , "attr" , attr )
110 return rawget ( t , k ) 133 return rawget ( t , k )
111 else 134 else
135 print("METHOD",k)
112 return stanza_methods [ k ] 136 return stanza_methods [ k ]
113 end 137 end
114 end ; 138 end ;
115 __tostring = function ( t ) 139 __tostring = function ( t )
116 local opentag = t.opentag 140 local opentag = t.opentag
117 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag ) 141 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag )
118 return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish ) 142 return getstring ( opentag.msgs , opentag.start , endtag.finish )
119 end ; 143 end ;
120 } 144 }
121 145
122 local function new_stanza ( ) 146 local function new_stanza ( )
123 return setmetatable ( { tags = { } } , stanza_mt ) 147 return setmetatable ( { tags = { } } , stanza_mt )
124 end 148 end
125 149
126 local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start )
127 if last_finishmsg ~= new_startmsg and last_finish + 1 ~= new_start - 1 then -- Don't bother with obviously zero length strings
128 tblinsert ( leaf , setmetatable ( {
129 stringindex = stringindex ;
130 startmsg = last_finishmsg ; start = last_finish + 1 ;
131 finishmsg = new_startmsg ; finish = new_start - 1 ;
132 } , m_mt ) )
133 end
134 end
135
136 local function tagindex_to_tree(indices) 150 local function tagindex_to_tree(indices)
137 local root = { tags = { } } 151 local root = { tags = { } }
138 local leaf = root 152 local leaf = root
139 local stringindex = indices.stringindex
140
141 local last_finishmsg , last_finish = 1 , 1
142 153
143 for k ,v in ipairs ( indices ) do 154 for k ,v in ipairs ( indices ) do
144 add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start ) 155 if v.type == "selfclosing" then
145 last_finishmsg = v.finishmsg
146 last_finish = v.finish
147
148 if v.selfclosed then
149 local newleaf = new_stanza ( ) 156 local newleaf = new_stanza ( )
150 newleaf.opentag = v 157 newleaf.opentag = v
151 newleaf.selfclosing = true 158 newleaf.selfclosing = true
152 newleaf.parent = leaf 159 newleaf.parent = leaf
160
153 tblinsert ( leaf , newleaf ) 161 tblinsert ( leaf , newleaf )
154 tblinsert ( leaf.tags , newleaf ) 162 tblinsert ( leaf.tags , newleaf )
155 elseif v.closed then -- Close tag 163 elseif v.type == "close" then -- Close tag
156 leaf.endtag = v 164 leaf.endtag = v
157 leaf = leaf.parent 165 leaf = leaf.parent
166 elseif v.type == "text" then
167 tblinsert ( leaf, v )
158 else -- Open tag 168 else -- Open tag
159 local newleaf = new_stanza ( ) 169 local newleaf = new_stanza ( )
160 newleaf.opentag = v 170 newleaf.opentag = v
161 newleaf.parent = leaf 171 newleaf.parent = leaf
172
162 tblinsert ( leaf , newleaf ) 173 tblinsert ( leaf , newleaf )
163 tblinsert ( leaf.tags , newleaf ) 174 tblinsert ( leaf.tags , newleaf )
164 175
165 leaf = newleaf 176 leaf = newleaf
166 end 177 end
167 end 178 end
168 add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , #stringindex , -1 )
169 179
170 assert ( leaf == root , "Mismatched opening/closing tags" ) 180 assert ( leaf == root , "Mismatched opening/closing tags" )
171 return root; 181 return root;
172 end 182 end
173 183

mercurial