1 local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable |
1 local assert, ipairs , pairs , setmetatable , rawget , rawset , tostring = |
|
2 assert, ipairs , pairs , setmetatable , rawget , rawset , tostring |
2 local strsub = string.sub |
3 local strsub = string.sub |
3 local tblconcat = table.concat |
4 local tblconcat = table.concat |
4 local tblinsert = table.insert |
5 local tblinsert = table.insert |
5 |
6 |
6 local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos ) |
7 local function getstring ( msgs , startpos , finishpos ) |
7 if startmsg == finishmsg then --All originated in same string |
8 if #msgs == 1 then --All originated in same string |
8 return strsub ( stringindex [ startmsg ] , startpos , finishpos ) |
9 return strsub ( msgs[1] , startpos , finishpos ) |
9 else -- Over multiple source strings |
10 else -- Over multiple source strings |
10 return strsub ( stringindex [ startmsg ] , startpos , -1 ) |
11 return strsub ( msgs[1] , startpos , -1 ) |
11 .. tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 ) |
12 .. tblconcat ( msgs , "" , 2 , #msgs - 1 ) |
12 .. strsub ( stringindex [ finishmsg ] , 1 , finishpos ) |
13 .. strsub ( msgs[#msgs] , 1 , finishpos ) |
13 end |
14 end |
14 end |
15 end |
15 |
16 |
16 local m_mt = { |
17 local m_mt = { |
17 __tostring = function ( v ) |
18 __tostring = function ( v ) |
18 local str = v.stringform |
19 local str = v.stringform |
19 if str then |
20 if str then |
20 return str |
21 return str |
21 else |
22 else |
22 str = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish ) |
23 str = getstring ( v.msgs , v.start , v.finish ) |
23 rawset ( v , "stringform" , str ) |
24 v.stringform = str |
24 return str |
25 return str |
25 end |
26 end |
26 end |
27 end |
27 } |
28 } |
28 |
29 |
29 local function index ( str , r ) |
30 local function handleoutside ( str , r , initial ) |
30 local stringindex |
31 local a , b , close = str:find ( "<(/?)" , initial ) |
31 local curstr , nexti |
32 if not a then |
32 if r then |
33 r.state = "outside" |
33 stringindex = r.stringindex or { } |
34 return false |
34 curstr = #stringindex + 1 |
|
35 stringindex [ curstr ] = str |
|
36 |
|
37 nexti = #r |
|
38 else |
|
39 stringindex = { str } |
|
40 curstr = 1 |
|
41 |
|
42 r = { stringindex = stringindex } |
|
43 |
|
44 nexti = 0 |
|
45 end |
35 end |
46 |
36 |
47 local m |
37 --Finalise text object |
48 do |
38 local m = r[#r] |
49 local t = r [ nexti ] |
39 m.finish = a - 1 |
50 if t and not t.finish then |
40 m.type = "text" |
51 m = t |
41 |
52 else |
42 local m = setmetatable ( { |
53 m = setmetatable ( { stringindex = stringindex } , m_mt ) |
43 msgs = { str } ; |
54 nexti = nexti + 1 |
44 start = a ; |
55 end |
45 starte = b + 1 ; |
|
46 } , m_mt ) |
|
47 |
|
48 if close ~= "/" then |
|
49 r.depth = r.depth + 1 |
|
50 m.type = "open" |
|
51 else |
|
52 r.depth = r.depth - 1 |
|
53 m.type = "close" |
56 end |
54 end |
57 |
55 |
58 local d = 0 |
56 tblinsert ( r , m ) |
59 while true do |
|
60 local a , b , c , close , selfclosing |
|
61 |
57 |
62 if not m.start then |
58 r.state = "inside" |
63 a , b , close = str:find ( "<(/?)" , d ) |
59 return true |
64 |
60 end |
65 if not a then break end |
61 |
66 m.startmsg = curstr |
62 local function handleinside ( str, r , initial ) |
67 m.start = a |
63 local c , d , selfclosing = str:find ( "(/?)>" , initial ) |
68 m.starte = b |
64 if not c then |
69 if close == "/" then |
65 r.state = "inside" |
70 m.closed = true |
66 return false |
71 else |
67 end |
72 m.closed = false |
68 |
|
69 local m = r[#r] |
|
70 m.finish = d |
|
71 m.finishs = c - 1 |
|
72 if selfclosing == "/" then |
|
73 m.type = "selfclosing" |
|
74 r.depth = r.depth - 1 |
|
75 end |
|
76 |
|
77 local m = setmetatable ( { |
|
78 msgs = { str } ; |
|
79 start = d + 1 ; |
|
80 type = "text" ; |
|
81 } , m_mt ) |
|
82 tblinsert ( r , m ) |
|
83 |
|
84 r.state = "outside" |
|
85 return true |
|
86 end |
|
87 |
|
88 local function index ( str , r ) |
|
89 r = r or { depth = 0, state = "outside" } |
|
90 |
|
91 if not r[#r] then |
|
92 r[1] = setmetatable ( { |
|
93 msgs = { str } ; |
|
94 type = "text" ; |
|
95 start = 1 ; |
|
96 } , m_mt ) |
|
97 else |
|
98 tblinsert ( r[#r].msgs , str ) |
|
99 end |
|
100 |
|
101 repeat |
|
102 if r.state == "outside" then |
|
103 if not handleoutside ( str , r , r[#r].start ) then |
|
104 break |
|
105 end |
|
106 else |
|
107 if not handleinside ( str , r , r[#r].start ) then |
|
108 break |
73 end |
109 end |
74 end |
110 end |
75 |
111 until false |
76 r [ nexti ] = m |
112 |
77 nexti = nexti + 1 |
|
78 |
|
79 c , d , selfclosing = str:find ( "(/?)>" , b ) |
|
80 if not c then |
|
81 return false , r |
|
82 end |
|
83 m.finishmsg = curstr |
|
84 m.finish = d |
|
85 m.finishs = c |
|
86 m.selfclosed = selfclosing == "/" |
|
87 |
|
88 m = setmetatable ( { stringindex = stringindex } , m_mt ) |
|
89 end |
|
90 return r |
113 return r |
91 end |
114 end |
92 |
115 |
93 local function process_starttag ( starttag ) |
116 local function process_starttag ( starttag ) |
94 local str = tostring ( starttag ) |
117 local str = tostring ( starttag ) |
107 local elem , attr = process_starttag ( t.opentag ) |
130 local elem , attr = process_starttag ( t.opentag ) |
108 rawset ( t , "name" , elem ) |
131 rawset ( t , "name" , elem ) |
109 rawset ( t , "attr" , attr ) |
132 rawset ( t , "attr" , attr ) |
110 return rawget ( t , k ) |
133 return rawget ( t , k ) |
111 else |
134 else |
|
135 print("METHOD",k) |
112 return stanza_methods [ k ] |
136 return stanza_methods [ k ] |
113 end |
137 end |
114 end ; |
138 end ; |
115 __tostring = function ( t ) |
139 __tostring = function ( t ) |
116 local opentag = t.opentag |
140 local opentag = t.opentag |
117 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag ) |
141 local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag ) |
118 return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish ) |
142 return getstring ( opentag.msgs , opentag.start , endtag.finish ) |
119 end ; |
143 end ; |
120 } |
144 } |
121 |
145 |
122 local function new_stanza ( ) |
146 local function new_stanza ( ) |
123 return setmetatable ( { tags = { } } , stanza_mt ) |
147 return setmetatable ( { tags = { } } , stanza_mt ) |
124 end |
148 end |
125 |
149 |
126 local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start ) |
|
127 if last_finishmsg ~= new_startmsg and last_finish + 1 ~= new_start - 1 then -- Don't bother with obviously zero length strings |
|
128 tblinsert ( leaf , setmetatable ( { |
|
129 stringindex = stringindex ; |
|
130 startmsg = last_finishmsg ; start = last_finish + 1 ; |
|
131 finishmsg = new_startmsg ; finish = new_start - 1 ; |
|
132 } , m_mt ) ) |
|
133 end |
|
134 end |
|
135 |
|
136 local function tagindex_to_tree(indices) |
150 local function tagindex_to_tree(indices) |
137 local root = { tags = { } } |
151 local root = { tags = { } } |
138 local leaf = root |
152 local leaf = root |
139 local stringindex = indices.stringindex |
|
140 |
|
141 local last_finishmsg , last_finish = 1 , 1 |
|
142 |
153 |
143 for k ,v in ipairs ( indices ) do |
154 for k ,v in ipairs ( indices ) do |
144 add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start ) |
155 if v.type == "selfclosing" then |
145 last_finishmsg = v.finishmsg |
|
146 last_finish = v.finish |
|
147 |
|
148 if v.selfclosed then |
|
149 local newleaf = new_stanza ( ) |
156 local newleaf = new_stanza ( ) |
150 newleaf.opentag = v |
157 newleaf.opentag = v |
151 newleaf.selfclosing = true |
158 newleaf.selfclosing = true |
152 newleaf.parent = leaf |
159 newleaf.parent = leaf |
|
160 |
153 tblinsert ( leaf , newleaf ) |
161 tblinsert ( leaf , newleaf ) |
154 tblinsert ( leaf.tags , newleaf ) |
162 tblinsert ( leaf.tags , newleaf ) |
155 elseif v.closed then -- Close tag |
163 elseif v.type == "close" then -- Close tag |
156 leaf.endtag = v |
164 leaf.endtag = v |
157 leaf = leaf.parent |
165 leaf = leaf.parent |
|
166 elseif v.type == "text" then |
|
167 tblinsert ( leaf, v ) |
158 else -- Open tag |
168 else -- Open tag |
159 local newleaf = new_stanza ( ) |
169 local newleaf = new_stanza ( ) |
160 newleaf.opentag = v |
170 newleaf.opentag = v |
161 newleaf.parent = leaf |
171 newleaf.parent = leaf |
|
172 |
162 tblinsert ( leaf , newleaf ) |
173 tblinsert ( leaf , newleaf ) |
163 tblinsert ( leaf.tags , newleaf ) |
174 tblinsert ( leaf.tags , newleaf ) |
164 |
175 |
165 leaf = newleaf |
176 leaf = newleaf |
166 end |
177 end |
167 end |
178 end |
168 add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , #stringindex , -1 ) |
|
169 |
179 |
170 assert ( leaf == root , "Mismatched opening/closing tags" ) |
180 assert ( leaf == root , "Mismatched opening/closing tags" ) |
171 return root; |
181 return root; |
172 end |
182 end |
173 |
183 |