Mon, 03 Jan 2011 18:31:08 +0000
Tree structure now similar to prosody stanza format
3990 | 1 | local ipairs , pairs , setmetatable = ipairs , pairs , setmetatable |
2 | local strsub = string.sub | |
3 | local tblconcat = table.concat | |
3991
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
4 | local tblinsert = table.insert |
3990 | 5 | |
-- Return the text that runs from character `startpos` of source string
-- number `startmsg` up to character `finishpos` of source string number
-- `finishmsg`.  `stringindex` is the array holding the source strings.
local function getstring ( stringindex , startmsg , startpos , finishmsg , finishpos )
	if startmsg ~= finishmsg then
		-- The span crosses string boundaries: the tail of the first string,
		-- every string strictly in between, then the head of the last one
		local head = strsub ( stringindex [ startmsg ] , startpos , -1 )
		local middle = tblconcat ( stringindex , "" , startmsg + 1 , finishmsg - 1 )
		local tail = strsub ( stringindex [ finishmsg ] , 1 , finishpos )
		return head .. middle .. tail
	end

	-- The whole span lives in a single source string
	return strsub ( stringindex [ startmsg ] , startpos , finishpos )
end
15 | ||
-- Metatable for span markers (tables carrying stringindex, startmsg, start,
-- finishmsg and finish).  tostring() on a marker yields the text it covers.
local m_mt = {
	__tostring = function ( v )
		local text = v.stringform
		if not text then
			-- First request: build the text and cache it on the marker
			text = getstring ( v.stringindex , v.startmsg , v.start , v.finishmsg , v.finish )
			rawset ( v , "stringform" , text )
		end
		return text
	end
}
28 | ||
-- Scan `str` for tags and append one marker per tag to the index `r`.
--
-- `r` is optional; when omitted a new index is created.  An index is an array
-- of markers plus a `stringindex` field holding every source string fed in so
-- far, so a tag may begin in one call and finish in a later one.
--
-- Each marker records:
--   startmsg / start   : source string number and position of the "<"
--   starte             : position where the "<" or "</" match ends
--   closed             : true when the tag started with "</"
--   finishmsg / finish : source string number and position of the ">"
--   finishs            : position where the ">" or "/>" match starts
--   selfclosed         : true when the tag ended with "/>"
--
-- Returns `r` when every tag seen was terminated, or `false , r` when `str`
-- ended in the middle of a tag (that marker has no `finish` yet).
local function index ( str , r )
	local stringindex
	local curstr , nexti
	if r then
		stringindex = r.stringindex
		if not stringindex then
			-- Caller supplied a bare table: attach the string list to it so
			-- later calls and tagindex_to_tree can find the source strings
			stringindex = { }
			r.stringindex = stringindex
		end
		curstr = #stringindex + 1
		stringindex [ curstr ] = str

		nexti = #r
	else
		stringindex = { str }
		curstr = 1

		r = { stringindex = stringindex }

		nexti = 0
	end

	local m
	do
		local t = r [ nexti ]
		if t and not t.finish then
			-- The last marker is still waiting for its ">": resume it
			m = t
		else
			m = setmetatable ( { stringindex = stringindex } , m_mt )
			nexti = nexti + 1
		end
	end

	local d = 0
	while true do
		local a , b , c , close , selfclosing

		if not m.start then
			a , b , close = str:find ( "<(/?)" , d )

			if not a then break end
			m.startmsg = curstr
			m.start = a
			m.starte = b
			if close == "/" then
				m.closed = true
			else
				m.closed = false
			end
		end

		r [ nexti ] = m
		nexti = nexti + 1

		-- `b` is nil when resuming a tag opened in an earlier string, in which
		-- case the search starts at the beginning of `str`
		c , d , selfclosing = str:find ( "(/?)>" , b )
		if not c then
			return false , r
		end
		m.finishmsg = curstr
		m.finish = d
		m.finishs = c
		m.selfclosed = selfclosing == "/"

		m = setmetatable ( { stringindex = stringindex } , m_mt )
	end
	return r
end
92 | ||
3991
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
-- Extract the element name and the attributes from a start tag.
--
-- `starttag` is converted with tostring(), so it may be a plain string or any
-- value whose tostring() yields the tag text.
-- Returns the element name (nil if none was found) and a table mapping
-- attribute names to their unescaped-as-written values.
local function process_starttag ( starttag )
	local str = tostring ( starttag )
	local attr = { }

	local elem = str:match ( "[^%s=></]+" )
	-- The value is matched lazily up to the same quote character that opened
	-- it, so single-quoted values stop at their own closing quote and either
	-- kind of value may contain the other kind of quote
	for name , _ , attvalue in str:gmatch ( [=[([^%s=<]+)%s*=%s*(["'])(.-)%2]=] ) do
		attr [ name ] = attvalue
	end
	return elem , attr
end
103 | ||
3991
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
-- Methods shared by every stanza; looked up by stanza_mt.__index for any key
-- other than "name" and "attr".  Declared here so that reading an absent
-- field yields nil instead of raising on an undefined global.
local stanza_methods = { }

-- Metatable for stanza nodes (see new_stanza).
local stanza_mt = {
	-- "name" and "attr" are parsed from the open tag on first access and then
	-- stored on the stanza itself; every other missing key falls back to
	-- stanza_methods.
	__index = function ( t , k )
		if k == "name" or k == "attr" then
			local elem , attr = process_starttag ( t.opentag )
			rawset ( t , "name" , elem )
			rawset ( t , "attr" , attr )
			return rawget ( t , k )
		else
			return stanza_methods [ k ]
		end
	end ;
	-- The source text from the start of the open tag to the end of the close
	-- tag (or to the end of the open tag itself for a self-closing stanza).
	-- Raises if the stanza has neither an end tag nor the selfclosing flag.
	__tostring = function ( t )
		local opentag = t.opentag
		local endtag = assert ( rawget ( t , "endtag" ) or rawget ( t , "selfclosing" ) and t.opentag )
		return getstring ( opentag.stringindex , opentag.startmsg , opentag.start , endtag.finishmsg , endtag.finish )
	end ;
}
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
121 | |
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
-- Create an empty stanza node: a table with an empty `tags` list and
-- stanza_mt as its metatable.
local function new_stanza ( )
	local stanza = { tags = { } }
	return setmetatable ( stanza , stanza_mt )
end
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
125 | |
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
-- Append to `leaf` a marker for the text lying between the end of the
-- previous tag (string `last_finishmsg`, position `last_finish`) and the
-- start of the next one (string `new_startmsg`, position `new_start`).
--
-- A `new_start` of 0 or less is taken relative to the end of string
-- `new_startmsg` (0 means "through its last character").
-- Nothing is appended when both ends are in the same source string and the
-- span is empty.  Spans that cross source strings are always recorded.
local function add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , new_startmsg , new_start )
	local start , finish = last_finish + 1 , new_start - 1

	if finish < 0 then
		-- Turn an end-relative finish into an absolute position so that the
		-- emptiness test below is meaningful
		local last = stringindex [ new_startmsg ]
		if last then
			finish = #last + finish + 1
		end
	end

	if last_finishmsg == new_startmsg and start > finish then
		return -- Don't bother with obviously zero length strings
	end

	tblinsert ( leaf , setmetatable ( {
		stringindex = stringindex ;
		startmsg = last_finishmsg ; start = start ;
		finishmsg = new_startmsg ; finish = finish ;
	} , m_mt ) )
end
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
135 | |
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
-- Build a tree from a tag index as produced by index().
--
-- `indices` is an array of tag markers with a `stringindex` field.  The
-- result is a root table whose array part holds its children in document
-- order (text markers and stanzas) and whose `tags` field holds the stanzas
-- only; every stanza has the same layout plus `opentag`, `parent` and either
-- `endtag` or `selfclosing`.
-- Raises "Mismatched opening/closing tags" when a close tag has no open tag
-- to match or when an open tag is never closed.
local function tagindex_to_tree(indices)
	local root = { tags = { } }
	local leaf = root
	local stringindex = indices.stringindex

	-- Nothing has been consumed yet, so the first text span must begin at
	-- character 1 of string 1 (spans start at last_finish + 1)
	local last_finishmsg , last_finish = 1 , 0

	for _ , v in ipairs ( indices ) do
		add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , v.startmsg , v.start )
		last_finishmsg = v.finishmsg
		last_finish = v.finish

		if v.closed and not v.selfclosed then -- Close tag
			-- Refuse to climb above the root; otherwise leaf would become
			-- nil and fail later with an unrelated error
			assert ( leaf ~= root , "Mismatched opening/closing tags" )
			leaf.endtag = v
			leaf = leaf.parent
		else -- Open or self-closing tag
			local newleaf = new_stanza ( )
			newleaf.opentag = v
			newleaf.parent = leaf
			tblinsert ( leaf , newleaf )
			tblinsert ( leaf.tags , newleaf )

			if v.selfclosed then
				newleaf.selfclosing = true
			else
				-- Children that follow belong to the newly opened stanza
				leaf = newleaf
			end
		end
	end
	-- Trailing text: a new_start of 0 gives the span a finish of -1, i.e. it
	-- runs through the last character of the last source string
	add_inbetween ( stringindex , leaf , last_finishmsg , last_finish , #stringindex , 0 )

	assert ( leaf == root , "Mismatched opening/closing tags" )
	return root;
end
7a2856c8ab7a
Tree structure now similar to prosody stanza format
Daurnimator <quae@daurnimator.com>
parents:
3990
diff
changeset
|
173 | |
-- Public interface of the module
return {
	index = index ,
	tagindex_to_tree = tagindex_to_tree ,
}