Fri, 17 Mar 2023 11:02:12 +0000
A range of fixes for Lua 5.2 support
1 | 1 | --[[-------------------------------------------------------------------- |
2 | ||
3 | optparser.lua: does parser-based optimizations | |
4 | This file is part of LuaSrcDiet. | |
5 | ||
6 | Copyright (c) 2008 Kein-Hong Man <khman@users.sf.net> | |
7 | The COPYRIGHT file describes the conditions | |
8 | under which this software may be distributed. | |
9 | ||
10 | See the ChangeLog for more information. | |
11 | ||
12 | ----------------------------------------------------------------------]] | |
13 | ||
14 | --[[-------------------------------------------------------------------- | |
15 | -- NOTES: | |
16 | -- * For more parser-based optimization ideas, see the TODO items or | |
17 | -- look at technotes.txt. | |
18 | -- * The processing load is quite significant, but since this is an | |
19 | -- off-line text processor, I believe we can wait a few seconds. | |
20 | -- * TODO: might process "local a,a,a" wrongly... need tests! | |
21 | -- * TODO: remove position handling if overlapped locals (rem < 0) | |
22 | -- needs more study, to check behaviour | |
23 | -- * TODO: there are probably better ways to do allocation, e.g. by | |
24 | -- choosing better methods to sort and pick locals... | |
25 | -- * TODO: we don't need 53*63 two-letter identifiers; we can make | |
26 | -- do with significantly less depending on how many that are really | |
27 | -- needed and improve entropy; e.g. 13 needed -> choose 4*4 instead | |
28 | ----------------------------------------------------------------------]] | |
29 | ||
30 | local base = _G | |
31 | local string = require "string" | |
32 | local table = require "table" | |
33 | ||
34 | ---------------------------------------------------------------------- | |
35 | -- Letter frequencies for reducing symbol entropy (fixed version) | |
36 | -- * Might help a wee bit when the output file is compressed | |
37 | -- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies | |
38 | -- * We use letter frequencies according to a Linotype keyboard, plus | |
39 | -- the underscore, and both lower case and upper case letters. | |
40 | -- * The arrangement below (LC, underscore, %d, UC) is arbitrary. | |
41 | -- * This is certainly not optimal, but is quick-and-dirty and the | |
42 | -- process has no significant overhead | |
43 | ---------------------------------------------------------------------- | |
44 | ||
45 | local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ" | |
46 | local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ" | |
47 | ||
48 | -- names or identifiers that must be skipped | |
49 | -- * the first two lines are for keywords | |
50 | local SKIP_NAME = {} | |
51 | for v in string.gmatch([[ | |
52 | and break do else elseif end false for function if in | |
53 | local nil not or repeat return then true until while | |
94
cf209451be5f
minify: Preserve _ENV, as it's lexically significant in 5.2+
Matthew Wild <mwild1@gmail.com>
parents:
93
diff
changeset
|
54 | self _ENV]], "%S+") do |
1 | 55 | SKIP_NAME[v] = true |
56 | end | |
57 | ||
58 | ------------------------------------------------------------------------ | |
59 | -- variables and data structures | |
60 | ------------------------------------------------------------------------ | |
61 | ||
62 | local toklist, seminfolist, -- token lists | |
63 | globalinfo, localinfo, -- variable information tables | |
64 | globaluniq, localuniq, -- unique name tables | |
65 | var_new, -- index of new variable names | |
66 | varlist -- list of output variables | |
67 | ||
68 | ---------------------------------------------------------------------- | |
69 | -- preprocess information table to get lists of unique names | |
70 | ---------------------------------------------------------------------- | |
71 | ||
72 | local function preprocess(infotable) | |
73 | local uniqtable = {} | |
74 | for i = 1, #infotable do -- enumerate info table | |
75 | local obj = infotable[i] | |
76 | local name = obj.name | |
77 | -------------------------------------------------------------------- | |
78 | if not uniqtable[name] then -- not found, start an entry | |
79 | uniqtable[name] = { | |
80 | decl = 0, token = 0, size = 0, | |
81 | } | |
82 | end | |
83 | -------------------------------------------------------------------- | |
84 | local uniq = uniqtable[name] -- count declarations, tokens, size | |
85 | uniq.decl = uniq.decl + 1 | |
86 | local xref = obj.xref | |
87 | local xcount = #xref | |
88 | uniq.token = uniq.token + xcount | |
89 | uniq.size = uniq.size + xcount * #name | |
90 | -------------------------------------------------------------------- | |
91 | if obj.decl then -- if local table, create first,last pairs | |
92 | obj.id = i | |
93 | obj.xcount = xcount | |
94 | if xcount > 1 then -- if ==1, means local never accessed | |
95 | obj.first = xref[2] | |
96 | obj.last = xref[xcount] | |
97 | end | |
98 | -------------------------------------------------------------------- | |
99 | else -- if global table, add a back ref | |
100 | uniq.id = i | |
101 | end | |
102 | -------------------------------------------------------------------- | |
103 | end--for | |
104 | return uniqtable | |
105 | end | |
106 | ||
107 | ---------------------------------------------------------------------- | |
108 | -- calculate actual symbol frequencies, in order to reduce entropy | |
109 | -- * this may help further reduce the size of compressed sources | |
110 | -- * note that since parsing optimizations is put before lexing | |
111 | -- optimizations, the frequency table is not exact! | |
112 | -- * yes, this will miss --keep block comments too... | |
113 | ---------------------------------------------------------------------- | |
114 | ||
115 | local function recalc_for_entropy(option) | |
116 | local byte = string.byte | |
117 | local char = string.char | |
118 | -- table of token classes to accept in calculating symbol frequency | |
119 | local ACCEPT = { | |
120 | TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true, | |
121 | TK_STRING = true, TK_LSTRING = true, | |
122 | } | |
123 | if not option["opt-comments"] then | |
124 | ACCEPT.TK_COMMENT = true | |
125 | ACCEPT.TK_LCOMMENT = true | |
126 | end | |
127 | -------------------------------------------------------------------- | |
128 | -- create a new table and remove any original locals by filtering | |
129 | -------------------------------------------------------------------- | |
130 | local filtered = {} | |
131 | for i = 1, #toklist do | |
132 | filtered[i] = seminfolist[i] | |
133 | end | |
134 | for i = 1, #localinfo do -- enumerate local info table | |
135 | local obj = localinfo[i] | |
136 | local xref = obj.xref | |
137 | for j = 1, obj.xcount do | |
138 | local p = xref[j] | |
139 | filtered[p] = "" -- remove locals | |
140 | end | |
141 | end | |
142 | -------------------------------------------------------------------- | |
143 | local freq = {} -- reset symbol frequency table | |
144 | for i = 0, 255 do freq[i] = 0 end | |
145 | for i = 1, #toklist do -- gather symbol frequency | |
146 | local tok, info = toklist[i], filtered[i] | |
147 | if ACCEPT[tok] then | |
148 | for j = 1, #info do | |
149 | local c = byte(info, j) | |
150 | freq[c] = freq[c] + 1 | |
151 | end | |
152 | end--if | |
153 | end--for | |
154 | -------------------------------------------------------------------- | |
155 | -- function to re-sort symbols according to actual frequencies | |
156 | -------------------------------------------------------------------- | |
157 | local function resort(symbols) | |
158 | local symlist = {} | |
159 | for i = 1, #symbols do -- prepare table to sort | |
160 | local c = byte(symbols, i) | |
161 | symlist[i] = { c = c, freq = freq[c], } | |
162 | end | |
163 | table.sort(symlist, -- sort selected symbols | |
164 | function(v1, v2) | |
165 | return v1.freq > v2.freq | |
166 | end | |
167 | ) | |
168 | local charlist = {} -- reconstitute the string | |
169 | for i = 1, #symlist do | |
170 | charlist[i] = char(symlist[i].c) | |
171 | end | |
172 | return table.concat(charlist) | |
173 | end | |
174 | -------------------------------------------------------------------- | |
175 | LETTERS = resort(LETTERS) -- change letter arrangement | |
176 | ALPHANUM = resort(ALPHANUM) | |
177 | end | |
178 | ||
179 | ---------------------------------------------------------------------- | |
180 | -- returns a string containing a new local variable name to use, and | |
181 | -- a flag indicating whether it collides with a global variable | |
182 | -- * trapping keywords and other names like 'self' is done elsewhere | |
183 | ---------------------------------------------------------------------- | |
184 | ||
185 | local function new_var_name() | |
186 | local var | |
187 | local cletters, calphanum = #LETTERS, #ALPHANUM | |
188 | local v = var_new | |
189 | if v < cletters then -- single char | |
190 | v = v + 1 | |
191 | var = string.sub(LETTERS, v, v) | |
192 | else -- longer names | |
193 | local range, sz = cletters, 1 -- calculate # chars fit | |
194 | repeat | |
195 | v = v - range | |
196 | range = range * calphanum | |
197 | sz = sz + 1 | |
198 | until range > v | |
199 | local n = v % cletters -- left side cycles faster | |
200 | v = (v - n) / cletters -- do first char first | |
201 | n = n + 1 | |
202 | var = string.sub(LETTERS, n, n) | |
203 | while sz > 1 do | |
204 | local m = v % calphanum | |
205 | v = (v - m) / calphanum | |
206 | m = m + 1 | |
207 | var = var..string.sub(ALPHANUM, m, m) | |
208 | sz = sz - 1 | |
209 | end | |
210 | end | |
211 | var_new = var_new + 1 | |
212 | return var, globaluniq[var] ~= nil | |
213 | end | |
214 | ||
215 | ---------------------------------------------------------------------- | |
216 | -- main entry point | |
217 | -- * does only local variable optimization for now | |
218 | ---------------------------------------------------------------------- | |
219 | ||
220 | function optimize(option, _toklist, _seminfolist, _globalinfo, _localinfo) | |
221 | -- set tables | |
222 | toklist, seminfolist, globalinfo, localinfo | |
223 | = _toklist, _seminfolist, _globalinfo, _localinfo | |
224 | var_new = 0 -- reset variable name allocator | |
225 | varlist = {} | |
226 | ------------------------------------------------------------------ | |
227 | -- preprocess global/local tables, handle entropy reduction | |
228 | ------------------------------------------------------------------ | |
229 | globaluniq = preprocess(globalinfo) | |
230 | localuniq = preprocess(localinfo) | |
231 | if option["opt-entropy"] then -- for entropy improvement | |
232 | recalc_for_entropy(option) | |
233 | end | |
234 | ------------------------------------------------------------------ | |
235 | -- build initial declared object table, then sort according to | |
236 | -- token count, this might help assign more tokens to more common | |
237 | -- variable names such as 'e' thus possibly reducing entropy | |
238 | -- * an object knows its localinfo index via its 'id' field | |
239 | -- * special handling for "self" special local (parameter) here | |
240 | ------------------------------------------------------------------ | |
241 | local object = {} | |
242 | for i = 1, #localinfo do | |
243 | object[i] = localinfo[i] | |
244 | end | |
245 | table.sort(object, -- sort largest first | |
246 | function(v1, v2) | |
247 | return v1.xcount > v2.xcount | |
248 | end | |
249 | ) | |
250 | ------------------------------------------------------------------ | |
251 | -- the special "self" function parameters must be preserved | |
252 | -- * the allocator below will never use "self", so it is safe to | |
253 | -- keep those implicit declarations as-is | |
254 | ------------------------------------------------------------------ | |
255 | local temp, j, gotself = {}, 1, false | |
256 | for i = 1, #object do | |
257 | local obj = object[i] | |
94
cf209451be5f
minify: Preserve _ENV, as it's lexically significant in 5.2+
Matthew Wild <mwild1@gmail.com>
parents:
93
diff
changeset
|
258 | if not obj.preserve then |
1 | 259 | temp[j] = obj |
260 | j = j + 1 | |
94
cf209451be5f
minify: Preserve _ENV, as it's lexically significant in 5.2+
Matthew Wild <mwild1@gmail.com>
parents:
93
diff
changeset
|
261 | elseif obj.name == "self" then |
1 | 262 | gotself = true |
263 | end | |
264 | end | |
265 | object = temp | |
266 | ------------------------------------------------------------------ | |
267 | -- a simple first-come first-served heuristic name allocator, | |
268 | -- note that this is in no way optimal... | |
269 | -- * each object is a local variable declaration plus existence | |
270 | -- * the aim is to assign short names to as many tokens as possible, | |
271 | -- so the following tries to maximize name reuse | |
272 | -- * note that we preserve sort order | |
273 | ------------------------------------------------------------------ | |
274 | local nobject = #object | |
275 | while nobject > 0 do | |
276 | local varname, gcollide | |
277 | repeat | |
278 | varname, gcollide = new_var_name() -- collect a variable name | |
279 | until not SKIP_NAME[varname] -- skip all special names | |
280 | varlist[#varlist + 1] = varname -- keep a list | |
281 | local oleft = nobject | |
282 | ------------------------------------------------------------------ | |
283 | -- if variable name collides with an existing global, the name | |
284 | -- cannot be used by a local when the name is accessed as a global | |
285 | -- during which the local is alive (between 'act' to 'rem'), so | |
286 | -- we drop objects that collides with the corresponding global | |
287 | ------------------------------------------------------------------ | |
288 | if gcollide then | |
289 | -- find the xref table of the global | |
290 | local gref = globalinfo[globaluniq[varname].id].xref | |
291 | local ngref = #gref | |
292 | -- enumerate for all current objects; all are valid at this point | |
293 | for i = 1, nobject do | |
294 | local obj = object[i] | |
295 | local act, rem = obj.act, obj.rem -- 'live' range of local | |
296 | -- if rem < 0, it is a -id to a local that had the same name | |
297 | -- so follow rem to extend it; does this make sense? | |
298 | while rem < 0 do | |
299 | rem = localinfo[-rem].rem | |
300 | end | |
301 | local drop | |
302 | for j = 1, ngref do | |
303 | local p = gref[j] | |
304 | if p >= act and p <= rem then drop = true end -- in range? | |
305 | end | |
306 | if drop then | |
307 | obj.skip = true | |
308 | oleft = oleft - 1 | |
309 | end | |
310 | end--for | |
311 | end--if gcollide | |
312 | ------------------------------------------------------------------ | |
313 | -- now the first unassigned local (since it's sorted) will be the | |
314 | -- one with the most tokens to rename, so we set this one and then | |
315 | -- eliminate all others that collides, then any locals that left | |
316 | -- can then reuse the same variable name; this is repeated until | |
317 | -- all local declaration that can use this name is assigned | |
318 | -- * the criteria for local-local reuse/collision is: | |
319 | -- A is the local with a name already assigned | |
320 | -- B is the unassigned local under consideration | |
321 | -- => anytime A is accessed, it cannot be when B is 'live' | |
322 | -- => to speed up things, we have first/last accesses noted | |
323 | ------------------------------------------------------------------ | |
324 | while oleft > 0 do | |
325 | local i = 1 | |
326 | while object[i].skip do -- scan for first object | |
327 | i = i + 1 | |
328 | end | |
329 | ------------------------------------------------------------------ | |
330 | -- first object is free for assignment of the variable name | |
331 | -- [first,last] gives the access range for collision checking | |
332 | ------------------------------------------------------------------ | |
333 | oleft = oleft - 1 | |
334 | local obja = object[i] | |
335 | i = i + 1 | |
336 | obja.newname = varname | |
337 | obja.skip = true | |
338 | obja.done = true | |
339 | local first, last = obja.first, obja.last | |
340 | local xref = obja.xref | |
341 | ------------------------------------------------------------------ | |
342 | -- then, scan all the rest and drop those colliding | |
343 | -- if A was never accessed then it'll never collide with anything | |
344 | -- otherwise trivial skip if: | |
345 | -- * B was activated after A's last access (last < act) | |
346 | -- * B was removed before A's first access (first > rem) | |
347 | -- if not, see detailed skip below... | |
348 | ------------------------------------------------------------------ | |
349 | if first and oleft > 0 then -- must have at least 1 access | |
350 | local scanleft = oleft | |
351 | while scanleft > 0 do | |
352 | while object[i].skip do -- next valid object | |
353 | i = i + 1 | |
354 | end | |
355 | scanleft = scanleft - 1 | |
356 | local objb = object[i] | |
357 | i = i + 1 | |
358 | local act, rem = objb.act, objb.rem -- live range of B | |
359 | -- if rem < 0, extend range of rem thru' following local | |
360 | while rem < 0 do | |
361 | rem = localinfo[-rem].rem | |
362 | end | |
363 | -------------------------------------------------------- | |
364 | if not(last < act or first > rem) then -- possible collision | |
365 | -------------------------------------------------------- | |
366 | -- B is activated later than A or at the same statement, | |
367 | -- this means for no collision, A cannot be accessed when B | |
368 | -- is alive, since B overrides A (or is a peer) | |
369 | -------------------------------------------------------- | |
370 | if act >= obja.act then | |
371 | for j = 1, obja.xcount do -- ... then check every access | |
372 | local p = xref[j] | |
373 | if p >= act and p <= rem then -- A accessed when B live! | |
374 | oleft = oleft - 1 | |
375 | objb.skip = true | |
376 | break | |
377 | end | |
378 | end--for | |
379 | -------------------------------------------------------- | |
380 | -- A is activated later than B, this means for no collision, | |
381 | -- A's access is okay since it overrides B, but B's last | |
382 | -- access need to be earlier than A's activation time | |
383 | -------------------------------------------------------- | |
384 | else | |
385 | if objb.last and objb.last >= obja.act then | |
386 | oleft = oleft - 1 | |
387 | objb.skip = true | |
388 | end | |
389 | end | |
390 | end | |
391 | -------------------------------------------------------- | |
392 | if oleft == 0 then break end | |
393 | end | |
394 | end--if first | |
395 | ------------------------------------------------------------------ | |
396 | end--while | |
397 | ------------------------------------------------------------------ | |
398 | -- after assigning all possible locals to one variable name, the | |
399 | -- unassigned locals/objects have the skip field reset and the table | |
400 | -- is compacted, to hopefully reduce iteration time | |
401 | ------------------------------------------------------------------ | |
402 | local temp, j = {}, 1 | |
403 | for i = 1, nobject do | |
404 | local obj = object[i] | |
405 | if not obj.done then | |
406 | obj.skip = false | |
407 | temp[j] = obj | |
408 | j = j + 1 | |
409 | end | |
410 | end | |
411 | object = temp -- new compacted object table | |
412 | nobject = #object -- objects left to process | |
413 | ------------------------------------------------------------------ | |
414 | end--while | |
415 | ------------------------------------------------------------------ | |
416 | -- after assigning all locals with new variable names, we can | |
417 | -- patch in the new names, and reprocess to get 'after' stats | |
418 | ------------------------------------------------------------------ | |
419 | for i = 1, #localinfo do -- enumerate all locals | |
420 | local obj = localinfo[i] | |
421 | local xref = obj.xref | |
422 | if obj.newname then -- if got new name, patch it in | |
423 | for j = 1, obj.xcount do | |
424 | local p = xref[j] -- xrefs indexes the token list | |
425 | seminfolist[p] = obj.newname | |
426 | end | |
427 | obj.name, obj.oldname -- adjust names | |
428 | = obj.newname, obj.name | |
429 | else | |
430 | obj.oldname = obj.name -- for cases like 'self' | |
431 | end | |
432 | end | |
433 | ------------------------------------------------------------------ | |
434 | -- deal with statistics output | |
435 | ------------------------------------------------------------------ | |
436 | if gotself then -- add 'self' to end of list | |
437 | varlist[#varlist + 1] = "self" | |
438 | end | |
439 | local afteruniq = preprocess(localinfo) | |
440 | ------------------------------------------------------------------ | |
441 | end | |
93
07c10f9ba77c
minify: Return _M from modules that weren't
Matthew Wild <mwild1@gmail.com>
parents:
1
diff
changeset
|
442 | |
99
2b6416334a25
A range of fixes for Lua 5.2 support
Matthew Wild <mwild1@gmail.com>
parents:
94
diff
changeset
|
443 | return {optimize = optimize} |