|
1 -- dmlib.deflate |
|
2 -- deflate (and gunzip) implemented in Lua. |
|
3 -- |
|
4 -- Note: only supports decompression. |
|
5 -- Compression not implemented. |
|
6 -- |
|
7 -- References |
|
8 -- [1] DEFLATE Compressed Data Format Specification version 1.3 |
|
9 -- http://tools.ietf.org/html/rfc1951 |
|
10 -- [2] GZIP file format specification version 4.3 |
|
11 -- http://tools.ietf.org/html/rfc1952 |
|
12 -- [3] http://en.wikipedia.org/wiki/DEFLATE |
|
13 -- [4] pyflate, by Paul Sladen |
|
14 -- http://www.paul.sladen.org/projects/pyflate/ |
|
15 -- [5] Compress::Zlib::Perl - partial pure Perl implementation of |
|
16 -- Compress::Zlib |
|
17 -- http://search.cpan.org/~nwclark/Compress-Zlib-Perl/Perl.pm |
|
18 -- |
|
19 -- (c) 2008 David Manura. Licensed under the same terms as Lua (MIT). |
|
20 |
|
21 local assert, error, ipairs, pairs, tostring, type, setmetatable, io, math, table_sort, |
|
22 math_max, string_char, io_open, _G = |
|
23 assert, error, ipairs, pairs, tostring, type, setmetatable, io, math, table.sort, |
|
24 math.max, string.char, io.open, _G; |
|
25 |
|
26 local function memoize(f) |
|
27 local mt = {}; |
|
28 local t = setmetatable({}, mt) |
|
29 function mt:__index(k) |
|
30 local v = f(k); t[k] = v |
|
31 return v |
|
32 end |
|
33 return t |
|
34 end |
|
35 |
|
36 local function runtime_error(s, level) |
|
37 level = level or 1 |
|
38 error({s}, level+1) |
|
39 end |
|
40 |
|
41 |
|
42 local function make_os(outbs) |
|
43 local os = {} |
|
44 os.outbs = outbs |
|
45 os.wnd = {} |
|
46 os.wnd_pos = 1 |
|
47 return os |
|
48 end |
|
49 |
|
50 |
|
51 local function output(os, byte) |
|
52 -- debug('OUTPUT:', s) |
|
53 local wnd_pos = os.wnd_pos |
|
54 os.outbs(byte) |
|
55 os.wnd[wnd_pos] = byte |
|
56 os.wnd_pos = wnd_pos % 32768 + 1 -- 32K |
|
57 end |
|
58 |
|
59 |
|
60 local function noeof(val) |
|
61 return assert(val, 'unexpected end of file') |
|
62 end |
|
63 |
|
64 |
|
65 local function hasbit(bits, bit) |
|
66 return bits % (bit + bit) >= bit |
|
67 end |
|
68 |
|
69 |
|
70 -- small optimization (lookup table for powers of 2) |
|
71 local pow2 = memoize(function(n) return 2^n end) |
|
72 --local tbits = memoize( |
|
73 -- function(bits) |
|
74 -- return memoize( function(bit) return getbit(bits, bit) end ) |
|
75 -- end ) |
|
76 |
|
77 |
|
78 -- weak metatable marking objects as bitstream type |
|
79 local is_bitstream = setmetatable({}, {__mode='k'}) |
|
80 |
|
81 |
|
82 |
|
83 local function bytestream_from_string(s) |
|
84 local i = 1 |
|
85 local o = {} |
|
86 function o:read() |
|
87 local by |
|
88 if i <= #s then |
|
89 by = s:byte(i) |
|
90 i = i + 1 |
|
91 end |
|
92 return by |
|
93 end |
|
94 return o |
|
95 end |
|
96 |
|
97 local left |
|
98 local function bitstream_from_bytestream(bys) |
|
99 local buf_byte, buf_nbit, o = 0, 0, {}; |
|
100 |
|
101 function o:nbits_left_in_byte() |
|
102 return buf_nbit |
|
103 end |
|
104 |
|
105 function o:read(nbits) |
|
106 nbits = nbits or 1 |
|
107 while buf_nbit < nbits do |
|
108 local byte = bys:read() |
|
109 if not byte then return end -- note: more calls also return nil |
|
110 buf_byte = buf_byte + pow2[buf_nbit] * byte |
|
111 buf_nbit = buf_nbit + 8 |
|
112 end |
|
113 local m = pow2[nbits] |
|
114 local bits = buf_byte % m |
|
115 buf_byte = (buf_byte - bits) / m |
|
116 buf_nbit = buf_nbit - nbits |
|
117 return bits |
|
118 end |
|
119 |
|
120 is_bitstream[o] = true |
|
121 |
|
122 return o |
|
123 end |
|
124 |
|
125 |
|
126 local function get_bitstream(o) |
|
127 return is_bitstream[o] and o or bitstream_from_bytestream(bytestream_from_string(o)) |
|
128 end |
|
129 |
|
130 |
|
131 local function get_obytestream(o) |
|
132 local bs |
|
133 if io.type(o) == 'file' then |
|
134 bs = function(sbyte) o:write(string_char(sbyte)) end |
|
135 elseif type(o) == 'function' then |
|
136 bs = o |
|
137 end |
|
138 return bs |
|
139 end |
|
140 |
|
141 |
|
142 local function HuffmanTable(init, is_full) |
|
143 local t = {} |
|
144 if is_full then |
|
145 for val,nbits in pairs(init) do |
|
146 if nbits ~= 0 then |
|
147 t[#t+1] = {val=val, nbits=nbits} |
|
148 --debug('*',val,nbits) |
|
149 end |
|
150 end |
|
151 else |
|
152 for i=1,#init-2,2 do |
|
153 local firstval, nbits, nextval = init[i], init[i+1], init[i+2] |
|
154 --debug(val, nextval, nbits) |
|
155 if nbits ~= 0 then |
|
156 for val=firstval,nextval-1 do |
|
157 t[#t+1] = {val=val, nbits=nbits} |
|
158 end |
|
159 end |
|
160 end |
|
161 end |
|
162 table_sort(t, function(a,b) |
|
163 return a.nbits == b.nbits and a.val < b.val or a.nbits < b.nbits |
|
164 end) |
|
165 |
|
166 -- assign codes |
|
167 local code = 1 -- leading 1 marker |
|
168 local nbits = 0 |
|
169 for i,s in ipairs(t) do |
|
170 if s.nbits ~= nbits then |
|
171 code = code * pow2[s.nbits - nbits] |
|
172 nbits = s.nbits |
|
173 end |
|
174 s.code = code |
|
175 --debug('huffman code:', i, s.nbits, s.val, code, bits_tostring(code)) |
|
176 code = code + 1 |
|
177 end |
|
178 |
|
179 local minbits = math.huge |
|
180 local look = {} |
|
181 for i,s in ipairs(t) do |
|
182 minbits = math.min(minbits, s.nbits) |
|
183 look[s.code] = s.val |
|
184 end |
|
185 |
|
186 --for _,o in ipairs(t) do |
|
187 -- debug(':', o.nbits, o.val) |
|
188 --end |
|
189 |
|
190 -- function t:lookup(bits) return look[bits] end |
|
191 |
|
192 local function msb(bits, nbits) |
|
193 local res = 0 |
|
194 for i=1,nbits do |
|
195 local b = bits % 2 |
|
196 bits = (bits - b) / 2 |
|
197 res = res * 2 + b |
|
198 end |
|
199 return res |
|
200 end |
|
201 local tfirstcode = memoize( |
|
202 function(bits) return pow2[minbits] + msb(bits, minbits) end) |
|
203 |
|
204 function t:read(bs) |
|
205 local code, nbits = 1, 0 -- leading 1 marker |
|
206 while 1 do |
|
207 if nbits == 0 then -- small optimization (optional) |
|
208 code = tfirstcode[noeof(bs:read(minbits))] |
|
209 nbits = nbits + minbits |
|
210 else |
|
211 local b = noeof(bs:read()) |
|
212 nbits = nbits + 1 |
|
213 --debug('b',b) |
|
214 code = code * 2 + b -- MSB first |
|
215 end |
|
216 --debug('code?', code, bits_tostring(code)) |
|
217 local val = look[code] |
|
218 if val then |
|
219 --debug('FOUND', val) |
|
220 return val |
|
221 end |
|
222 end |
|
223 end |
|
224 |
|
225 return t |
|
226 end |
|
227 |
|
228 |
|
229 local function parse_gzip_header(bs) |
|
230 -- local FLG_FTEXT = 2^0 |
|
231 local FLG_FHCRC = 2^1 |
|
232 local FLG_FEXTRA = 2^2 |
|
233 local FLG_FNAME = 2^3 |
|
234 local FLG_FCOMMENT = 2^4 |
|
235 |
|
236 local id1 = bs:read(8) |
|
237 local id2 = bs:read(8) |
|
238 local cm = bs:read(8) -- compression method |
|
239 local flg = bs:read(8) -- FLaGs |
|
240 local mtime = bs:read(32) -- Modification TIME |
|
241 local xfl = bs:read(8) -- eXtra FLags |
|
242 local os = bs:read(8) -- Operating System |
|
243 |
|
244 if hasbit(flg, FLG_FEXTRA) then |
|
245 local xlen = bs:read(16) |
|
246 local extra = 0 |
|
247 for i=1,xlen do |
|
248 extra = bs:read(8) |
|
249 end |
|
250 end |
|
251 |
|
252 if hasbit(flg, FLG_FNAME) then |
|
253 while bs:read(8) ~= 0 do end |
|
254 end |
|
255 |
|
256 if hasbit(flg, FLG_FCOMMENT) then |
|
257 while bs:read(8) ~= 0 do end |
|
258 end |
|
259 if hasbit(flg, FLG_FHCRC) then |
|
260 bs:read(16) |
|
261 end |
|
262 end |
|
263 |
|
264 |
|
265 local function parse_huffmantables(bs) |
|
266 local hlit = bs:read(5) -- # of literal/length codes - 257 |
|
267 local hdist = bs:read(5) -- # of distance codes - 1 |
|
268 local hclen = noeof(bs:read(4)) -- # of code length codes - 4 |
|
269 |
|
270 local ncodelen_codes = hclen + 4 |
|
271 local codelen_init = {} |
|
272 local codelen_vals = { |
|
273 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} |
|
274 for i=1,ncodelen_codes do |
|
275 local nbits = bs:read(3) |
|
276 local val = codelen_vals[i] |
|
277 codelen_init[val] = nbits |
|
278 end |
|
279 local codelentable = HuffmanTable(codelen_init, true) |
|
280 |
|
281 local function decode(ncodes) |
|
282 local init = {} |
|
283 local nbits |
|
284 local val = 0 |
|
285 while val < ncodes do |
|
286 local codelen = codelentable:read(bs) |
|
287 --FIX:check nil? |
|
288 local nrepeat |
|
289 if codelen <= 15 then |
|
290 nrepeat = 1 |
|
291 nbits = codelen |
|
292 --debug('w', nbits) |
|
293 elseif codelen == 16 then |
|
294 nrepeat = 3 + noeof(bs:read(2)) |
|
295 -- nbits unchanged |
|
296 elseif codelen == 17 then |
|
297 nrepeat = 3 + noeof(bs:read(3)) |
|
298 nbits = 0 |
|
299 elseif codelen == 18 then |
|
300 nrepeat = 11 + noeof(bs:read(7)) |
|
301 nbits = 0 |
|
302 else |
|
303 error 'ASSERT' |
|
304 end |
|
305 for i=1,nrepeat do |
|
306 init[val] = nbits |
|
307 val = val + 1 |
|
308 end |
|
309 end |
|
310 local huffmantable = HuffmanTable(init, true) |
|
311 return huffmantable |
|
312 end |
|
313 |
|
314 local nlit_codes = hlit + 257 |
|
315 local ndist_codes = hdist + 1 |
|
316 |
|
317 local littable = decode(nlit_codes) |
|
318 local disttable = decode(ndist_codes) |
|
319 |
|
320 return littable, disttable |
|
321 end |
|
322 |
|
323 |
|
324 local tdecode_len_base |
|
325 local tdecode_len_nextrabits |
|
326 local tdecode_dist_base |
|
327 local tdecode_dist_nextrabits |
|
328 local function parse_compressed_item(bs, os, littable, disttable) |
|
329 local val = littable:read(bs) |
|
330 --debug(val, val < 256 and string_char(val)) |
|
331 if val < 256 then -- literal |
|
332 output(os, val) |
|
333 elseif val == 256 then -- end of block |
|
334 return true |
|
335 else |
|
336 if not tdecode_len_base then |
|
337 local t = {[257]=3} |
|
338 local skip = 1 |
|
339 for i=258,285,4 do |
|
340 for j=i,i+3 do t[j] = t[j-1] + skip end |
|
341 if i ~= 258 then skip = skip * 2 end |
|
342 end |
|
343 t[285] = 258 |
|
344 tdecode_len_base = t |
|
345 --for i=257,285 do debug('T1',i,t[i]) end |
|
346 end |
|
347 if not tdecode_len_nextrabits then |
|
348 local t = {} |
|
349 for i=257,285 do |
|
350 local j = math_max(i - 261, 0) |
|
351 t[i] = (j - (j % 4)) / 4 |
|
352 end |
|
353 t[285] = 0 |
|
354 tdecode_len_nextrabits = t |
|
355 --for i=257,285 do debug('T2',i,t[i]) end |
|
356 end |
|
357 local len_base = tdecode_len_base[val] |
|
358 local nextrabits = tdecode_len_nextrabits[val] |
|
359 local extrabits = bs:read(nextrabits) |
|
360 local len = len_base + extrabits |
|
361 |
|
362 if not tdecode_dist_base then |
|
363 local t = {[0]=1} |
|
364 local skip = 1 |
|
365 for i=1,29,2 do |
|
366 for j=i,i+1 do t[j] = t[j-1] + skip end |
|
367 if i ~= 1 then skip = skip * 2 end |
|
368 end |
|
369 tdecode_dist_base = t |
|
370 --for i=0,29 do debug('T3',i,t[i]) end |
|
371 end |
|
372 if not tdecode_dist_nextrabits then |
|
373 local t = {} |
|
374 for i=0,29 do |
|
375 local j = math_max(i - 2, 0) |
|
376 t[i] = (j - (j % 2)) / 2 |
|
377 end |
|
378 tdecode_dist_nextrabits = t |
|
379 --for i=0,29 do debug('T4',i,t[i]) end |
|
380 end |
|
381 local dist_val = disttable:read(bs) |
|
382 local dist_base = tdecode_dist_base[dist_val] |
|
383 local dist_nextrabits = tdecode_dist_nextrabits[dist_val] |
|
384 local dist_extrabits = bs:read(dist_nextrabits) |
|
385 local dist = dist_base + dist_extrabits |
|
386 |
|
387 --debug('BACK', len, dist) |
|
388 for i=1,len do |
|
389 local pos = (os.wnd_pos - 1 - dist) % 32768 + 1 -- 32K |
|
390 output(os, assert(os.wnd[pos], 'invalid distance')) |
|
391 end |
|
392 end |
|
393 return false |
|
394 end |
|
395 |
|
396 |
|
397 local function parse_block(bs, os) |
|
398 local bfinal = bs:read(1) |
|
399 local btype = bs:read(2) |
|
400 |
|
401 local BTYPE_NO_COMPRESSION = 0 |
|
402 local BTYPE_FIXED_HUFFMAN = 1 |
|
403 local BTYPE_DYNAMIC_HUFFMAN = 2 |
|
404 local BTYPE_RESERVED = 3 |
|
405 |
|
406 if btype == BTYPE_NO_COMPRESSION then |
|
407 bs:read(bs:nbits_left_in_byte()) |
|
408 local len = bs:read(16) |
|
409 local nlen = noeof(bs:read(16)) |
|
410 |
|
411 for i=1,len do |
|
412 local by = noeof(bs:read(8)) |
|
413 output(os, by) |
|
414 end |
|
415 elseif btype == BTYPE_FIXED_HUFFMAN or btype == BTYPE_DYNAMIC_HUFFMAN then |
|
416 local littable, disttable |
|
417 if btype == BTYPE_DYNAMIC_HUFFMAN then |
|
418 littable, disttable = parse_huffmantables(bs) |
|
419 else |
|
420 littable = HuffmanTable {0,8, 144,9, 256,7, 280,8, 288,nil} |
|
421 disttable = HuffmanTable {0,5, 32,nil} |
|
422 end |
|
423 |
|
424 repeat until parse_compressed_item( |
|
425 bs, os, littable, disttable |
|
426 ); |
|
427 end |
|
428 |
|
429 return bfinal ~= 0 |
|
430 end |
|
431 |
|
432 |
|
433 local function deflate(t) |
|
434 local bs, os = get_bitstream(t.input) |
|
435 , make_os(get_obytestream(t.output)) |
|
436 repeat until parse_block(bs, os) |
|
437 end |
|
438 |
|
439 function gunzip(t) |
|
440 local bs = get_bitstream(t.input) |
|
441 local outbs = get_obytestream(t.output) |
|
442 |
|
443 parse_gzip_header(bs) |
|
444 |
|
445 deflate{input=bs, output=outbs} |
|
446 |
|
447 bs:read(bs:nbits_left_in_byte()) |
|
448 bs:read() |
|
449 end |
|
450 |
|
451 |
|
452 --[[ |
|
453 LICENSE |
|
454 |
|
455 Copyright (C) 2008, David Manura. |
|
456 |
|
457 Permission is hereby granted, free of charge, to any person obtaining a copy |
|
458 of this software and associated documentation files (the "Software"), to deal |
|
459 in the Software without restriction, including without limitation the rights |
|
460 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
|
461 copies of the Software, and to permit persons to whom the Software is |
|
462 furnished to do so, subject to the following conditions: |
|
463 |
|
464 The above copyright notice and this permission notice shall be included in |
|
465 all copies or substantial portions of the Software. |
|
466 |
|
467 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
|
468 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
|
469 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
|
470 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
|
471 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
|
472 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
|
473 THE SOFTWARE. |
|
474 |
|
475 (end license) |
|
476 --]] |