gzip/deflatelua.lua

changeset 59
165b36273ce7
child 65
41aeda62af16
equal deleted inserted replaced
58:17121881cbf7 59:165b36273ce7
1 -- dmlib.deflate
2 -- deflate (and gunzip) implemented in Lua.
3 --
4 -- Note: only supports decompression.
5 -- Compression not implemented.
6 --
7 -- References
8 -- [1] DEFLATE Compressed Data Format Specification version 1.3
9 -- http://tools.ietf.org/html/rfc1951
10 -- [2] GZIP file format specification version 4.3
11 -- http://tools.ietf.org/html/rfc1952
12 -- [3] http://en.wikipedia.org/wiki/DEFLATE
13 -- [4] pyflate, by Paul Sladen
14 -- http://www.paul.sladen.org/projects/pyflate/
15 -- [5] Compress::Zlib::Perl - partial pure Perl implementation of
16 -- Compress::Zlib
17 -- http://search.cpan.org/~nwclark/Compress-Zlib-Perl/Perl.pm
18 --
19 -- (c) 2008 David Manura. Licensed under the same terms as Lua (MIT).
20
21 local assert, error, ipairs, pairs, tostring, type, setmetatable, io, math, table_sort,
22 math_max, string_char, io_open, _G =
23 assert, error, ipairs, pairs, tostring, type, setmetatable, io, math, table.sort,
24 math.max, string.char, io.open, _G;
25
26 local function memoize(f)
27 local mt = {};
28 local t = setmetatable({}, mt)
29 function mt:__index(k)
30 local v = f(k); t[k] = v
31 return v
32 end
33 return t
34 end
35
36 local function runtime_error(s, level)
37 level = level or 1
38 error({s}, level+1)
39 end
40
41
42 local function make_os(outbs)
43 local os = {}
44 os.outbs = outbs
45 os.wnd = {}
46 os.wnd_pos = 1
47 return os
48 end
49
50
51 local function output(os, byte)
52 -- debug('OUTPUT:', s)
53 local wnd_pos = os.wnd_pos
54 os.outbs(byte)
55 os.wnd[wnd_pos] = byte
56 os.wnd_pos = wnd_pos % 32768 + 1 -- 32K
57 end
58
59
60 local function noeof(val)
61 return assert(val, 'unexpected end of file')
62 end
63
64
65 local function hasbit(bits, bit)
66 return bits % (bit + bit) >= bit
67 end
68
69
70 -- small optimization (lookup table for powers of 2)
71 local pow2 = memoize(function(n) return 2^n end)
72 --local tbits = memoize(
73 -- function(bits)
74 -- return memoize( function(bit) return getbit(bits, bit) end )
75 -- end )
76
77
78 -- weak metatable marking objects as bitstream type
79 local is_bitstream = setmetatable({}, {__mode='k'})
80
81
82
83 local function bytestream_from_string(s)
84 local i = 1
85 local o = {}
86 function o:read()
87 local by
88 if i <= #s then
89 by = s:byte(i)
90 i = i + 1
91 end
92 return by
93 end
94 return o
95 end
96
97 local left
98 local function bitstream_from_bytestream(bys)
99 local buf_byte, buf_nbit, o = 0, 0, {};
100
101 function o:nbits_left_in_byte()
102 return buf_nbit
103 end
104
105 function o:read(nbits)
106 nbits = nbits or 1
107 while buf_nbit < nbits do
108 local byte = bys:read()
109 if not byte then return end -- note: more calls also return nil
110 buf_byte = buf_byte + pow2[buf_nbit] * byte
111 buf_nbit = buf_nbit + 8
112 end
113 local m = pow2[nbits]
114 local bits = buf_byte % m
115 buf_byte = (buf_byte - bits) / m
116 buf_nbit = buf_nbit - nbits
117 return bits
118 end
119
120 is_bitstream[o] = true
121
122 return o
123 end
124
125
126 local function get_bitstream(o)
127 return is_bitstream[o] and o or bitstream_from_bytestream(bytestream_from_string(o))
128 end
129
130
131 local function get_obytestream(o)
132 local bs
133 if io.type(o) == 'file' then
134 bs = function(sbyte) o:write(string_char(sbyte)) end
135 elseif type(o) == 'function' then
136 bs = o
137 end
138 return bs
139 end
140
141
142 local function HuffmanTable(init, is_full)
143 local t = {}
144 if is_full then
145 for val,nbits in pairs(init) do
146 if nbits ~= 0 then
147 t[#t+1] = {val=val, nbits=nbits}
148 --debug('*',val,nbits)
149 end
150 end
151 else
152 for i=1,#init-2,2 do
153 local firstval, nbits, nextval = init[i], init[i+1], init[i+2]
154 --debug(val, nextval, nbits)
155 if nbits ~= 0 then
156 for val=firstval,nextval-1 do
157 t[#t+1] = {val=val, nbits=nbits}
158 end
159 end
160 end
161 end
162 table_sort(t, function(a,b)
163 return a.nbits == b.nbits and a.val < b.val or a.nbits < b.nbits
164 end)
165
166 -- assign codes
167 local code = 1 -- leading 1 marker
168 local nbits = 0
169 for i,s in ipairs(t) do
170 if s.nbits ~= nbits then
171 code = code * pow2[s.nbits - nbits]
172 nbits = s.nbits
173 end
174 s.code = code
175 --debug('huffman code:', i, s.nbits, s.val, code, bits_tostring(code))
176 code = code + 1
177 end
178
179 local minbits = math.huge
180 local look = {}
181 for i,s in ipairs(t) do
182 minbits = math.min(minbits, s.nbits)
183 look[s.code] = s.val
184 end
185
186 --for _,o in ipairs(t) do
187 -- debug(':', o.nbits, o.val)
188 --end
189
190 -- function t:lookup(bits) return look[bits] end
191
192 local function msb(bits, nbits)
193 local res = 0
194 for i=1,nbits do
195 local b = bits % 2
196 bits = (bits - b) / 2
197 res = res * 2 + b
198 end
199 return res
200 end
201 local tfirstcode = memoize(
202 function(bits) return pow2[minbits] + msb(bits, minbits) end)
203
204 function t:read(bs)
205 local code, nbits = 1, 0 -- leading 1 marker
206 while 1 do
207 if nbits == 0 then -- small optimization (optional)
208 code = tfirstcode[noeof(bs:read(minbits))]
209 nbits = nbits + minbits
210 else
211 local b = noeof(bs:read())
212 nbits = nbits + 1
213 --debug('b',b)
214 code = code * 2 + b -- MSB first
215 end
216 --debug('code?', code, bits_tostring(code))
217 local val = look[code]
218 if val then
219 --debug('FOUND', val)
220 return val
221 end
222 end
223 end
224
225 return t
226 end
227
228
229 local function parse_gzip_header(bs)
230 -- local FLG_FTEXT = 2^0
231 local FLG_FHCRC = 2^1
232 local FLG_FEXTRA = 2^2
233 local FLG_FNAME = 2^3
234 local FLG_FCOMMENT = 2^4
235
236 local id1 = bs:read(8)
237 local id2 = bs:read(8)
238 local cm = bs:read(8) -- compression method
239 local flg = bs:read(8) -- FLaGs
240 local mtime = bs:read(32) -- Modification TIME
241 local xfl = bs:read(8) -- eXtra FLags
242 local os = bs:read(8) -- Operating System
243
244 if hasbit(flg, FLG_FEXTRA) then
245 local xlen = bs:read(16)
246 local extra = 0
247 for i=1,xlen do
248 extra = bs:read(8)
249 end
250 end
251
252 if hasbit(flg, FLG_FNAME) then
253 while bs:read(8) ~= 0 do end
254 end
255
256 if hasbit(flg, FLG_FCOMMENT) then
257 while bs:read(8) ~= 0 do end
258 end
259 if hasbit(flg, FLG_FHCRC) then
260 bs:read(16)
261 end
262 end
263
264
265 local function parse_huffmantables(bs)
266 local hlit = bs:read(5) -- # of literal/length codes - 257
267 local hdist = bs:read(5) -- # of distance codes - 1
268 local hclen = noeof(bs:read(4)) -- # of code length codes - 4
269
270 local ncodelen_codes = hclen + 4
271 local codelen_init = {}
272 local codelen_vals = {
273 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15}
274 for i=1,ncodelen_codes do
275 local nbits = bs:read(3)
276 local val = codelen_vals[i]
277 codelen_init[val] = nbits
278 end
279 local codelentable = HuffmanTable(codelen_init, true)
280
281 local function decode(ncodes)
282 local init = {}
283 local nbits
284 local val = 0
285 while val < ncodes do
286 local codelen = codelentable:read(bs)
287 --FIX:check nil?
288 local nrepeat
289 if codelen <= 15 then
290 nrepeat = 1
291 nbits = codelen
292 --debug('w', nbits)
293 elseif codelen == 16 then
294 nrepeat = 3 + noeof(bs:read(2))
295 -- nbits unchanged
296 elseif codelen == 17 then
297 nrepeat = 3 + noeof(bs:read(3))
298 nbits = 0
299 elseif codelen == 18 then
300 nrepeat = 11 + noeof(bs:read(7))
301 nbits = 0
302 else
303 error 'ASSERT'
304 end
305 for i=1,nrepeat do
306 init[val] = nbits
307 val = val + 1
308 end
309 end
310 local huffmantable = HuffmanTable(init, true)
311 return huffmantable
312 end
313
314 local nlit_codes = hlit + 257
315 local ndist_codes = hdist + 1
316
317 local littable = decode(nlit_codes)
318 local disttable = decode(ndist_codes)
319
320 return littable, disttable
321 end
322
323
324 local tdecode_len_base
325 local tdecode_len_nextrabits
326 local tdecode_dist_base
327 local tdecode_dist_nextrabits
328 local function parse_compressed_item(bs, os, littable, disttable)
329 local val = littable:read(bs)
330 --debug(val, val < 256 and string_char(val))
331 if val < 256 then -- literal
332 output(os, val)
333 elseif val == 256 then -- end of block
334 return true
335 else
336 if not tdecode_len_base then
337 local t = {[257]=3}
338 local skip = 1
339 for i=258,285,4 do
340 for j=i,i+3 do t[j] = t[j-1] + skip end
341 if i ~= 258 then skip = skip * 2 end
342 end
343 t[285] = 258
344 tdecode_len_base = t
345 --for i=257,285 do debug('T1',i,t[i]) end
346 end
347 if not tdecode_len_nextrabits then
348 local t = {}
349 for i=257,285 do
350 local j = math_max(i - 261, 0)
351 t[i] = (j - (j % 4)) / 4
352 end
353 t[285] = 0
354 tdecode_len_nextrabits = t
355 --for i=257,285 do debug('T2',i,t[i]) end
356 end
357 local len_base = tdecode_len_base[val]
358 local nextrabits = tdecode_len_nextrabits[val]
359 local extrabits = bs:read(nextrabits)
360 local len = len_base + extrabits
361
362 if not tdecode_dist_base then
363 local t = {[0]=1}
364 local skip = 1
365 for i=1,29,2 do
366 for j=i,i+1 do t[j] = t[j-1] + skip end
367 if i ~= 1 then skip = skip * 2 end
368 end
369 tdecode_dist_base = t
370 --for i=0,29 do debug('T3',i,t[i]) end
371 end
372 if not tdecode_dist_nextrabits then
373 local t = {}
374 for i=0,29 do
375 local j = math_max(i - 2, 0)
376 t[i] = (j - (j % 2)) / 2
377 end
378 tdecode_dist_nextrabits = t
379 --for i=0,29 do debug('T4',i,t[i]) end
380 end
381 local dist_val = disttable:read(bs)
382 local dist_base = tdecode_dist_base[dist_val]
383 local dist_nextrabits = tdecode_dist_nextrabits[dist_val]
384 local dist_extrabits = bs:read(dist_nextrabits)
385 local dist = dist_base + dist_extrabits
386
387 --debug('BACK', len, dist)
388 for i=1,len do
389 local pos = (os.wnd_pos - 1 - dist) % 32768 + 1 -- 32K
390 output(os, assert(os.wnd[pos], 'invalid distance'))
391 end
392 end
393 return false
394 end
395
396
397 local function parse_block(bs, os)
398 local bfinal = bs:read(1)
399 local btype = bs:read(2)
400
401 local BTYPE_NO_COMPRESSION = 0
402 local BTYPE_FIXED_HUFFMAN = 1
403 local BTYPE_DYNAMIC_HUFFMAN = 2
404 local BTYPE_RESERVED = 3
405
406 if btype == BTYPE_NO_COMPRESSION then
407 bs:read(bs:nbits_left_in_byte())
408 local len = bs:read(16)
409 local nlen = noeof(bs:read(16))
410
411 for i=1,len do
412 local by = noeof(bs:read(8))
413 output(os, by)
414 end
415 elseif btype == BTYPE_FIXED_HUFFMAN or btype == BTYPE_DYNAMIC_HUFFMAN then
416 local littable, disttable
417 if btype == BTYPE_DYNAMIC_HUFFMAN then
418 littable, disttable = parse_huffmantables(bs)
419 else
420 littable = HuffmanTable {0,8, 144,9, 256,7, 280,8, 288,nil}
421 disttable = HuffmanTable {0,5, 32,nil}
422 end
423
424 repeat until parse_compressed_item(
425 bs, os, littable, disttable
426 );
427 end
428
429 return bfinal ~= 0
430 end
431
432
433 local function deflate(t)
434 local bs, os = get_bitstream(t.input)
435 , make_os(get_obytestream(t.output))
436 repeat until parse_block(bs, os)
437 end
438
439 function gunzip(t)
440 local bs = get_bitstream(t.input)
441 local outbs = get_obytestream(t.output)
442
443 parse_gzip_header(bs)
444
445 deflate{input=bs, output=outbs}
446
447 bs:read(bs:nbits_left_in_byte())
448 bs:read()
449 end
450
451
452 --[[
453 LICENSE
454
455 Copyright (C) 2008, David Manura.
456
457 Permission is hereby granted, free of charge, to any person obtaining a copy
458 of this software and associated documentation files (the "Software"), to deal
459 in the Software without restriction, including without limitation the rights
460 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
461 copies of the Software, and to permit persons to whom the Software is
462 furnished to do so, subject to the following conditions:
463
464 The above copyright notice and this permission notice shall be included in
465 all copies or substantial portions of the Software.
466
467 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
468 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
469 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
470 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
471 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
472 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
473 THE SOFTWARE.
474
475 (end license)
476 --]]

mercurial