17 # with this program; if not, write to the Free Software Foundation, Inc., |
17 # with this program; if not, write to the Free Software Foundation, Inc., |
18 # 51 Franklin Street, 5th Floor, Boston, MA, 02111. |
18 # 51 Franklin Street, 5th Floor, Boston, MA, 02111. |
19 |
19 |
20 import errno |
20 import errno |
21 import mimetypes |
21 import mimetypes |
|
22 import optparse |
22 import os |
23 import os |
23 import subprocess |
24 import subprocess |
24 import sys |
25 import sys |
25 import tempfile |
26 import tempfile |
26 |
27 |
27 from cStringIO import StringIO |
28 from cStringIO import StringIO |
28 |
29 |
|
# Program version, interpolated into VERSION_BANNER below.
# VERSION_BANNER is the text handed to optparse as the --version output
# (see ExtractorApplication.parse_options).
30 VERSION = "1.1" |
|
31 VERSION_BANNER = """x version %s |
|
32 Copyright (c) 2006 Brett Smith <brettcsmith@brettcsmith.org> |
|
33 |
|
34 This program is free software; you can redistribute it and/or modify it |
|
35 under the terms of the GNU General Public License as published by the |
|
36 Free Software Foundation; either version 2 of the License, or (at your |
|
37 option) any later version. |
|
38 |
|
39 This program is distributed in the hope that it will be useful, but |
|
40 WITHOUT ANY WARRANTY; without even the implied warranty of |
|
41 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General |
|
42 Public License for more details.""" % (VERSION,) |
|
43 |
|
# Result codes returned by the extractors' check_contents() methods and
# examined by ExtractorApplication.prepare_extraction().  check_contents()
# may also return a string (the archive's single top-level directory name).
# NOTE(review): ONE_DIRECTORY appears both commented out and defined in this
# listing -- confirm which revision is intended.
44 MATCHING_DIRECTORY = 1 |
|
45 # ONE_DIRECTORY = 2 |
|
46 BOMB = 3 |
|
47 EMPTY = 4 |
|
48 |
# Register the '.bz2' encoding unless mimetypes already has one, and force
# '.exe' to the MS-DOS program type, which extractor_map routes to
# ZipExtractor.
29 mimetypes.encodings_map.setdefault('.bz2', 'bzip2') |
49 mimetypes.encodings_map.setdefault('.bz2', 'bzip2') |
30 mimetypes.types_map['.exe'] = 'application/x-msdos-program' |
50 mimetypes.types_map['.exe'] = 'application/x-msdos-program' |
31 |
|
32 MATCHING_DIRECTORY = 1 |
|
33 ONE_DIRECTORY = 2 |
|
34 BOMB = 3 |
|
35 EMPTY = 4 |
|
36 |
51 |
class ExtractorError(Exception):
    """Signals a failure while opening or extracting an archive.

    Raised by extractors (for example when the archive file cannot be
    opened) and caught by ExtractorApplication.extract(), which reports the
    message and marks the archive as failed.
    """
39 |
54 |
40 |
55 |
72 |
87 |
73 class BaseExtractor(object): |
88 class BaseExtractor(object): |
74 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'} |
89 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'} |
75 |
90 |
76 def __init__(self, filename, mimetype, encoding): |
91 def __init__(self, filename, mimetype, encoding): |
|
92 if encoding and (not self.decoders.has_key(encoding)): |
|
93 raise ValueError("unrecognized encoding %s" % (encoding,)) |
77 self.filename = filename |
94 self.filename = filename |
78 self.mimetype = mimetype |
95 self.mimetype = mimetype |
79 self.encoding = encoding |
96 self.encoding = encoding |
|
97 self.included_archives = [] |
80 try: |
98 try: |
81 self.archive = open(filename, 'r') |
99 self.archive = open(filename, 'r') |
82 except (IOError, OSError), error: |
100 except (IOError, OSError), error: |
83 raise ExtractorError("could not open %s: %s" % |
101 raise ExtractorError("could not open %s: %s" % |
84 (filename, error.strerror)) |
102 (filename, error.strerror)) |
110 def prepare(self): |
128 def prepare(self): |
111 pass |
129 pass |
112 |
130 |
113 def check_contents(self): |
131 def check_contents(self): |
114 self.archive.seek(0, 0) |
132 self.archive.seek(0, 0) |
|
133 archive_type = None |
115 filenames = self.get_filenames() |
134 filenames = self.get_filenames() |
116 try: |
135 try: |
117 first_part = filenames.next().split('/', 1)[0] + '/' |
136 filename = filenames.next() |
|
137 if extractor_map.has_key(mimetypes.guess_type(filename)[0]): |
|
138 self.included_archives.append(filename) |
|
139 first_part = filename.split('/', 1)[0] + '/' |
118 except StopIteration: |
140 except StopIteration: |
119 filenames.stop() |
141 filenames.stop() |
120 return EMPTY |
142 return EMPTY |
121 for filename in filenames: |
143 for filename in filenames: |
122 if not filename.startswith(first_part): |
144 if extractor_map.has_key(mimetypes.guess_type(filename)[0]): |
123 filenames.stop() |
145 self.included_archives.append(filename) |
124 return BOMB |
146 if (archive_type is None) and (not filename.startswith(first_part)): |
|
147 archive_type = BOMB |
125 filenames.stop() |
148 filenames.stop() |
|
149 if archive_type: |
|
150 return archive_type |
126 if self.basename() == first_part[:-1]: |
151 if self.basename() == first_part[:-1]: |
127 return MATCHING_DIRECTORY |
152 return MATCHING_DIRECTORY |
128 return first_part |
153 return first_part |
129 |
154 |
130 def basename(self): |
155 def basename(self): |
155 class ZipExtractor(BaseExtractor): |
180 class ZipExtractor(BaseExtractor): |
156 def __init__(self, filename, mimetype, encoding): |
181 def __init__(self, filename, mimetype, encoding): |
157 self.filename = filename |
182 self.filename = filename |
158 self.mimetype = mimetype |
183 self.mimetype = mimetype |
159 self.encoding = encoding |
184 self.encoding = encoding |
|
185 self.included_archives = [] |
160 self.archive = StringIO() |
186 self.archive = StringIO() |
161 |
187 |
162 def get_filenames(self): |
188 def get_filenames(self): |
163 return ProcessStreamer(['zipinfo', '-1', self.filename], None) |
189 return ProcessStreamer(['zipinfo', '-1', self.filename], None) |
164 |
190 |
193 elif len(pieces[-1]) < 6: |
219 elif len(pieces[-1]) < 6: |
194 pieces.pop() |
220 pieces.pop() |
195 return '.'.join(pieces) |
221 return '.'.join(pieces) |
196 |
222 |
197 def check_contents(self): |
223 def check_contents(self): |
|
224 CpioExtractor.check_contents(self) |
198 return BOMB |
225 return BOMB |
199 |
226 |
200 |
227 |
201 class DebExtractor(TarExtractor): |
228 class DebExtractor(TarExtractor): |
202 def prepare(self): |
229 def prepare(self): |
203 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], |
230 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], |
204 "data.tar.gz extraction") |
231 "data.tar.gz extraction") |
212 elif (len(pieces[-1]) > 10) or (not pieces[-1].endswith('.deb')): |
239 elif (len(pieces[-1]) > 10) or (not pieces[-1].endswith('.deb')): |
213 return BaseExtractor.basename(self) |
240 return BaseExtractor.basename(self) |
214 return pieces[0] |
241 return pieces[0] |
215 |
242 |
216 def check_contents(self): |
243 def check_contents(self): |
|
244 TarExtractor.check_contents(self) |
217 return BOMB |
245 return BOMB |
218 |
246 |
219 |
247 |
|
# Maps a MIME type (as guessed by mimetypes.guess_type) to the extractor
# class that handles it.  Consulted by ExtractorApplication.get_extractor()
# to pick a handler and by check_contents() to spot nested archives.
248 extractor_map = {'application/x-tar': TarExtractor, |
|
249 'application/zip': ZipExtractor, |
|
250 'application/x-msdos-program': ZipExtractor, |
|
251 'application/x-debian-package': DebExtractor, |
|
252 'application/x-redhat-package-manager': RPMExtractor, |
|
253 'application/x-rpm': RPMExtractor, |
|
254 'application/x-cpio': CpioExtractor} |
|
255 |
220 class ExtractorApplication(object): |
256 class ExtractorApplication(object): |
221 extractor_map = {'application/x-tar': TarExtractor, |
257 actions = ['get_extractor', 'prepare_extraction', 'extract', 'recurse'] |
222 'application/zip': ZipExtractor, |
|
223 'application/x-msdos-program': ZipExtractor, |
|
224 'application/x-debian-package': DebExtractor, |
|
225 'application/x-redhat-package-manager': RPMExtractor, |
|
226 'application/x-rpm': RPMExtractor, |
|
227 'application/x-cpio': CpioExtractor} |
|
228 actions = ['get_extractor', 'prepare_extraction', 'extract'] |
|
229 |
258 |
230 def __init__(self, arguments): |
259 def __init__(self, arguments): |
231 self.filenames = arguments |
260 self.parse_options(arguments) |
232 self.successes = [] |
261 self.successes = [] |
233 self.failures = [] |
262 self.failures = [] |
234 |
263 |
|
264 def parse_options(self, arguments): |
|
265 parser = optparse.OptionParser( |
|
266 usage="%prog [options] archive [archive2 ...]", |
|
267 description="Intelligent archive extractor", |
|
268 version=VERSION_BANNER |
|
269 ) |
|
270 parser.add_option('-r', '--recursive', dest='recursive', |
|
271 action='store_true', default=False, |
|
272 help='extract archives contained in the ones listed') |
|
273 self.options, filenames = parser.parse_args(arguments) |
|
274 if not filenames: |
|
275 parser.error("you did not list any archives") |
|
276 self.archives = {os.path.realpath(os.curdir): filenames} |
|
277 |
235 def show_error(self, message): |
278 def show_error(self, message): |
236 print >>sys.stderr, "%s: %s" % (self.current_filename, message) |
279 print >>sys.stderr, "%s: %s" % (self.current_filename, message) |
237 |
280 |
238 def get_extractor(self): |
281 def get_extractor(self): |
239 mimetype, encoding = mimetypes.guess_type(self.current_filename) |
282 mimetype, encoding = mimetypes.guess_type(self.current_filename) |
240 try: |
283 try: |
241 handler = self.extractor_map[mimetype] |
284 handler = extractor_map[mimetype] |
242 except KeyError: |
285 except KeyError: |
243 self.show_error("not a known archive type") |
286 self.show_error("not a known archive type") |
244 return False |
287 return False |
245 try: |
288 try: |
246 self.current_extractor = handler(self.current_filename, mimetype, |
289 self.current_extractor = handler(self.current_filename, mimetype, |
271 |
314 |
272 def prepare_extraction(self): |
315 def prepare_extraction(self): |
273 self.current_path = '.' |
316 self.current_path = '.' |
274 contents = self.current_extractor.check_contents() |
317 contents = self.current_extractor.check_contents() |
275 if contents not in (MATCHING_DIRECTORY, EMPTY): |
318 if contents not in (MATCHING_DIRECTORY, EMPTY): |
276 directory = self.prepare_target_directory() |
319 self.target_directory = self.prepare_target_directory() |
277 if directory is None: |
320 if self.target_directory is None: |
278 return False |
321 return False |
279 if contents == BOMB: |
322 if contents == BOMB: |
280 os.chdir(directory) |
323 os.chdir(self.target_directory) |
281 self.current_path = '..' |
324 self.current_path = '..' |
282 self.cleanup_actions.append((os.chdir, '..')) |
|
283 else: |
325 else: |
284 self.cleanup_actions.append((os.rename, contents, directory)) |
326 self.cleanup_actions.append((os.rename, contents, |
|
327 self.target_directory)) |
|
328 else: |
|
329 self.target_directory = os.curdir |
285 return True |
330 return True |
286 |
331 |
287 def extract(self): |
332 def extract(self): |
288 try: |
333 try: |
289 self.current_extractor.extract(self.current_path) |
334 self.current_extractor.extract(self.current_path) |
290 except ExtractorError, error: |
335 except ExtractorError, error: |
291 self.show_error(error) |
336 self.show_error(error) |
292 return False |
337 return False |
293 return True |
338 return True |
294 |
339 |
|
340 def recurse(self): |
|
341 if not self.options.recursive: |
|
342 return True |
|
343 print "wow", self.current_extractor.included_archives |
|
344 for filename in self.current_extractor.included_archives: |
|
345 tail_path, basename = os.path.split(filename) |
|
346 directory = os.path.join(self.current_directory, |
|
347 self.target_directory, tail_path) |
|
348 self.archives.setdefault(directory, []).append(basename) |
|
349 print self.archives |
|
350 return True |
|
351 |
295 def run(self): |
352 def run(self): |
296 for filename in self.filenames: |
353 while self.archives: |
297 running = True |
354 self.current_directory, filenames = self.archives.popitem() |
298 self.current_filename = filename |
355 for filename in filenames: |
299 self.cleanup_actions = [] |
356 os.chdir(self.current_directory) |
300 actions = [getattr(self, name) for name in self.actions] |
357 running = True |
301 while running and actions: |
358 self.current_filename = filename |
302 running = actions.pop(0)() |
359 self.cleanup_actions = [] |
303 for action in self.cleanup_actions: |
360 actions = [getattr(self, name) for name in self.actions] |
304 action[0](*action[1:]) |
361 while running and actions: |
305 if running: |
362 running = actions.pop(0)() |
306 self.successes.append(self.current_filename) |
363 for action in self.cleanup_actions: |
307 else: |
364 action[0](*action[1:]) |
308 self.failures.append(self.current_filename) |
365 if running: |
|
366 self.successes.append(self.current_filename) |
|
367 else: |
|
368 self.failures.append(self.current_filename) |
309 if self.failures: |
369 if self.failures: |
310 return 1 |
370 return 1 |
311 return 0 |
371 return 0 |
312 |
372 |
313 |
373 |