20 import logging |
20 import logging |
21 import mimetypes |
21 import mimetypes |
22 import optparse |
22 import optparse |
23 import os |
23 import os |
24 import re |
24 import re |
|
25 import shutil |
25 import stat |
26 import stat |
26 import subprocess |
27 import subprocess |
27 import sys |
28 import sys |
28 import tempfile |
29 import tempfile |
29 import textwrap |
30 import textwrap |
30 import traceback |
31 import traceback |
31 |
32 |
32 from sets import Set |
33 from sets import Set |
33 |
34 |
34 VERSION = "5.0" |
35 VERSION = "6.0" |
35 VERSION_BANNER = """dtrx version %s |
36 VERSION_BANNER = """dtrx version %s |
36 Copyright (c) 2006, 2007 Brett Smith <brettcsmith@brettcsmith.org> |
37 Copyright (c) 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org> |
37 |
38 |
38 This program is free software; you can redistribute it and/or modify it |
39 This program is free software; you can redistribute it and/or modify it |
39 under the terms of the GNU General Public License as published by the |
40 under the terms of the GNU General Public License as published by the |
40 Free Software Foundation; either version 3 of the License, or (at your |
41 Free Software Foundation; either version 3 of the License, or (at your |
41 option) any later version. |
42 option) any later version. |
118 |
119 |
119 |
120 |
class ExtractorError(Exception):
    """Raised when an extraction attempt fails (e.g. "cannot extract here")."""
    pass


class ExtractorUnusable(Exception):
    """Raised when an extractor's external command could not be run at all."""
    pass


# Every exception type that the extraction code paths treat as "this
# extractor failed" rather than as a programming error.  Handlers catch
# this tuple so they all agree on what counts as an extraction failure.
EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError)
123 |
130 |
124 class BaseExtractor(object): |
131 class BaseExtractor(object): |
125 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat', |
132 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat', |
126 'lzma': 'lzcat'} |
133 'lzma': 'lzcat'} |
127 |
134 |
165 stdin = processes[-1].stdout |
172 stdin = processes[-1].stdout |
166 if index == last_pipe: |
173 if index == last_pipe: |
167 stdout = final_stdout |
174 stdout = final_stdout |
168 else: |
175 else: |
169 stdout = subprocess.PIPE |
176 stdout = subprocess.PIPE |
170 processes.append(subprocess.Popen(command, stdin=stdin, |
177 try: |
171 stdout=stdout, |
178 processes.append(subprocess.Popen(command, stdin=stdin, |
172 stderr=subprocess.PIPE)) |
179 stdout=stdout, |
|
180 stderr=subprocess.PIPE)) |
|
181 except OSError, error: |
|
182 if error.errno == errno.ENOENT: |
|
183 raise ExtractorUnusable("could not run %s" % (command[0],)) |
|
184 raise |
173 exit_codes = [pipe.wait() for pipe in processes] |
185 exit_codes = [pipe.wait() for pipe in processes] |
174 self.archive.close() |
186 self.archive.close() |
175 for index in range(last_pipe): |
187 for index in range(last_pipe): |
176 processes[index].stdout.close() |
188 processes[index].stdout.close() |
177 processes[index].stderr.close() |
189 processes[index].stderr.close() |
228 os.chdir(self.target) |
240 os.chdir(self.target) |
229 try: |
241 try: |
230 self.archive.seek(0, 0) |
242 self.archive.seek(0, 0) |
231 self.extract_archive() |
243 self.extract_archive() |
232 self.check_contents() |
244 self.check_contents() |
233 except ExtractorError: |
245 except EXTRACTION_ERRORS: |
234 os.chdir(old_path) |
246 os.chdir(old_path) |
235 subprocess.call(['rm', '-rf', self.target]) |
247 shutil.rmtree(self.target, ignore_errors=True) |
236 raise |
248 raise |
237 os.chdir(old_path) |
249 os.chdir(old_path) |
238 |
250 |
239 def get_filenames(self): |
251 def get_filenames(self): |
240 self.run_pipes() |
252 self.run_pipes() |
246 return |
258 return |
247 yield line.rstrip('\n') |
259 yield line.rstrip('\n') |
248 |
260 |
249 |
261 |
250 class CompressionExtractor(BaseExtractor): |
262 class CompressionExtractor(BaseExtractor): |
|
263 file_type = 'compressed file' |
251 name_checker = FilenameChecker |
264 name_checker = FilenameChecker |
252 |
265 |
253 def basename(self): |
266 def basename(self): |
254 pieces = os.path.basename(self.filename).split('.') |
267 pieces = os.path.basename(self.filename).split('.') |
255 extension = '.' + pieces[-1] |
268 extension = '.' + pieces[-1] |
267 output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.') |
280 output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.') |
268 except (OSError, IOError), error: |
281 except (OSError, IOError), error: |
269 raise ExtractorError("cannot extract here: %s" % (error.strerror,)) |
282 raise ExtractorError("cannot extract here: %s" % (error.strerror,)) |
270 try: |
283 try: |
271 self.run_pipes(output_fd) |
284 self.run_pipes(output_fd) |
272 except ExtractorError: |
285 except EXTRACTION_ERRORS: |
273 os.close(output_fd) |
286 os.close(output_fd) |
274 os.unlink(self.target) |
287 os.unlink(self.target) |
275 raise |
288 raise |
276 os.close(output_fd) |
289 os.close(output_fd) |
277 |
290 |
278 |
291 |
class TarExtractor(BaseExtractor):
    """Extractor that drives the external ``tar`` command."""

    # Human-readable archive kind; read by callers when reporting which
    # interpretation of the file failed.
    file_type = 'tar file'

    def get_filenames(self):
        """Return the archive's member names as listed by ``tar -t``."""
        # NOTE(review): the second argument looks like a description of the
        # stage used in error messages -- confirm against BaseExtractor.pipe.
        self.pipe(['tar', '-t'], "listing")
        return BaseExtractor.get_filenames(self)

    def extract_archive(self):
        """Unpack the archive into the current directory with ``tar -x``."""
        self.pipe(['tar', '-x'])
        self.run_pipes()
287 |
302 |
288 |
303 |
289 class CpioExtractor(BaseExtractor): |
304 class CpioExtractor(BaseExtractor): |
|
305 file_type = 'cpio file' |
|
306 |
290 def get_filenames(self): |
307 def get_filenames(self): |
291 self.pipe(['cpio', '-t'], "listing") |
308 self.pipe(['cpio', '-t'], "listing") |
292 return BaseExtractor.get_filenames(self) |
309 return BaseExtractor.get_filenames(self) |
293 |
310 |
294 def extract_archive(self): |
311 def extract_archive(self): |
318 self.check_included_archives(os.listdir('.')) |
337 self.check_included_archives(os.listdir('.')) |
319 self.content_type = BOMB |
338 self.content_type = BOMB |
320 |
339 |
321 |
340 |
322 class DebExtractor(TarExtractor): |
341 class DebExtractor(TarExtractor): |
|
342 file_type = 'Debian package' |
|
343 |
323 def prepare(self): |
344 def prepare(self): |
324 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], |
345 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], |
325 "data.tar.gz extraction") |
346 "data.tar.gz extraction") |
326 self.pipe(['zcat'], "data.tar.gz decompression") |
347 self.pipe(['zcat'], "data.tar.gz decompression") |
327 |
348 |
345 "control.tar.gz extraction") |
366 "control.tar.gz extraction") |
346 self.pipe(['zcat'], "control.tar.gz decompression") |
367 self.pipe(['zcat'], "control.tar.gz decompression") |
347 |
368 |
348 |
369 |
class GemExtractor(TarExtractor):
    """Extractor for Ruby gems: unpacks the ``data.tar.gz`` member."""

    # Human-readable archive kind; read by callers when reporting which
    # interpretation of the file failed.
    file_type = 'Ruby gem'

    def prepare(self):
        """Set up the stages that expose the gem's payload as a tar stream."""
        # ``tar -xO`` writes the named member to stdout instead of to disk,
        # so the following zcat stage receives data.tar.gz directly.
        self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
        self.pipe(['zcat'], "data.tar.gz decompression")

    def check_contents(self):
        """Inspect what was extracted and classify the result as BOMB."""
        self.check_included_archives(os.listdir('.'))
        # NOTE(review): BOMB is defined elsewhere in this file; presumably it
        # marks content that must be wrapped in its own directory -- confirm.
        self.content_type = BOMB
357 |
380 |
358 |
381 |
359 class GemMetadataExtractor(CompressionExtractor): |
382 class GemMetadataExtractor(CompressionExtractor): |
|
383 file_type = 'Ruby gem' |
|
384 |
360 def prepare(self): |
385 def prepare(self): |
361 self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction") |
386 self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction") |
362 self.pipe(['zcat'], "metadata.gz decompression") |
387 self.pipe(['zcat'], "metadata.gz decompression") |
363 |
388 |
364 def basename(self): |
389 def basename(self): |
381 BaseExtractor.__init__(self, '/dev/null', None) |
406 BaseExtractor.__init__(self, '/dev/null', None) |
382 self.filename = os.path.realpath(filename) |
407 self.filename = os.path.realpath(filename) |
383 |
408 |
384 |
409 |
class ZipExtractor(NoPipeExtractor):
    """Extractor that drives the external ``zipinfo`` and ``unzip`` commands."""

    # Human-readable archive kind; read by callers when reporting which
    # interpretation of the file failed.
    file_type = 'Zip file'

    def get_filenames(self):
        """Return the archive's member names as listed by ``zipinfo -1``."""
        # The archive path is passed on the command line rather than piped
        # in on stdin.
        self.pipe(['zipinfo', '-1', self.filename], "listing")
        return BaseExtractor.get_filenames(self)

    def extract_archive(self):
        """Unpack the archive into the current directory with ``unzip -q``."""
        self.pipe(['unzip', '-q', self.filename])
        self.run_pipes()
393 |
420 |
394 |
421 |
395 class SevenExtractor(NoPipeExtractor): |
422 class SevenExtractor(NoPipeExtractor): |
|
423 file_type = '7z file' |
396 border_re = re.compile('^[- ]+$') |
424 border_re = re.compile('^[- ]+$') |
397 |
425 |
398 def get_filenames(self): |
426 def get_filenames(self): |
399 self.pipe(['7z', 'l', self.filename], "listing") |
427 self.pipe(['7z', 'l', self.filename], "listing") |
400 self.run_pipes() |
428 self.run_pipes() |
414 self.pipe(['7z', 'x', self.filename]) |
442 self.pipe(['7z', 'x', self.filename]) |
415 self.run_pipes() |
443 self.run_pipes() |
416 |
444 |
417 |
445 |
418 class CABExtractor(NoPipeExtractor): |
446 class CABExtractor(NoPipeExtractor): |
|
447 file_type = 'CAB archive' |
419 border_re = re.compile(r'^[-\+]+$') |
448 border_re = re.compile(r'^[-\+]+$') |
420 |
449 |
421 def get_filenames(self): |
450 def get_filenames(self): |
422 self.pipe(['cabextract', '-l', self.filename], "listing") |
451 self.pipe(['cabextract', '-l', self.filename], "listing") |
423 self.run_pipes() |
452 self.run_pipes() |
778 self.target = None |
807 self.target = None |
779 |
808 |
780 def report(self, function, *args): |
809 def report(self, function, *args): |
781 try: |
810 try: |
782 error = function(*args) |
811 error = function(*args) |
783 except (ExtractorError, IOError, OSError), exception: |
812 except EXTRACTION_ERRORS, exception: |
784 error = str(exception) |
813 error = str(exception) |
785 logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) |
814 logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) |
786 return error |
815 return error |
787 |
816 |
788 |
817 |
811 |
840 |
812 |
841 |
813 class ListAction(BaseAction): |
842 class ListAction(BaseAction): |
814 def __init__(self, options, filenames): |
843 def __init__(self, options, filenames): |
815 BaseAction.__init__(self, options, filenames) |
844 BaseAction.__init__(self, options, filenames) |
|
845 self.count = 0 |
816 |
846 |
817 def get_list(self, extractor): |
847 def get_list(self, extractor): |
818 # Note: The reason I'm getting all the filenames up front is |
848 # Note: The reason I'm getting all the filenames up front is |
819 # because if we run into trouble partway through the archive, we'll |
849 # because if we run into trouble partway through the archive, we'll |
820 # try another extractor. So before we display anything we have to |
850 # try another extractor. So before we display anything we have to |
821 # be sure this one is successful. We maybe don't have to be quite |
851 # be sure this one is successful. We maybe don't have to be quite |
822 # this conservative but this is the easy way out for now. |
852 # this conservative but this is the easy way out for now. |
823 self.filelist = list(extractor.get_filenames()) |
853 self.filelist = list(extractor.get_filenames()) |
824 |
854 |
825 def show_list(self, filename): |
855 def show_list(self, filename): |
|
856 self.count += 1 |
826 if len(self.filenames) != 1: |
857 if len(self.filenames) != 1: |
827 if filename != self.filenames[0]: |
858 if self.count > 1: |
828 print |
859 print |
829 print "%s:" % (filename,) |
860 print "%s:" % (filename,) |
830 print '\n'.join(self.filelist) |
861 print '\n'.join(self.filelist) |
831 |
862 |
832 def run(self, filename, extractor): |
863 def run(self, filename, extractor): |
833 self.current_filename = filename |
|
834 return (self.report(self.get_list, extractor) or |
864 return (self.report(self.get_list, extractor) or |
835 self.report(self.show_list, filename)) |
865 self.report(self.show_list, filename)) |
836 |
866 |
837 |
867 |
838 class ExtractorApplication(object): |
868 class ExtractorApplication(object): |
907 return error.strerror |
937 return error.strerror |
908 if stat.S_ISDIR(result.st_mode): |
938 if stat.S_ISDIR(result.st_mode): |
909 return "cannot extract a directory" |
939 return "cannot extract a directory" |
910 |
940 |
def try_extractors(self, filename, builder):
    """Run the current action with each candidate extractor until one works.

    ``builder`` is an iterable of extractor objects to try in order.  On
    the first extractor for which ``self.action.run`` reports no error,
    the result is passed to ``self.recurse`` and None is returned.

    If every candidate fails (or there are none), the failures are logged
    here and True is returned; the True value tells the caller that the
    error has already been reported.
    """
    errors = []
    for extractor in builder:
        error = self.action.run(filename, extractor)
        if not error:
            self.recurse(filename, extractor, self.action)
            return None
        # Remember how the file was being interpreted so the final report
        # can say which interpretation produced which error.
        errors.append((extractor.file_type, extractor.encoding, error))

    logger.error("could not handle %s" % (filename,))
    if not errors:
        logger.error("not a known archive type")
    for file_type, encoding, error in errors:
        if encoding:
            description = "%s-encoded %s" % (encoding, file_type)
        else:
            description = file_type
        logger.error("treating as %s failed: %s" % (description, error))
    return True
927 |
960 |
928 def run(self): |
961 def run(self): |
929 if self.options.show_list: |
962 if self.options.show_list: |
930 action = ListAction |
963 action = ListAction |
931 else: |
964 else: |
937 for filename in self.filenames: |
970 for filename in self.filenames: |
938 builder = ExtractorBuilder(filename, self.options) |
971 builder = ExtractorBuilder(filename, self.options) |
939 error = (self.check_file(filename) or |
972 error = (self.check_file(filename) or |
940 self.try_extractors(filename, builder.get_extractor())) |
973 self.try_extractors(filename, builder.get_extractor())) |
941 if error: |
974 if error: |
942 logger.error("%s: %s" % (filename, error)) |
975 if error != True: |
|
976 logger.error("%s: %s" % (filename, error)) |
943 self.failures.append(filename) |
977 self.failures.append(filename) |
944 else: |
978 else: |
945 self.successes.append(filename) |
979 self.successes.append(filename) |
946 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP |
980 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP |
947 if self.failures: |
981 if self.failures: |