# HG changeset patch # User brett # Date 1200627367 18000 # Node ID 37d55540733427d33bc8046a4f52c07546680f45 # Parent 69cd944030889efd9986f3bf2563167142b752e9 [svn] At work I was getting an unhelpful "No such file or directory" error when I tried to extract an .exe. It turns out this was because I didn't have cabextract installed. This inspired a few changes: * BaseExtractor now raises an ExtractorUnusable error when this happens. * Various points in the code deal with this properly. * The loop for trying extractors will now report all the errors it got, if extraction is unsuccessful, to better help you understand why. Also snuck in a bug fix where things weren't being cleaned up properly with a half-extracted archive. Also got my version number situation in order, in anticipation of the next release, which may be a while yet but at least I won't have to worry about it then. diff -r 69cd94403088 -r 37d555407334 README --- a/README Tue Nov 27 22:43:40 2007 -0500 +++ b/README Thu Jan 17 22:36:07 2008 -0500 @@ -70,9 +70,9 @@ Other Useful Information ------------------------ -dtrx 5.0 is copyright (c) 2006, 2007 `Brett Smith`_. Feel free to send -comments, bug reports, patches, and so on. You can find the latest version -of dtrx on `its home page`_. +dtrx 6.0 is copyright (c) 2006, 2007, 2008 `Brett Smith`_. Feel free to +send comments, bug reports, patches, and so on. You can find the latest +version of dtrx on `its home page`_. .. _`Brett Smith`: mailto:brettcsmith@brettcsmith.org .. _`its home page`: http://www.brettcsmith.org/2007/dtrx/ diff -r 69cd94403088 -r 37d555407334 TODO --- a/TODO Tue Nov 27 22:43:40 2007 -0500 +++ b/TODO Thu Jan 17 22:36:07 2008 -0500 @@ -1,3 +1,9 @@ +To do: +* Fix recursive extraction to check subdirectories +* Add ability to list included archives for recursive extraction +* Make C-c not print a stack trace. 
+* Make -v report some kind of progress + Things which I have a use case/anti-use case for: * Support pisi packages (http://paketler.pardus.org.tr/pardus-2007/) * Steal ideas from . diff -r 69cd94403088 -r 37d555407334 scripts/dtrx --- a/scripts/dtrx Tue Nov 27 22:43:40 2007 -0500 +++ b/scripts/dtrx Thu Jan 17 22:36:07 2008 -0500 @@ -22,6 +22,7 @@ import optparse import os import re +import shutil import stat import subprocess import sys @@ -31,9 +32,9 @@ from sets import Set -VERSION = "5.0" +VERSION = "6.0" VERSION_BANNER = """dtrx version %s -Copyright (c) 2006, 2007 Brett Smith +Copyright (c) 2006, 2007, 2008 Brett Smith This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the @@ -121,6 +122,12 @@ pass +class ExtractorUnusable(Exception): + pass + + +EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError) + class BaseExtractor(object): decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat', 'lzma': 'lzcat'} @@ -167,9 +174,14 @@ stdout = final_stdout else: stdout = subprocess.PIPE - processes.append(subprocess.Popen(command, stdin=stdin, - stdout=stdout, - stderr=subprocess.PIPE)) + try: + processes.append(subprocess.Popen(command, stdin=stdin, + stdout=stdout, + stderr=subprocess.PIPE)) + except OSError, error: + if error.errno == errno.ENOENT: + raise ExtractorUnusable("could not run %s" % (command[0],)) + raise exit_codes = [pipe.wait() for pipe in processes] self.archive.close() for index in range(last_pipe): @@ -230,9 +242,9 @@ self.archive.seek(0, 0) self.extract_archive() self.check_contents() - except ExtractorError: + except EXTRACTION_ERRORS: os.chdir(old_path) - subprocess.call(['rm', '-rf', self.target]) + shutil.rmtree(self.target, ignore_errors=True) raise os.chdir(old_path) @@ -248,6 +260,7 @@ class CompressionExtractor(BaseExtractor): + file_type = 'compressed file' name_checker = FilenameChecker def basename(self): @@ -269,7 +282,7 @@ 
raise ExtractorError("cannot extract here: %s" % (error.strerror,)) try: self.run_pipes(output_fd) - except ExtractorError: + except EXTRACTION_ERRORS: os.close(output_fd) os.unlink(self.target) raise @@ -277,6 +290,8 @@ class TarExtractor(BaseExtractor): + file_type = 'tar file' + def get_filenames(self): self.pipe(['tar', '-t'], "listing") return BaseExtractor.get_filenames(self) @@ -287,6 +302,8 @@ class CpioExtractor(BaseExtractor): + file_type = 'cpio file' + def get_filenames(self): self.pipe(['cpio', '-t'], "listing") return BaseExtractor.get_filenames(self) @@ -298,6 +315,8 @@ class RPMExtractor(CpioExtractor): + file_type = 'RPM' + def prepare(self): self.pipe(['rpm2cpio', '-'], "rpm2cpio") @@ -320,6 +339,8 @@ class DebExtractor(TarExtractor): + file_type = 'Debian package' + def prepare(self): self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], "data.tar.gz extraction") @@ -347,6 +368,8 @@ class GemExtractor(TarExtractor): + file_type = 'Ruby gem' + def prepare(self): self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction") self.pipe(['zcat'], "data.tar.gz decompression") @@ -357,6 +380,8 @@ class GemMetadataExtractor(CompressionExtractor): + file_type = 'Ruby gem' + def prepare(self): self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction") self.pipe(['zcat'], "metadata.gz decompression") @@ -383,6 +408,8 @@ class ZipExtractor(NoPipeExtractor): + file_type = 'Zip file' + def get_filenames(self): self.pipe(['zipinfo', '-1', self.filename], "listing") return BaseExtractor.get_filenames(self) @@ -393,6 +420,7 @@ class SevenExtractor(NoPipeExtractor): + file_type = '7z file' border_re = re.compile('^[- ]+$') def get_filenames(self): @@ -416,6 +444,7 @@ class CABExtractor(NoPipeExtractor): + file_type = 'CAB archive' border_re = re.compile(r'^[-\+]+$') def get_filenames(self): @@ -780,7 +809,7 @@ def report(self, function, *args): try: error = function(*args) - except (ExtractorError, IOError, OSError), exception: + except 
EXTRACTION_ERRORS, exception: error = str(exception) logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) return error @@ -813,6 +842,7 @@ class ListAction(BaseAction): def __init__(self, options, filenames): BaseAction.__init__(self, options, filenames) + self.count = 0 def get_list(self, extractor): # Note: The reason I'm getting all the filenames up front is @@ -823,14 +853,14 @@ self.filelist = list(extractor.get_filenames()) def show_list(self, filename): + self.count += 1 if len(self.filenames) != 1: - if filename != self.filenames[0]: + if self.count > 1: print print "%s:" % (filename,) print '\n'.join(self.filelist) def run(self, filename, extractor): - self.current_filename = filename return (self.report(self.get_list, extractor) or self.report(self.show_list, filename)) @@ -909,21 +939,24 @@ return "cannot extract a directory" def try_extractors(self, filename, builder): - last_error = "could not find a way to extract this" - while True: - try: - extractor = builder.next() - except StopIteration: - return last_error - except (IOError, OSError, ExtractorError), error: - return str(error) + errors = [] + for extractor in builder: error = self.action.run(filename, extractor) if error: - logger.info("%s: %s" % (filename, error)) - last_error = error + errors.append((extractor.file_type, extractor.encoding, error)) else: self.recurse(filename, extractor, self.action) return + logger.error("could not handle %s" % (filename,)) + if not errors: + logger.error("not a known archive type") + return True + for file_type, encoding, error in errors: + message = ["treating as", file_type, "failed:", error] + if encoding: + message.insert(1, "%s-encoded" % (encoding,)) + logger.error(' '.join(message)) + return True def run(self): if self.options.show_list: @@ -939,7 +972,8 @@ error = (self.check_file(filename) or self.try_extractors(filename, builder.get_extractor())) if error: - logger.error("%s: %s" % (filename, error)) + if error != True: + 
logger.error("%s: %s" % (filename, error)) self.failures.append(filename) else: self.successes.append(filename) diff -r 69cd94403088 -r 37d555407334 setup.py --- a/setup.py Tue Nov 27 22:43:40 2007 -0500 +++ b/setup.py Thu Jan 17 22:36:07 2008 -0500 @@ -3,7 +3,7 @@ from distutils.core import setup setup(name="dtrx", - version = "5.0", + version = "6.0", description = "Script to intelligently extract multiple archive types", author = "Brett Smith", author_email = "brettcsmith@brettcsmith.org", diff -r 69cd94403088 -r 37d555407334 tests/tests.yml --- a/tests/tests.yml Tue Nov 27 22:43:40 2007 -0500 +++ b/tests/tests.yml Thu Jan 17 22:36:07 2008 -0500 @@ -424,7 +424,7 @@ - name: non-archive error filenames: /dev/null error: true - grep: "could not find a way to extract this" + grep: "not a known archive type" - name: no such file error filenames: nonexistent-file.tar.gz