# HG changeset patch
# User brett
# Date 1200627367 18000
# Node ID 37d55540733427d33bc8046a4f52c07546680f45
# Parent  69cd944030889efd9986f3bf2563167142b752e9
[svn] At work I was getting an unhelpful "No such file or directory" error when I
tried to extract an .exe.  It turns out this was because I didn't have
cabextract installed.  This inspired a few changes:

* BaseExtractor now raises an ExtractorUnusable error when this happens.
* The error handlers for extraction, cleanup, and reporting now catch it,
  via a shared EXTRACTION_ERRORS tuple.
* If extraction is unsuccessful, the loop that tries extractors now reports
  every error it got, to help you understand why it failed.

Also snuck in a bug fix: a half-extracted archive was not being cleaned up
properly when extraction failed.

Also got my version number situation in order, in anticipation of the next
release.  That may be a while yet, but at least I won't have to worry about
it then.

diff -r 69cd94403088 -r 37d555407334 README
--- a/README	Tue Nov 27 22:43:40 2007 -0500
+++ b/README	Thu Jan 17 22:36:07 2008 -0500
@@ -70,9 +70,9 @@
 Other Useful Information
 ------------------------
 
-dtrx 5.0 is copyright (c) 2006, 2007 `Brett Smith`_.  Feel free to send
-comments, bug reports, patches, and so on.  You can find the latest version
-of dtrx on `its home page`_.
+dtrx 6.0 is copyright (c) 2006, 2007, 2008 `Brett Smith`_.  Feel free to
+send comments, bug reports, patches, and so on.  You can find the latest
+version of dtrx on `its home page`_.
 
 .. _`Brett Smith`: mailto:brettcsmith@brettcsmith.org
 .. _`its home page`: http://www.brettcsmith.org/2007/dtrx/
diff -r 69cd94403088 -r 37d555407334 TODO
--- a/TODO	Tue Nov 27 22:43:40 2007 -0500
+++ b/TODO	Thu Jan 17 22:36:07 2008 -0500
@@ -1,3 +1,9 @@
+To do:
+* Fix recursive extraction to check subdirectories
+* Add ability to list included archives for recursive extraction
+* Make C-c not print a stack trace.
+* Make -v report some kind of progress
+
 Things which I have a use case/anti-use case for:
 * Support pisi packages (http://paketler.pardus.org.tr/pardus-2007/)
 * Steal ideas from <http://martin.ankerl.com/files/e>.
diff -r 69cd94403088 -r 37d555407334 scripts/dtrx
--- a/scripts/dtrx	Tue Nov 27 22:43:40 2007 -0500
+++ b/scripts/dtrx	Thu Jan 17 22:36:07 2008 -0500
@@ -22,6 +22,7 @@
 import optparse
 import os
 import re
+import shutil
 import stat
 import subprocess
 import sys
@@ -31,9 +32,9 @@
 
 from sets import Set
 
-VERSION = "5.0"
+VERSION = "6.0"
 VERSION_BANNER = """dtrx version %s
-Copyright (c) 2006, 2007 Brett Smith <brettcsmith@brettcsmith.org>
+Copyright (c) 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
 
 This program is free software; you can redistribute it and/or modify it
 under the terms of the GNU General Public License as published by the
@@ -121,6 +122,12 @@
     pass
 
 
+class ExtractorUnusable(Exception):
+    pass
+
+
+EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError)
+
 class BaseExtractor(object):
     decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat',
                 'lzma': 'lzcat'}
@@ -167,9 +174,14 @@
                 stdout = final_stdout
             else:
                 stdout = subprocess.PIPE
-            processes.append(subprocess.Popen(command, stdin=stdin,
-                                              stdout=stdout,
-                                              stderr=subprocess.PIPE))
+            try:
+                processes.append(subprocess.Popen(command, stdin=stdin,
+                                                  stdout=stdout,
+                                                  stderr=subprocess.PIPE))
+            except OSError, error:
+                if error.errno == errno.ENOENT:
+                    raise ExtractorUnusable("could not run %s" % (command[0],))
+                raise
         exit_codes = [pipe.wait() for pipe in processes]
         self.archive.close()
         for index in range(last_pipe):
@@ -230,9 +242,9 @@
             self.archive.seek(0, 0)
             self.extract_archive()
             self.check_contents()
-        except ExtractorError:
+        except EXTRACTION_ERRORS:
             os.chdir(old_path)
-            subprocess.call(['rm', '-rf', self.target])
+            shutil.rmtree(self.target, ignore_errors=True)
             raise
         os.chdir(old_path)
 
@@ -248,6 +260,7 @@
     
 
 class CompressionExtractor(BaseExtractor):
+    file_type = 'compressed file'
     name_checker = FilenameChecker
 
     def basename(self):
@@ -269,7 +282,7 @@
             raise ExtractorError("cannot extract here: %s" % (error.strerror,))
         try:
             self.run_pipes(output_fd)
-        except ExtractorError:
+        except EXTRACTION_ERRORS:
             os.close(output_fd)
             os.unlink(self.target)
             raise
@@ -277,6 +290,8 @@
         
 
 class TarExtractor(BaseExtractor):
+    file_type = 'tar file'
+
     def get_filenames(self):
         self.pipe(['tar', '-t'], "listing")
         return BaseExtractor.get_filenames(self)
@@ -287,6 +302,8 @@
         
         
 class CpioExtractor(BaseExtractor):
+    file_type = 'cpio file'
+
     def get_filenames(self):
         self.pipe(['cpio', '-t'], "listing")
         return BaseExtractor.get_filenames(self)
@@ -298,6 +315,8 @@
 
 
 class RPMExtractor(CpioExtractor):
+    file_type = 'RPM'
+
     def prepare(self):
         self.pipe(['rpm2cpio', '-'], "rpm2cpio")
 
@@ -320,6 +339,8 @@
 
 
 class DebExtractor(TarExtractor):
+    file_type = 'Debian package'
+
     def prepare(self):
         self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
                   "data.tar.gz extraction")
@@ -347,6 +368,8 @@
 
 
 class GemExtractor(TarExtractor):
+    file_type = 'Ruby gem'
+
     def prepare(self):
         self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
         self.pipe(['zcat'], "data.tar.gz decompression")
@@ -357,6 +380,8 @@
 
 
 class GemMetadataExtractor(CompressionExtractor):
+    file_type = 'Ruby gem'
+
     def prepare(self):
         self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction")
         self.pipe(['zcat'], "metadata.gz decompression")
@@ -383,6 +408,8 @@
 
 
 class ZipExtractor(NoPipeExtractor):
+    file_type = 'Zip file'
+
     def get_filenames(self):
         self.pipe(['zipinfo', '-1', self.filename], "listing")
         return BaseExtractor.get_filenames(self)
@@ -393,6 +420,7 @@
 
 
 class SevenExtractor(NoPipeExtractor):
+    file_type = '7z file'
     border_re = re.compile('^[- ]+$')
 
     def get_filenames(self):
@@ -416,6 +444,7 @@
         
 
 class CABExtractor(NoPipeExtractor):
+    file_type = 'CAB archive'
     border_re = re.compile(r'^[-\+]+$')
 
     def get_filenames(self):
@@ -780,7 +809,7 @@
     def report(self, function, *args):
         try:
             error = function(*args)
-        except (ExtractorError, IOError, OSError), exception:
+        except EXTRACTION_ERRORS, exception:
             error = str(exception)
             logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
         return error
@@ -813,6 +842,7 @@
 class ListAction(BaseAction):
     def __init__(self, options, filenames):
         BaseAction.__init__(self, options, filenames)
+        self.count = 0
 
     def get_list(self, extractor):
         # Note: The reason I'm getting all the filenames up front is
@@ -823,14 +853,14 @@
         self.filelist = list(extractor.get_filenames())
 
     def show_list(self, filename):
+        self.count += 1
         if len(self.filenames) != 1:
-            if filename != self.filenames[0]:
+            if self.count > 1:
                 print
             print "%s:" % (filename,)
         print '\n'.join(self.filelist)
 
     def run(self, filename, extractor):
-        self.current_filename = filename
         return (self.report(self.get_list, extractor) or
                 self.report(self.show_list, filename))
 
@@ -909,21 +939,24 @@
             return "cannot extract a directory"
 
     def try_extractors(self, filename, builder):
-        last_error = "could not find a way to extract this"
-        while True:
-            try:
-                extractor = builder.next()
-            except StopIteration:
-                return last_error
-            except (IOError, OSError, ExtractorError), error:
-                return str(error)
+        errors = []
+        for extractor in builder:
             error = self.action.run(filename, extractor)
             if error:
-                logger.info("%s: %s" % (filename, error))
-                last_error = error
+                errors.append((extractor.file_type, extractor.encoding, error))
             else:
                 self.recurse(filename, extractor, self.action)
                 return
+        logger.error("could not handle %s" % (filename,))
+        if not errors:
+            logger.error("not a known archive type")
+            return True
+        for file_type, encoding, error in errors:
+            message = ["treating as", file_type, "failed:", error]
+            if encoding:
+                message.insert(1, "%s-encoded" % (encoding,))
+            logger.error(' '.join(message))
+        return True
         
     def run(self):
         if self.options.show_list:
@@ -939,7 +972,8 @@
                 error = (self.check_file(filename) or
                          self.try_extractors(filename, builder.get_extractor()))
                 if error:
-                    logger.error("%s: %s" % (filename, error))
+                    if error != True:
+                        logger.error("%s: %s" % (filename, error))
                     self.failures.append(filename)
                 else:
                     self.successes.append(filename)
diff -r 69cd94403088 -r 37d555407334 setup.py
--- a/setup.py	Tue Nov 27 22:43:40 2007 -0500
+++ b/setup.py	Thu Jan 17 22:36:07 2008 -0500
@@ -3,7 +3,7 @@
 from distutils.core import setup
 
 setup(name="dtrx",
-      version = "5.0",
+      version = "6.0",
       description = "Script to intelligently extract multiple archive types",
       author = "Brett Smith",
       author_email = "brettcsmith@brettcsmith.org",
diff -r 69cd94403088 -r 37d555407334 tests/tests.yml
--- a/tests/tests.yml	Tue Nov 27 22:43:40 2007 -0500
+++ b/tests/tests.yml	Thu Jan 17 22:36:07 2008 -0500
@@ -424,7 +424,7 @@
 - name: non-archive error
   filenames: /dev/null
   error: true
-  grep: "could not find a way to extract this"
+  grep: "not a known archive type"
 
 - name: no such file error
   filenames: nonexistent-file.tar.gz