Start printing results ASAP with -l or -t. trunk

Sun, 12 Apr 2009 11:21:51 -0400

author
Brett Smith <brettcsmith@brettcsmith.org>
date
Sun, 12 Apr 2009 11:21:51 -0400
branch
trunk
changeset 106
dcf005ef7070
parent 105
f76ac41fe061
child 107
1b7450ae4c67

Start printing results ASAP with -l or -t.

Doing -l on a large archive was painful because dtrx would wait until it
had all the listings before it displayed any. It did this to make sure
that the listing would be successful.

On reconsidering, though, I think it's really unlikely that there'll be a
case where a lister process provides *some* good output for a given file in
the wrong format. So, now -l will try to get one good line out of the
lister. If it gets that, then it will immediately start displaying results
as they come in. On the off chance that it detects an error later on, it
will display an error about that, and then try again with a different
extractor if appropriate.

NEWS file | annotate | diff | comparison | revisions
scripts/dtrx file | annotate | diff | comparison | revisions
tests/tests.yml file | annotate | diff | comparison | revisions
--- a/NEWS	Thu Apr 02 22:31:07 2009 -0400
+++ b/NEWS	Sun Apr 12 11:21:51 2009 -0400
@@ -4,6 +4,14 @@
 Version 6.5
 -----------
 
+Enhancements
+~~~~~~~~~~~~
+
+ * When you list archive contents with -l or -t, dtrx will start printing
+   results much faster than it used to.  There's a small chance that it
+   will print some incorrect listings if it misdetects the archive type of
+   a given file, but it will show you an error message when that happens.
+
 Bug fixes
 ~~~~~~~~~
 
--- a/scripts/dtrx	Thu Apr 02 22:31:07 2009 -0400
+++ b/scripts/dtrx	Sun Apr 12 11:21:51 2009 -0400
@@ -171,12 +171,21 @@
                 return index
         return None
 
+    def add_process(self, processes, command, stdin, stdout):
+        try:
+            processes.append(subprocess.Popen(command, stdin=stdin,
+                                              stdout=stdout,
+                                              stderr=self.stderr))
+        except OSError, error:
+            if error.errno == errno.ENOENT:
+                raise ExtractorUnusable("could not run %s" % (command[0],))
+            raise
+
     def run_pipes(self, final_stdout=None):
         if not self.pipes:
             return
         elif final_stdout is None:
-            # FIXME: Buffering this might be dumb.
-            final_stdout = tempfile.TemporaryFile()
+            final_stdout = open('/dev/null', 'w')
         num_pipes = len(self.pipes)
         last_pipe = num_pipes - 1
         processes = []
@@ -189,14 +198,7 @@
                 stdout = final_stdout
             else:
                 stdout = subprocess.PIPE
-            try:
-                processes.append(subprocess.Popen(command, stdin=stdin,
-                                                  stdout=stdout,
-                                                  stderr=self.stderr))
-            except OSError, error:
-                if error.errno == errno.ENOENT:
-                    raise ExtractorUnusable("could not run %s" % (command[0],))
-                raise
+            self.add_process(processes, command, stdin, stdout)
         self.exit_codes = [pipe.wait() for pipe in processes]
         self.archive.close()
         for index in range(last_pipe):
@@ -290,15 +292,22 @@
 
     def get_filenames(self):
         self.pipe(self.list_pipe, "listing")
-        self.run_pipes()
-        self.check_success(False)
-        self.archive.seek(0, 0)
+        processes = []
+        stdin = self.archive
+        for command in [pipe[0] for pipe in self.pipes]:
+            self.add_process(processes, command, stdin, subprocess.PIPE)
+            stdin = processes[-1].stdout
+        get_output_line = processes[-1].stdout.readline
         while True:
-            line = self.archive.readline()
+            line = get_output_line()
             if not line:
-                self.archive.close()
-                return
+                break
             yield line.rstrip('\n')
+        self.exit_codes = [pipe.wait() for pipe in processes]
+        self.archive.close()
+        for process in processes:
+            process.stdout.close()
+        self.check_success(False)
     
 
 class CompressionExtractor(BaseExtractor):
@@ -952,6 +961,7 @@
         self.options = options
         self.filenames = filenames
         self.target = None
+        self.do_print = False
         
     def report(self, function, *args):
         try:
@@ -961,15 +971,20 @@
             logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
         return error
 
+    def show_filename(self, filename):
+        if len(self.filenames) < 2:
+            return
+        elif self.do_print:
+            print
+        else:
+            self.do_print = True
+        print "%s:" % (filename,)
+
 
 class ExtractionAction(BaseAction):
     handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
                 BombHandler]
 
-    def __init__(self, options, filenames):
-        BaseAction.__init__(self, options, filenames)
-        self.did_print = False
-
     def get_handler(self, extractor):
         if extractor.content_type in ONE_ENTRY_UNKNOWN:
             self.options.one_entry_policy.prep(self.current_filename,
@@ -983,11 +998,7 @@
     def show_extraction(self, extractor):
         if self.options.log_level > logging.INFO:
             return
-        elif self.did_print:
-            print
-        else:
-            self.did_print = True
-        print "%s:" % (self.current_filename,)
+        self.show_filename(self.current_filename)
         if extractor.contents is None:
             print self.current_handler.target
             return
@@ -1023,29 +1034,28 @@
 
 
 class ListAction(BaseAction):
-    def __init__(self, options, filenames):
-        BaseAction.__init__(self, options, filenames)
-        self.count = 0
-
-    def get_list(self, extractor):
-        # Note: The reason I'm getting all the filenames up front is
-        # because if we run into trouble partway through the archive, we'll
-        # try another extractor.  So before we display anything we have to
-        # be sure this one is successful.  We maybe don't have to be quite
-        # this conservative but this is the easy way out for now.
-        self.filelist = list(extractor.get_filenames())
-
-    def show_list(self, filename):
-        self.count += 1
-        if len(self.filenames) != 1:
-            if self.count > 1:
-                print
-            print "%s:" % (filename,)
-        print '\n'.join(self.filelist)
-
+    def list_filenames(self, extractor, filename):
+        # We get a line first to make sure there's not going to be some
+        # basic error before we show what filename we're listing.
+        filename_lister = extractor.get_filenames()
+        try:
+            first_line = filename_lister.next()
+        except StopIteration:
+            self.show_filename(filename)
+        else:
+            self.did_list = True
+            self.show_filename(filename)
+            print first_line
+        for line in filename_lister:
+            print line
+            
     def run(self, filename, extractor):
-        return (self.report(self.get_list, extractor) or
-                self.report(self.show_list, filename))
+        self.did_list = False
+        error = self.report(self.list_filenames, extractor, filename)
+        if error and self.did_list:
+            logger.error("lister failed: ignore above listing for %s" %
+                         (filename,))
+        return error
 
 
 class ExtractorApplication(object):
--- a/tests/tests.yml	Thu Apr 02 22:31:07 2009 -0400
+++ b/tests/tests.yml	Sun Apr 12 11:21:51 2009 -0400
@@ -518,6 +518,22 @@
   grep: "^1/2/3$"
   antigrep: "^dtrx:"
 
+- name: listing multiple file with misleading extensions
+  options: -l
+  filenames: trickery.tar.gz trickery.tar.gz
+  prerun: cp ${1}test-1.23.zip ${1}trickery.tar.gz
+  cleanup: rm -f ${1}trickery.tar.gz
+  output: |
+    trickery.tar.gz:
+    1/2/3
+    a/b
+    foobar
+    
+    trickery.tar.gz:
+    1/2/3
+    a/b
+    foobar
+
 - name: non-archive error
   filenames: /dev/null
   error: true
@@ -612,7 +628,6 @@
   directory: busydir
   filenames: ../test-onedir.tar.gz
   output: |
-    ../test-onedir.tar.gz:
     test/
     test/foobar
     test/quux

mercurial