Sun, 12 Apr 2009 11:21:51 -0400
Start printing results ASAP with -l or -t.
Doing -l on a large archive was painful because dtrx would wait until it
had all the listings before it displayed any. It did this to make sure
that the listing would be successful.
On reconsidering, though, I think it's really unlikely that there'll be a
case where a lister process provides *some* good output for a given file in
the wrong format. So, now -l will try to get one good line out of the
lister. If it gets that, then it will immediately start displaying results
as they come in. On the off chance that it detects an error later on, it
will display an error about that, and then try again with a different
extractor if appropriate.
NEWS | file | annotate | diff | comparison | revisions | |
scripts/dtrx | file | annotate | diff | comparison | revisions | |
tests/tests.yml | file | annotate | diff | comparison | revisions |
--- a/NEWS Thu Apr 02 22:31:07 2009 -0400 +++ b/NEWS Sun Apr 12 11:21:51 2009 -0400 @@ -4,6 +4,14 @@ Version 6.5 ----------- +Enhancements +~~~~~~~~~~~~ + + * When you list archive contents with -l or -t, dtrx will start printing + results much faster than it used to. There's a small chance that it + will print some incorrect listings if it misdetects the archive type of + a given file, but it will show you an error message when that happens. + Bug fixes ~~~~~~~~~
--- a/scripts/dtrx Thu Apr 02 22:31:07 2009 -0400 +++ b/scripts/dtrx Sun Apr 12 11:21:51 2009 -0400 @@ -171,12 +171,21 @@ return index return None + def add_process(self, processes, command, stdin, stdout): + try: + processes.append(subprocess.Popen(command, stdin=stdin, + stdout=stdout, + stderr=self.stderr)) + except OSError, error: + if error.errno == errno.ENOENT: + raise ExtractorUnusable("could not run %s" % (command[0],)) + raise + def run_pipes(self, final_stdout=None): if not self.pipes: return elif final_stdout is None: - # FIXME: Buffering this might be dumb. - final_stdout = tempfile.TemporaryFile() + final_stdout = open('/dev/null', 'w') num_pipes = len(self.pipes) last_pipe = num_pipes - 1 processes = [] @@ -189,14 +198,7 @@ stdout = final_stdout else: stdout = subprocess.PIPE - try: - processes.append(subprocess.Popen(command, stdin=stdin, - stdout=stdout, - stderr=self.stderr)) - except OSError, error: - if error.errno == errno.ENOENT: - raise ExtractorUnusable("could not run %s" % (command[0],)) - raise + self.add_process(processes, command, stdin, stdout) self.exit_codes = [pipe.wait() for pipe in processes] self.archive.close() for index in range(last_pipe): @@ -290,15 +292,22 @@ def get_filenames(self): self.pipe(self.list_pipe, "listing") - self.run_pipes() - self.check_success(False) - self.archive.seek(0, 0) + processes = [] + stdin = self.archive + for command in [pipe[0] for pipe in self.pipes]: + self.add_process(processes, command, stdin, subprocess.PIPE) + stdin = processes[-1].stdout + get_output_line = processes[-1].stdout.readline while True: - line = self.archive.readline() + line = get_output_line() if not line: - self.archive.close() - return + break yield line.rstrip('\n') + self.exit_codes = [pipe.wait() for pipe in processes] + self.archive.close() + for process in processes: + process.stdout.close() + self.check_success(False) class CompressionExtractor(BaseExtractor): @@ -952,6 +961,7 @@ self.options = options self.filenames = filenames self.target = None + self.do_print = False def report(self, function, *args): try: @@ -961,15 +971,20 @@ logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) return error + def show_filename(self, filename): + if len(self.filenames) < 2: + return + elif self.do_print: + print + else: + self.do_print = True + print "%s:" % (filename,) + class ExtractionAction(BaseAction): handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler, BombHandler] - def __init__(self, options, filenames): - BaseAction.__init__(self, options, filenames) - self.did_print = False - def get_handler(self, extractor): if extractor.content_type in ONE_ENTRY_UNKNOWN: self.options.one_entry_policy.prep(self.current_filename, @@ -983,11 +998,7 @@ def show_extraction(self, extractor): if self.options.log_level > logging.INFO: return - elif self.did_print: - print - else: - self.did_print = True - print "%s:" % (self.current_filename,) + self.show_filename(self.current_filename) if extractor.contents is None: print self.current_handler.target return @@ -1023,29 +1034,28 @@ class ListAction(BaseAction): - def __init__(self, options, filenames): - BaseAction.__init__(self, options, filenames) - self.count = 0 - - def get_list(self, extractor): - # Note: The reason I'm getting all the filenames up front is - # because if we run into trouble partway through the archive, we'll - # try another extractor. So before we display anything we have to - # be sure this one is successful. We maybe don't have to be quite - # this conservative but this is the easy way out for now. - self.filelist = list(extractor.get_filenames()) - - def show_list(self, filename): - self.count += 1 - if len(self.filenames) != 1: - if self.count > 1: - print - print "%s:" % (filename,) - print '\n'.join(self.filelist) - + def list_filenames(self, extractor, filename): + # We get a line first to make sure there's not going to be some + # basic error before we show what filename we're listing. + filename_lister = extractor.get_filenames() + try: + first_line = filename_lister.next() + except StopIteration: + self.show_filename(filename) + else: + self.did_list = True + self.show_filename(filename) + print first_line + for line in filename_lister: + print line + def run(self, filename, extractor): - return (self.report(self.get_list, extractor) or - self.report(self.show_list, filename)) + self.did_list = False + error = self.report(self.list_filenames, extractor, filename) + if error and self.did_list: + logger.error("lister failed: ignore above listing for %s" % + (filename,)) + return error class ExtractorApplication(object):
--- a/tests/tests.yml Thu Apr 02 22:31:07 2009 -0400 +++ b/tests/tests.yml Sun Apr 12 11:21:51 2009 -0400 @@ -518,6 +518,22 @@ grep: "^1/2/3$" antigrep: "^dtrx:" +- name: listing multiple file with misleading extensions + options: -l + filenames: trickery.tar.gz trickery.tar.gz + prerun: cp ${1}test-1.23.zip ${1}trickery.tar.gz + cleanup: rm -f ${1}trickery.tar.gz + output: | + trickery.tar.gz: + 1/2/3 + a/b + foobar + + trickery.tar.gz: + 1/2/3 + a/b + foobar + - name: non-archive error filenames: /dev/null error: true @@ -612,7 +628,6 @@ directory: busydir filenames: ../test-onedir.tar.gz output: | - ../test-onedir.tar.gz: test/ test/foobar test/quux