# HG changeset patch # User brett # Date 1177175398 14400 # Node ID b240777ae53e24bb9bfe9f56c894f76ba21f6201 # Parent fd3e10410040ff39ebb7ab3b819c7809adc14fce [svn] Improve the way we check archive contents. If all the entries look like they're in ., they really shouldn't count as being in the same directory; look at the next piece of the path. If the archive only has one non-directory item, report that more clearly. You'll be able to tell by whether or not there's a trailing slash in the prompt. Improve the tests for doing straight decompression, and seek to the beginning of the archive before we start writing to the file -- otherwise, we write 0-byte files. Lots of new ideas in the TODO. I think I'll do another release once recursion is interactive. diff -r fd3e10410040 -r b240777ae53e TODO --- a/TODO Sat Apr 21 10:22:32 2007 -0400 +++ b/TODO Sat Apr 21 13:09:58 2007 -0400 @@ -1,8 +1,13 @@ Things which I have a use case/anti-use case for: -* Figure out what the deal is with strerror. +* Prompt the user to recurse. +* Use file to detect the archive type. +* Support lzma compression (http://tukaani.org/lzma/download) +* Support pisi packages (http://paketler.pardus.org.tr/pardus-2007/) +* Steal ideas from . +* Figure out what the deal is with strerror. (done?) * Better error messages (file doesn't exist, isn't readable, etc.) -* Interactivity: ask about overwriting, recursing, etc. * Consistently raise and handle exceptions. +* Take URLs as arguments. Things that are generally good: * Better tests. diff -r fd3e10410040 -r b240777ae53e scripts/dtrx --- a/scripts/dtrx Sat Apr 21 10:22:32 2007 -0400 +++ b/scripts/dtrx Sat Apr 21 13:09:58 2007 -0400 @@ -45,10 +45,10 @@ Public License for more details.""" % (VERSION,) MATCHING_DIRECTORY = 1 -# ONE_DIRECTORY = 2 +ONE_ENTRY = 2 BOMB = 3 EMPTY = 4 -COMPRESSED = 5 +ONE_ENTRY_KNOWN = 5 EXTRACT_HERE = 1 EXTRACT_WRAP = 2 @@ -171,28 +171,53 @@ def prepare(self): pass - def check_contents(self): - archive_type = None - filenames = self.get_filenames() + def check_included_archive(self, filename): + if extractor_map.has_key(mimetypes.guess_type(filename)[0]): + self.included_archives.append(filename) + + def check_first_filename(self, filenames): try: - filename = filenames.next() - if extractor_map.has_key(mimetypes.guess_type(filename)[0]): - self.included_archives.append(filename) - first_part = filename.split('/', 1)[0] + '/' + first_filename = filenames.next() except StopIteration: filenames.stop() - return EMPTY + return (None, None) + self.check_included_archive(first_filename) + parts = first_filename.split('/') + first_part = [parts[0]] + if parts[0] == '.': + first_part.append(parts[1]) + return (first_filename, '/'.join(first_part + [''])) + + def check_second_filename(self, filenames, first_part, first_filename): + try: + filename = filenames.next() + except StopIteration: + return ONE_ENTRY, first_filename + self.check_included_archive(filename) + if not filename.startswith(first_part): + return BOMB, None + return None, first_part + + def check_contents(self): + filenames = self.get_filenames() + first_filename, first_part = self.check_first_filename(filenames) + if first_filename is None: + return (EMPTY, None) + archive_type, type_info = self.check_second_filename(filenames, + first_part, + first_filename) for filename in filenames: - if extractor_map.has_key(mimetypes.guess_type(filename)[0]): - self.included_archives.append(filename) - if (archive_type is None) and (not filename.startswith(first_part)): + self.check_included_archive(filename) + if (archive_type != BOMB) and (not filename.startswith(first_part)): archive_type = BOMB + type_info = None filenames.stop() - if archive_type: - return archive_type - if self.basename() == first_part[:-1]: - return MATCHING_DIRECTORY - return first_part + if archive_type is None: + if self.basename() == first_part[:-1]: + archive_type = MATCHING_DIRECTORY + else: + archive_type = ONE_ENTRY + return archive_type, type_info def basename(self): pieces = os.path.basename(self.filename).split('.') @@ -270,7 +295,7 @@ def check_contents(self): CpioExtractor.check_contents(self) - return BOMB + return (BOMB, None) class DebExtractor(TarExtractor): @@ -291,7 +316,7 @@ def check_contents(self): TarExtractor.check_contents(self) - return BOMB + return (BOMB, None) class CompressionExtractor(BaseExtractor): @@ -308,19 +333,21 @@ yield self.basename() def check_contents(self): - return COMPRESSED + return (ONE_ENTRY_KNOWN, self.basename()) def extract(self, path): output = open(path, 'w') + self.archive.seek(0, 0) self.run(['cat'], "output write", stdin=self.archive, stdout=output) output.close() class BaseHandler(object): - def __init__(self, extractor, contents, options): + def __init__(self, extractor, contents, content_name, options): self.logger = logging.getLogger('dtrx-log') self.extractor = extractor self.contents = contents + self.content_name = content_name self.options = options self.target = None @@ -353,12 +380,12 @@ class FlatHandler(BaseHandler): def can_handle(contents, options): - return ((options.flat and (contents != COMPRESSED)) or + return ((options.flat and (contents != ONE_ENTRY_KNOWN)) or (options.overwrite and (contents == MATCHING_DIRECTORY))) can_handle = staticmethod(can_handle) - def __init__(self, extractor, contents, options): - BaseHandler.__init__(self, extractor, contents, options) + def __init__(self, extractor, contents, content_name, options): + BaseHandler.__init__(self, extractor, contents, content_name, options) self.target = '.' def cleanup(self): @@ -372,19 +399,19 @@ class OverwriteHandler(BaseHandler): def can_handle(contents, options): - return ((options.flat and (contents == COMPRESSED)) or + return ((options.flat and (contents == ONE_ENTRY_KNOWN)) or (options.overwrite and (contents != MATCHING_DIRECTORY))) can_handle = staticmethod(can_handle) - def __init__(self, extractor, contents, options): - BaseHandler.__init__(self, extractor, contents, options) + def __init__(self, extractor, contents, content_name, options): + BaseHandler.__init__(self, extractor, contents, content_name, options) self.target = self.extractor.basename() class MatchHandler(BaseHandler): def can_handle(contents, options): return ((contents == MATCHING_DIRECTORY) or - (hasattr(contents, 'encode') and + ((contents == ONE_ENTRY) and (options.onedir_policy in (EXTRACT_RENAME, EXTRACT_HERE)))) can_handle = staticmethod(can_handle) @@ -392,9 +419,9 @@ if self.contents == MATCHING_DIRECTORY: basename = destination = self.extractor.basename() elif self.options.onedir_policy == EXTRACT_HERE: - basename = destination = self.contents.rstrip('/') + basename = destination = self.content_name.rstrip('/') else: - basename = self.contents.rstrip('/') + basename = self.content_name.rstrip('/') destination = self.extractor.basename() self.target = tempdir = tempfile.mkdtemp(dir='.') result = BaseHandler.extract(self) @@ -411,7 +438,7 @@ return contents == EMPTY can_handle = staticmethod(can_handle) - def __init__(self, extractor, contents, options): pass + def __init__(self, extractor, contents, content_name, options): pass def extract(self): pass def cleanup(self): pass @@ -421,8 +448,8 @@ return True can_handle = staticmethod(can_handle) - def __init__(self, extractor, contents, options): - BaseHandler.__init__(self, extractor, contents, options) + def __init__(self, extractor, contents, content_name, options): + BaseHandler.__init__(self, extractor, contents, content_name, options) checker = self.extractor.name_checker(self.extractor.basename()) self.target = checker.check() @@ -496,7 +523,10 @@ last_line = question.pop() while True: print "\n".join(question) - answer = raw_input(last_line) + try: + answer = raw_input(last_line) + except EOFError: + return answers[''] try: return answers[answer.lower()] except KeyError: @@ -519,11 +549,10 @@ def get_handler(self): try: - content = self.current_extractor.check_contents() - if hasattr(content, 'encode'): # Archive contains one directory. - question = textwrap.wrap("%s contains one directory: %s. %s" % - (self.current_filename, content, - "You can:")) + content, content_name = self.current_extractor.check_contents() + if content == ONE_ENTRY: + question = textwrap.wrap("%s contains one entry: %s." % + (self.current_filename, content_name)) question.extend(["You can:", " * extract it Inside another directory", " * extract it and Rename the directory", @@ -534,7 +563,8 @@ for handler in handlers: if handler.can_handle(content, self.options): self.current_handler = handler(self.current_extractor, - content, self.options) + content, content_name, + self.options) break except ExtractorError, error: return str(error) diff -r fd3e10410040 -r b240777ae53e tests/test-text.bz2 Binary file tests/test-text.bz2 has changed diff -r fd3e10410040 -r b240777ae53e tests/test-text.gz Binary file tests/test-text.gz has changed diff -r fd3e10410040 -r b240777ae53e tests/tests.yml --- a/tests/tests.yml Sat Apr 21 10:22:32 2007 -0400 +++ b/tests/tests.yml Sat Apr 21 13:09:58 2007 -0400 @@ -53,12 +53,16 @@ filenames: ../test-text.gz baseline: | zcat $1 >test-text + posttest: | + if [ "x`cat test-text`" != "xhi" ]; then exit 1; fi - name: decompressing bz2 directory: inside-dir filenames: ../test-text.bz2 baseline: | bzcat $1 >test-text + posttest: | + if [ "x`cat test-text`" != "xhi" ]; then exit 1; fi - name: decompression with -r directory: inside-dir @@ -107,7 +111,7 @@ baseline: | tar -jxf $1 tar -xf test-badperms.tar - chmod 755 testdir + chmod 700 testdir posttest: | if [ "x`cat testdir/testfile`" != "xhey" ]; then exit 1; fi @@ -253,3 +257,20 @@ input: h baseline: | tar -zxf $1 + +- name: bomb with preceding dot in the table + filenames: test-dot-first-bomb.tar.gz + options: "" + antigrep: one entry + baseline: | + mkdir test-dot-first-bomb + cd test-dot-first-bomb + tar -zxf ../$1 + +- name: one directory preceded by dot in the table + filenames: test-dot-first-onedir.tar.gz + options: "" + grep: "one entry: ./dir" + input: h + baseline: | + tar -zxf $1