# HG changeset patch
# User brett
# Date 1177175398 14400
# Node ID b240777ae53e24bb9bfe9f56c894f76ba21f6201
# Parent  fd3e10410040ff39ebb7ab3b819c7809adc14fce
[svn] Improve the way we check archive contents.  If every entry's path
starts with "./", that leading "." shouldn't make the entries count as being
in the same directory; look at the next piece of the path instead.  If the
archive only has one non-directory item, report that more clearly.  You'll be
able to tell by whether or not there's a trailing slash in the prompt.

Improve the tests for doing straight decompression, and seek to the
beginning of the archive before we start writing to the file -- otherwise,
we write 0-byte files.

Lots of new ideas in the TODO.  I think I'll do another release once
recursion is interactive.

diff -r fd3e10410040 -r b240777ae53e TODO
--- a/TODO	Sat Apr 21 10:22:32 2007 -0400
+++ b/TODO	Sat Apr 21 13:09:58 2007 -0400
@@ -1,8 +1,13 @@
 Things which I have a use case/anti-use case for:
-* Figure out what the deal is with strerror.
+* Prompt the user to recurse.
+* Use file to detect the archive type.
+* Support lzma compression (http://tukaani.org/lzma/download)
+* Support pisi packages (http://paketler.pardus.org.tr/pardus-2007/)
+* Steal ideas from <http://martin.ankerl.com/files/e>.
+* Figure out what the deal is with strerror. (done?)
 * Better error messages (file doesn't exist, isn't readable, etc.)
-* Interactivity: ask about overwriting, recursing, etc.
 * Consistently raise and handle exceptions.
+* Take URLs as arguments.
 
 Things that are generally good:
 * Better tests.
diff -r fd3e10410040 -r b240777ae53e scripts/dtrx
--- a/scripts/dtrx	Sat Apr 21 10:22:32 2007 -0400
+++ b/scripts/dtrx	Sat Apr 21 13:09:58 2007 -0400
@@ -45,10 +45,10 @@
 Public License for more details.""" % (VERSION,)
 
 MATCHING_DIRECTORY = 1
-# ONE_DIRECTORY = 2
+ONE_ENTRY = 2
 BOMB = 3
 EMPTY = 4
-COMPRESSED = 5
+ONE_ENTRY_KNOWN = 5
 
 EXTRACT_HERE = 1
 EXTRACT_WRAP = 2
@@ -171,28 +171,53 @@
     def prepare(self):
         pass
 
-    def check_contents(self):
-        archive_type = None
-        filenames = self.get_filenames()
+    def check_included_archive(self, filename):
+        if extractor_map.has_key(mimetypes.guess_type(filename)[0]):
+            self.included_archives.append(filename)
+
+    def check_first_filename(self, filenames):
         try:
-            filename = filenames.next()
-            if extractor_map.has_key(mimetypes.guess_type(filename)[0]):
-                self.included_archives.append(filename)
-            first_part = filename.split('/', 1)[0] + '/'
+            first_filename = filenames.next()
         except StopIteration:
             filenames.stop()
-            return EMPTY
+            return (None, None)
+        self.check_included_archive(first_filename)
+        parts = first_filename.split('/')
+        first_part = [parts[0]]
+        if parts[0] == '.':
+            first_part.append(parts[1])
+        return (first_filename, '/'.join(first_part + ['']))
+
+    def check_second_filename(self, filenames, first_part, first_filename):
+        try:
+            filename = filenames.next()
+        except StopIteration:
+            return ONE_ENTRY, first_filename
+        self.check_included_archive(filename)
+        if not filename.startswith(first_part):
+            return BOMB, None
+        return None, first_part
+        
+    def check_contents(self):
+        filenames = self.get_filenames()
+        first_filename, first_part = self.check_first_filename(filenames)
+        if first_filename is None:
+            return (EMPTY, None)
+        archive_type, type_info = self.check_second_filename(filenames,
+                                                             first_part,
+                                                             first_filename)
         for filename in filenames:
-            if extractor_map.has_key(mimetypes.guess_type(filename)[0]):
-                self.included_archives.append(filename)
-            if (archive_type is None) and (not filename.startswith(first_part)):
+            self.check_included_archive(filename)
+            if (archive_type != BOMB) and (not filename.startswith(first_part)):
                 archive_type = BOMB
+                type_info = None
         filenames.stop()
-        if archive_type:
-            return archive_type
-        if self.basename() == first_part[:-1]:
-            return MATCHING_DIRECTORY
-        return first_part
+        if archive_type is None:
+            if self.basename() == first_part[:-1]:
+                archive_type = MATCHING_DIRECTORY
+            else:
+                archive_type = ONE_ENTRY
+        return archive_type, type_info
 
     def basename(self):
         pieces = os.path.basename(self.filename).split('.')
@@ -270,7 +295,7 @@
 
     def check_contents(self):
         CpioExtractor.check_contents(self)
-        return BOMB
+        return (BOMB, None)
 
 
 class DebExtractor(TarExtractor):
@@ -291,7 +316,7 @@
 
     def check_contents(self):
         TarExtractor.check_contents(self)
-        return BOMB
+        return (BOMB, None)
         
 
 class CompressionExtractor(BaseExtractor):
@@ -308,19 +333,21 @@
         yield self.basename()
 
     def check_contents(self):
-        return COMPRESSED
+        return (ONE_ENTRY_KNOWN, self.basename())
 
     def extract(self, path):
         output = open(path, 'w')
+        self.archive.seek(0, 0)
         self.run(['cat'], "output write", stdin=self.archive, stdout=output)
         output.close()
         
 
 class BaseHandler(object):
-    def __init__(self, extractor, contents, options):
+    def __init__(self, extractor, contents, content_name, options):
         self.logger = logging.getLogger('dtrx-log')
         self.extractor = extractor
         self.contents = contents
+        self.content_name = content_name
         self.options = options
         self.target = None
 
@@ -353,12 +380,12 @@
 
 class FlatHandler(BaseHandler):
     def can_handle(contents, options):
-        return ((options.flat and (contents != COMPRESSED)) or
+        return ((options.flat and (contents != ONE_ENTRY_KNOWN)) or
                 (options.overwrite and (contents == MATCHING_DIRECTORY)))
     can_handle = staticmethod(can_handle)
 
-    def __init__(self, extractor, contents, options):
-        BaseHandler.__init__(self, extractor, contents, options)
+    def __init__(self, extractor, contents, content_name, options):
+        BaseHandler.__init__(self, extractor, contents, content_name, options)
         self.target = '.'
 
     def cleanup(self):
@@ -372,19 +399,19 @@
 
 class OverwriteHandler(BaseHandler):
     def can_handle(contents, options):
-        return ((options.flat and (contents == COMPRESSED)) or
+        return ((options.flat and (contents == ONE_ENTRY_KNOWN)) or
                 (options.overwrite and (contents != MATCHING_DIRECTORY)))
     can_handle = staticmethod(can_handle)
 
-    def __init__(self, extractor, contents, options):
-        BaseHandler.__init__(self, extractor, contents, options)
+    def __init__(self, extractor, contents, content_name, options):
+        BaseHandler.__init__(self, extractor, contents, content_name, options)
         self.target = self.extractor.basename()
         
 
 class MatchHandler(BaseHandler):
     def can_handle(contents, options):
         return ((contents == MATCHING_DIRECTORY) or
-                (hasattr(contents, 'encode') and
+                ((contents == ONE_ENTRY) and
                  (options.onedir_policy in (EXTRACT_RENAME, EXTRACT_HERE))))
     can_handle = staticmethod(can_handle)
 
@@ -392,9 +419,9 @@
         if self.contents == MATCHING_DIRECTORY:
             basename = destination = self.extractor.basename()
         elif self.options.onedir_policy == EXTRACT_HERE:
-            basename = destination = self.contents.rstrip('/')
+            basename = destination = self.content_name.rstrip('/')
         else:
-            basename = self.contents.rstrip('/')
+            basename = self.content_name.rstrip('/')
             destination = self.extractor.basename()
         self.target = tempdir = tempfile.mkdtemp(dir='.')
         result = BaseHandler.extract(self)
@@ -411,7 +438,7 @@
         return contents == EMPTY
     can_handle = staticmethod(can_handle)
 
-    def __init__(self, extractor, contents, options): pass
+    def __init__(self, extractor, contents, content_name, options): pass
     def extract(self): pass
     def cleanup(self): pass
 
@@ -421,8 +448,8 @@
         return True
     can_handle = staticmethod(can_handle)
 
-    def __init__(self, extractor, contents, options):
-        BaseHandler.__init__(self, extractor, contents, options)
+    def __init__(self, extractor, contents, content_name, options):
+        BaseHandler.__init__(self, extractor, contents, content_name, options)
         checker = self.extractor.name_checker(self.extractor.basename())
         self.target = checker.check()
 
@@ -496,7 +523,10 @@
         last_line = question.pop()
         while True:
             print "\n".join(question)
-            answer = raw_input(last_line)
+            try:
+                answer = raw_input(last_line)
+            except EOFError:
+                return answers['']
             try:
                 return answers[answer.lower()]
             except KeyError:
@@ -519,11 +549,10 @@
 
     def get_handler(self):
         try:
-            content = self.current_extractor.check_contents()
-            if hasattr(content, 'encode'):  # Archive contains one directory.
-                question = textwrap.wrap("%s contains one directory: %s.  %s" %
-                                         (self.current_filename, content,
-                                          "You can:"))
+            content, content_name = self.current_extractor.check_contents()
+            if content == ONE_ENTRY:
+                question = textwrap.wrap("%s contains one entry: %s." %
+                                         (self.current_filename, content_name))
                 question.extend(["You can:",
                                  " * extract it Inside another directory",
                                  " * extract it and Rename the directory",
@@ -534,7 +563,8 @@
             for handler in handlers:
                 if handler.can_handle(content, self.options):
                     self.current_handler = handler(self.current_extractor,
-                                                   content, self.options)
+                                                   content, content_name,
+                                                   self.options)
                     break
         except ExtractorError, error:
             return str(error)
diff -r fd3e10410040 -r b240777ae53e tests/test-text.bz2
Binary file tests/test-text.bz2 has changed
diff -r fd3e10410040 -r b240777ae53e tests/test-text.gz
Binary file tests/test-text.gz has changed
diff -r fd3e10410040 -r b240777ae53e tests/tests.yml
--- a/tests/tests.yml	Sat Apr 21 10:22:32 2007 -0400
+++ b/tests/tests.yml	Sat Apr 21 13:09:58 2007 -0400
@@ -53,12 +53,16 @@
   filenames: ../test-text.gz
   baseline: |
     zcat $1 >test-text
+  posttest: |
+    if [ "x`cat test-text`" != "xhi" ]; then exit 1; fi
 
 - name: decompressing bz2
   directory: inside-dir
   filenames: ../test-text.bz2
   baseline: |
     bzcat $1 >test-text
+  posttest: |
+    if [ "x`cat test-text`" != "xhi" ]; then exit 1; fi
 
 - name: decompression with -r
   directory: inside-dir
@@ -107,7 +111,7 @@
   baseline: |
     tar -jxf $1
     tar -xf test-badperms.tar
-    chmod 755 testdir
+    chmod 700 testdir
   posttest: |
     if [ "x`cat testdir/testfile`" != "xhey" ]; then exit 1; fi
 
@@ -253,3 +257,20 @@
   input: h
   baseline: |
     tar -zxf $1
+
+- name: bomb with preceding dot in the table
+  filenames: test-dot-first-bomb.tar.gz
+  options: ""
+  antigrep: one entry
+  baseline: |
+    mkdir test-dot-first-bomb
+    cd test-dot-first-bomb
+    tar -zxf ../$1
+
+- name: one directory preceded by dot in the table
+  filenames: test-dot-first-onedir.tar.gz
+  options: ""
+  grep: "one entry: ./dir"
+  input: h
+  baseline: |
+    tar -zxf $1