[svn] Add support for Ruby Gems and for extracting metadata from .deb/.gem files. trunk

Fri, 19 Oct 2007 23:03:17 -0400

author
brett
date
Fri, 19 Oct 2007 23:03:17 -0400
branch
trunk
changeset 29
5fad99c17221
parent 28
4d88f2231d33
child 30
1015bbd6dc5e

[svn] Add support for Ruby Gems and for extracting metadata from .deb/.gem files.

scripts/dtrx file | annotate | diff | comparison | revisions
tests/compare.py file | annotate | diff | comparison | revisions
tests/test-1.23.gem file | annotate | diff | comparison | revisions
tests/tests.yml file | annotate | diff | comparison | revisions
--- a/scripts/dtrx	Fri Oct 19 22:46:20 2007 -0400
+++ b/scripts/dtrx	Fri Oct 19 23:03:17 2007 -0400
@@ -224,6 +224,27 @@
             yield line.rstrip('\n')
     
 
+class CompressionExtractor(BaseExtractor):
+    name_checker = FilenameChecker
+
+    def basename(self):
+        pieces = os.path.basename(self.filename).split('.')
+        extension = '.' + pieces[-1]
+        if mimetypes.encodings_map.has_key(extension):
+            pieces.pop()
+        return '.'.join(pieces)
+
+    def get_filenames(self):
+        yield self.basename()
+
+    def extract(self):
+        self.content_type = ONE_ENTRY_KNOWN
+        self.content_name = self.basename()
+        output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
+        self.run_pipes(output_fd)
+        os.close(output_fd)
+        
+
 class TarExtractor(BaseExtractor):
     def get_filenames(self):
         self.pipe(['tar', '-t'], "listing")
@@ -301,26 +322,31 @@
         self.content_type = BOMB
 
 
-class CompressionExtractor(BaseExtractor):
-    name_checker = FilenameChecker
+class DebMetadataExtractor(DebExtractor):
+    def prepare(self):
+        self.pipe(['ar', 'p', self.filename, 'control.tar.gz'],
+                  "control.tar.gz extraction")
+        self.pipe(['zcat'], "control.tar.gz decompression")
+
+
+class GemExtractor(TarExtractor):
+    def prepare(self):
+        self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
+        self.pipe(['zcat'], "data.tar.gz decompression")
+
+    def check_contents(self):
+        self.check_included_archives(os.listdir('.'))
+        self.content_type = BOMB
+
+
+class GemMetadataExtractor(CompressionExtractor):
+    def prepare(self):
+        self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction")
+        self.pipe(['zcat'], "metadata.gz decompression")
 
     def basename(self):
-        pieces = os.path.basename(self.filename).split('.')
-        extension = '.' + pieces[-1]
-        if mimetypes.encodings_map.has_key(extension):
-            pieces.pop()
-        return '.'.join(pieces)
+        return os.path.basename(self.filename) + '-metadata.txt'
 
-    def get_filenames(self):
-        yield self.basename()
-
-    def extract(self):
-        self.content_type = ONE_ENTRY_KNOWN
-        self.content_name = self.basename()
-        output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
-        self.run_pipes(output_fd)
-        os.close(output_fd)
-        
 
 class BaseHandler(object):
     def __init__(self, extractor, options):
@@ -515,11 +541,50 @@
                  'application/x-debian-package': DebExtractor,
                  'application/x-redhat-package-manager': RPMExtractor,
                  'application/x-rpm': RPMExtractor,
-                 'application/x-cpio': CpioExtractor}
+                 'application/x-cpio': CpioExtractor,
+                 'application/x-ruby-gem': GemExtractor}
 
 handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
             BombHandler]
 
+class ExtractorBuilder(object):
+    extractor_map = {}
+    for args in ((TarExtractor, None, 'x-tar'),
+                 (ZipExtractor, None, 'zip', 'x-msdos-program'),
+                 (DebExtractor, DebMetadataExtractor, 'x-debian-package'),
+                 (RPMExtractor, None, 'x-redhat-package-manager', 'x-rpm'),
+                 (CpioExtractor, None, 'x-cpio'),
+                 (GemExtractor, GemMetadataExtractor, 'x-ruby-gem')):
+        for entry in args[2:]:
+            if '/' not in entry:
+                entry = 'application/' + entry
+            extractor_map[entry] = args[0:2]
+
+    def __init__(self, filename, options):
+        self.filename = filename
+        self.options = options
+        self.mimetype, self.encoding = mimetypes.guess_type(self.filename)
+
+    def get_extractor(self):
+        extractor = self.find_extractor()
+        if extractor is None:
+            raise ExtractorError("not a known archive type")
+        return extractor(self.filename, self.encoding)
+
+    def find_extractor(self):
+        extractor = None
+        try:
+            extractors = self.extractor_map[self.mimetype]
+            if self.options.metadata and (extractors[1] is not None):
+                extractor = extractors[1]
+            else:
+                extractor = extractors[0]
+        except KeyError:
+            if self.encoding:
+                extractor = CompressionExtractor
+        return extractor
+
+
 class ExtractorApplication(object):
     def __init__(self, arguments):
         self.parse_options(arguments)
@@ -554,6 +619,9 @@
         parser.add_option('-n', '--noninteractive', dest='batch',
                           action='store_true', default=False,
                           help="don't ask how to handle special cases")
+        parser.add_option('-m', '--metadata', dest='metadata',
+                          action='store_true', default=False,
+                          help="extract metadata from a .deb/.gem/etc.")
         self.options, filenames = parser.parse_args(arguments)
         if not filenames:
             parser.error("you did not list any archives")
@@ -571,18 +639,8 @@
         self.logger.addHandler(handler)
 
     def get_extractor(self):
-        mimetype, encoding = mimetypes.guess_type(self.current_filename)
-        try:
-            extractor = extractor_map[mimetype]
-        except KeyError:
-            if encoding:
-                extractor = CompressionExtractor
-            else:
-                return "not a known archive type"
-        try:
-            self.current_extractor = extractor(self.current_filename, encoding)
-        except ExtractorError, error:
-            return str(error)
+        builder = ExtractorBuilder(self.current_filename, self.options)
+        self.current_extractor = builder.get_extractor()
 
     def get_handler(self):
         for var_name in ('type', 'name'):
--- a/tests/compare.py	Fri Oct 19 22:46:20 2007 -0400
+++ b/tests/compare.py	Fri Oct 19 23:03:17 2007 -0400
@@ -55,7 +55,7 @@
         setattr(self, 'options', kwargs.get('options', '-n').split())
         setattr(self, 'filenames', kwargs.get('filenames', '').split())
         for key in ('directory', 'prerun', 'posttest', 'baseline', 'error',
-                    'grep', 'antigrep', 'input', 'output'):
+                    'grep', 'antigrep', 'input', 'output', 'cleanup'):
             setattr(self, key, kwargs.get(key, None))
         
     def get_results(self, commands, stdin=None):
@@ -102,6 +102,9 @@
         return subprocess.call(['sh', TESTSCRIPT_NAME])
 
     def clean(self):
+        if self.cleanup is not None:
+            self.write_script(self.cleanup)
+            subprocess.call(['sh', TESTSCRIPT_NAME])
         if self.directory:
             target = os.path.join(ROOT_DIR, self.directory)
             extra_options = ['!', '-name', TESTSCRIPT_NAME]
Binary file tests/test-1.23.gem has changed
--- a/tests/tests.yml	Fri Oct 19 22:46:20 2007 -0400
+++ b/tests/tests.yml	Fri Oct 19 23:03:17 2007 -0400
@@ -25,13 +25,33 @@
 - name: basic .deb
   filenames: test-1.23_all.deb
   baseline: |
-    TD=$PWD
+    mkdir test-1.23
+    cd test-1.23
+    ar p ../$1 data.tar.gz | tar -zx
+
+- name: basic .gem
+  filenames: test-1.23.gem
+  baseline: |
     mkdir test-1.23
-    cd /tmp
-    ar x $TD/$1 data.tar.gz
-    cd $TD/test-1.23
-    tar -zxf /tmp/data.tar.gz
-    rm /tmp/data.tar.gz
+    cd test-1.23
+    tar -xOf ../$1 data.tar.gz | tar -zx
+
+- name: .deb metadata
+  filenames: test-1.23_all.deb
+  options: --metadata
+  baseline: |
+    mkdir test-1.23
+    cd test-1.23
+    ar p ../$1 control.tar.gz | tar -zx
+    
+- name: .gem metadata
+  filenames: test-1.23.gem
+  options: -m
+  baseline: |
+    tar -xOf $1 metadata.gz | zcat > test-1.23.gem-metadata.txt
+  cleanup: rm -f test-1.23.gem-metadata.txt
+  posttest: |
+    if [ "x`cat test-1.23.gem-metadata.txt`" != "xhi" ]; then exit 1; fi
 
 - name: recursion and permissions
   filenames: test-recursive-badperms.tar.bz2
@@ -45,8 +65,8 @@
     tar -xf ../test-badperms.tar
     chmod 700 testdir
   posttest: |
-    if [ "x`cat test-recursive-badperms/test-badperms/testdir/testfile`" = \
-         "xhey" ]; then exit 0; else exit 1; fi
+    if [ "x`cat test-recursive-badperms/test-badperms/testdir/testfile`" != \
+         "xhey" ]; then exit 1; fi
 
 - name: decompressing gz
   directory: inside-dir

mercurial