Fri, 19 Oct 2007 23:03:17 -0400
[svn] Add support for Ruby Gems, and extracting metadata from .deb/.gem files.
scripts/dtrx | file | annotate | diff | comparison | revisions | |
tests/compare.py | file | annotate | diff | comparison | revisions | |
tests/test-1.23.gem | file | annotate | diff | comparison | revisions | |
tests/tests.yml | file | annotate | diff | comparison | revisions |
--- a/scripts/dtrx Fri Oct 19 22:46:20 2007 -0400
+++ b/scripts/dtrx Fri Oct 19 23:03:17 2007 -0400
@@ -224,6 +224,27 @@
             yield line.rstrip('\n')
 
 
+class CompressionExtractor(BaseExtractor):
+    name_checker = FilenameChecker
+
+    def basename(self):
+        pieces = os.path.basename(self.filename).split('.')
+        extension = '.' + pieces[-1]
+        if mimetypes.encodings_map.has_key(extension):
+            pieces.pop()
+        return '.'.join(pieces)
+
+    def get_filenames(self):
+        yield self.basename()
+
+    def extract(self):
+        self.content_type = ONE_ENTRY_KNOWN
+        self.content_name = self.basename()
+        output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
+        self.run_pipes(output_fd)
+        os.close(output_fd)
+
+
 class TarExtractor(BaseExtractor):
     def get_filenames(self):
         self.pipe(['tar', '-t'], "listing")
@@ -301,26 +322,31 @@
         self.content_type = BOMB
 
 
-class CompressionExtractor(BaseExtractor):
-    name_checker = FilenameChecker
+class DebMetadataExtractor(DebExtractor):
+    def prepare(self):
+        self.pipe(['ar', 'p', self.filename, 'control.tar.gz'],
+                  "control.tar.gz extraction")
+        self.pipe(['zcat'], "control.tar.gz decompression")
+
+
+class GemExtractor(TarExtractor):
+    def prepare(self):
+        self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
+        self.pipe(['zcat'], "data.tar.gz decompression")
+
+    def check_contents(self):
+        self.check_included_archives(os.listdir('.'))
+        self.content_type = BOMB
+
+
+class GemMetadataExtractor(CompressionExtractor):
+    def prepare(self):
+        self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction")
+        self.pipe(['zcat'], "metadata.gz decompression")
 
     def basename(self):
-        pieces = os.path.basename(self.filename).split('.')
-        extension = '.' + pieces[-1]
-        if mimetypes.encodings_map.has_key(extension):
-            pieces.pop()
-        return '.'.join(pieces)
+        return os.path.basename(self.filename) + '-metadata.txt'
 
-    def get_filenames(self):
-        yield self.basename()
-
-    def extract(self):
-        self.content_type = ONE_ENTRY_KNOWN
-        self.content_name = self.basename()
-        output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
-        self.run_pipes(output_fd)
-        os.close(output_fd)
-
 
 class BaseHandler(object):
     def __init__(self, extractor, options):
@@ -515,11 +541,50 @@
                  'application/x-debian-package': DebExtractor,
                  'application/x-redhat-package-manager': RPMExtractor,
                  'application/x-rpm': RPMExtractor,
-                 'application/x-cpio': CpioExtractor}
+                 'application/x-cpio': CpioExtractor,
+                 'application/x-ruby-gem': GemExtractor}
 
 handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
             BombHandler]
 
+class ExtractorBuilder(object):
+    extractor_map = {}
+    for args in ((TarExtractor, None, 'x-tar'),
+                 (ZipExtractor, None, 'zip', 'x-msdos-program'),
+                 (DebExtractor, DebMetadataExtractor, 'x-debian-package'),
+                 (RPMExtractor, None, 'x-redhat-package-manager', 'x-rpm'),
+                 (CpioExtractor, None, 'x-cpio'),
+                 (GemExtractor, GemMetadataExtractor, 'x-ruby-gem')):
+        for entry in args[2:]:
+            if '/' not in entry:
+                entry = 'application/' + entry
+            extractor_map[entry] = args[0:2]
+
+    def __init__(self, filename, options):
+        self.filename = filename
+        self.options = options
+        self.mimetype, self.encoding = mimetypes.guess_type(self.filename)
+
+    def get_extractor(self):
+        extractor = self.find_extractor()
+        if extractor is None:
+            raise ExtractorError("not a known archive type")
+        return extractor(self.filename, self.encoding)
+
+    def find_extractor(self):
+        extractor = None
+        try:
+            extractors = self.extractor_map[self.mimetype]
+            if self.options.metadata and (extractors[1] is not None):
+                extractor = extractors[1]
+            else:
+                extractor = extractors[0]
+        except KeyError:
+            if self.encoding:
+                extractor = CompressionExtractor
+        return extractor
+
+
 class ExtractorApplication(object):
     def __init__(self, arguments):
         self.parse_options(arguments)
@@ -554,6 +619,9 @@
         parser.add_option('-n', '--noninteractive', dest='batch',
                           action='store_true', default=False,
                           help="don't ask how to handle special cases")
+        parser.add_option('-m', '--metadata', dest='metadata',
+                          action='store_true', default=False,
+                          help="extract metadata from a .deb/.gem/etc.")
         self.options, filenames = parser.parse_args(arguments)
         if not filenames:
             parser.error("you did not list any archives")
@@ -571,18 +639,8 @@
         self.logger.addHandler(handler)
 
     def get_extractor(self):
-        mimetype, encoding = mimetypes.guess_type(self.current_filename)
-        try:
-            extractor = extractor_map[mimetype]
-        except KeyError:
-            if encoding:
-                extractor = CompressionExtractor
-            else:
-                return "not a known archive type"
-        try:
-            self.current_extractor = extractor(self.current_filename, encoding)
-        except ExtractorError, error:
-            return str(error)
+        builder = ExtractorBuilder(self.current_filename, self.options)
+        self.current_extractor = builder.get_extractor()
 
     def get_handler(self):
         for var_name in ('type', 'name'):
--- a/tests/compare.py Fri Oct 19 22:46:20 2007 -0400
+++ b/tests/compare.py Fri Oct 19 23:03:17 2007 -0400
@@ -55,7 +55,7 @@
         setattr(self, 'options', kwargs.get('options', '-n').split())
         setattr(self, 'filenames', kwargs.get('filenames', '').split())
         for key in ('directory', 'prerun', 'posttest', 'baseline', 'error',
-                    'grep', 'antigrep', 'input', 'output'):
+                    'grep', 'antigrep', 'input', 'output', 'cleanup'):
             setattr(self, key, kwargs.get(key, None))
 
     def get_results(self, commands, stdin=None):
@@ -102,6 +102,9 @@
         return subprocess.call(['sh', TESTSCRIPT_NAME])
 
     def clean(self):
+        if self.cleanup is not None:
+            self.write_script(self.cleanup)
+            subprocess.call(['sh', TESTSCRIPT_NAME])
         if self.directory:
             target = os.path.join(ROOT_DIR, self.directory)
             extra_options = ['!', '-name', TESTSCRIPT_NAME]
--- a/tests/tests.yml Fri Oct 19 22:46:20 2007 -0400
+++ b/tests/tests.yml Fri Oct 19 23:03:17 2007 -0400
@@ -25,13 +25,33 @@
 - name: basic .deb
   filenames: test-1.23_all.deb
   baseline: |
-    TD=$PWD
+    mkdir test-1.23
+    cd test-1.23
+    ar p ../$1 data.tar.gz | tar -zx
+
+- name: basic .gem
+  filenames: test-1.23.gem
+  baseline: |
     mkdir test-1.23
-    cd /tmp
-    ar x $TD/$1 data.tar.gz
-    cd $TD/test-1.23
-    tar -zxf /tmp/data.tar.gz
-    rm /tmp/data.tar.gz
+    cd test-1.23
+    tar -xOf ../$1 data.tar.gz | tar -zx
+
+- name: .deb metadata
+  filenames: test-1.23_all.deb
+  options: --metadata
+  baseline: |
+    mkdir test-1.23
+    cd test-1.23
+    ar p ../$1 control.tar.gz | tar -zx
+
+- name: .gem metadata
+  filenames: test-1.23.gem
+  options: -m
+  baseline: |
+    tar -xOf $1 metadata.gz | zcat > test-1.23.gem-metadata.txt
+  cleanup: rm -f test-1.23.gem-metadata.txt
+  posttest: |
+    if [ "x`cat test-1.23.gem-metadata.txt`" != "xhi" ]; then exit 1; fi
 
 - name: recursion and permissions
   filenames: test-recursive-badperms.tar.bz2
@@ -45,8 +65,8 @@
     tar -xf ../test-badperms.tar
     chmod 700 testdir
   posttest: |
-    if [ "x`cat test-recursive-badperms/test-badperms/testdir/testfile`" = \
-         "xhey" ]; then exit 0; else exit 1; fi
+    if [ "x`cat test-recursive-badperms/test-badperms/testdir/testfile`" != \
+         "xhey" ]; then exit 1; fi
 
 - name: decompressing gz
   directory: inside-dir