def extract_archive(self):
    """Queue a ``tar -x`` command on the pipeline, then run the pipeline.

    The command is registered through ``self.pipe`` and executed by
    ``self.run_pipes``; nothing is returned.
    """
    tar_command = ['tar', '-x']
    self.pipe(tar_command)
    self.run_pipes()
265 |
270 |
266 |
271 |
class ZipExtractor(BaseExtractor):
    """Extractor that drives ``zipinfo`` and ``unzip``.

    Both commands receive the archive path as a command-line argument, so
    the base class is initialised with '/dev/null' in place of the archive.
    """

    def __init__(self, filename, encoding):
        # ``encoding`` is accepted for signature compatibility; the base
        # class is always given None for it here.
        BaseExtractor.__init__(self, '/dev/null', None)
        self.filename = os.path.realpath(filename)

    def get_filenames(self):
        """Queue ``zipinfo -1`` as the listing command and defer to the base
        class to produce the filenames."""
        listing_command = ['zipinfo', '-1', self.filename]
        self.pipe(listing_command, "listing")
        return BaseExtractor.get_filenames(self)

    def extract_archive(self):
        """Queue ``unzip -q`` on the archive and run the pipeline."""
        unzip_command = ['unzip', '-q', self.filename]
        self.pipe(unzip_command)
        self.run_pipes()
|
279 |
|
280 |
|
281 class CpioExtractor(BaseExtractor): |
272 class CpioExtractor(BaseExtractor): |
def get_filenames(self):
    """Queue ``cpio -t`` as the listing command, then let
    ``BaseExtractor.get_filenames`` produce the names."""
    listing_command = ['cpio', '-t']
    self.pipe(listing_command, "listing")
    return BaseExtractor.get_filenames(self)
285 |
276 |
355 |
346 |
def basename(self):
    """Return the final path component of ``self.filename`` with
    '-metadata.txt' appended."""
    leaf = os.path.basename(self.filename)
    return leaf + '-metadata.txt'
358 |
349 |
359 |
350 |
360 class SevenExtractor(BaseExtractor): |
class NoPipeExtractor(BaseExtractor):
    # Base class for extractors built on tools that will not read the
    # archive from stdin.  The usual piping setup cannot feed such tools the
    # archive, so self.archive is seeded with /dev/null instead and the
    # real archive path is kept in self.filename for subclasses to put on
    # the command line.  This class does nothing by itself.
    def __init__(self, filename, encoding):
        # The base class gets '/dev/null' and no encoding; ``encoding`` is
        # accepted only so the constructor signature matches.
        BaseExtractor.__init__(self, '/dev/null', None)
        archive_path = os.path.realpath(filename)
        self.filename = archive_path
|
362 |
|
363 |
|
class ZipExtractor(NoPipeExtractor):
    """Extractor that runs ``zipinfo`` and ``unzip`` on ``self.filename``."""

    def get_filenames(self):
        """Queue ``zipinfo -1 <archive>`` as the listing command and return
        whatever ``BaseExtractor.get_filenames`` produces from it."""
        zipinfo_command = ['zipinfo', '-1', self.filename]
        self.pipe(zipinfo_command, "listing")
        return BaseExtractor.get_filenames(self)

    def extract_archive(self):
        """Queue ``unzip -q <archive>`` and run the pipeline."""
        unzip_command = ['unzip', '-q', self.filename]
        self.pipe(unzip_command)
        self.run_pipes()
|
372 |
|
373 |
|
374 class SevenExtractor(NoPipeExtractor): |
|
375 border_re = re.compile('^[- ]+$') |
366 |
376 |
367 def get_filenames(self): |
377 def get_filenames(self): |
368 self.pipe(['7z', 'l', self.filename], "listing") |
378 self.pipe(['7z', 'l', self.filename], "listing") |
369 self.run_pipes() |
379 self.run_pipes() |
370 self.archive.seek(0, 0) |
380 self.archive.seek(0, 0) |
381 |
391 |
def extract_archive(self):
    """Queue ``7z x`` on ``self.filename`` and run the pipeline."""
    seven_zip_command = ['7z', 'x', self.filename]
    self.pipe(seven_zip_command)
    self.run_pipes()
385 |
395 |
|
396 |
|
class CABExtractor(NoPipeExtractor):
    """Extractor that runs ``cabextract`` on ``self.filename``."""

    # Matches a line made up solely of '-' and '+' characters; the listing
    # parser treats the first such line as the end of the header.
    border_re = re.compile(r'^[-\+]+$')

    def get_filenames(self):
        """Yield the names found in the output of ``cabextract -l``.

        The listing is run through the pipeline, then ``self.archive`` is
        rewound and read line by line.  Everything up to and including the
        first border line is skipped.  Each following line is split on
        ' | ' and its third field, minus the trailing newline, is yielded.
        The first line with fewer than three fields ends the listing.
        ``self.archive`` is closed once the listing has been consumed.
        """
        self.pipe(['cabextract', '-l', self.filename], "listing")
        self.run_pipes()
        self.archive.seek(0, 0)
        # Skip the header, stopping just past the border line.
        for line in self.archive:
            if self.border_re.match(line):
                break
        # Continue on the same iterator: the remaining lines are entries.
        for line in self.archive:
            try:
                yield line.split(' | ', 2)[2].rstrip('\n')
            except IndexError:
                # Fewer than three fields: no longer an entry row.
                break
        self.archive.close()

    def extract_archive(self):
        """Queue ``cabextract -q`` on the archive and run the pipeline."""
        self.pipe(['cabextract', '-q', self.filename])
        self.run_pipes()
|
418 |
386 |
419 |
387 class BaseHandler(object): |
420 class BaseHandler(object): |
388 def __init__(self, extractor, options): |
421 def __init__(self, extractor, options): |
389 self.extractor = extractor |
422 self.extractor = extractor |
390 self.options = options |
423 self.options = options |
456 ((contents == ONE_ENTRY) and |
489 ((contents == ONE_ENTRY) and |
457 options.one_entry_policy.ok_for_match())) |
490 options.one_entry_policy.ok_for_match())) |
458 can_handle = staticmethod(can_handle) |
491 can_handle = staticmethod(can_handle) |
459 |
492 |
def organize(self):
    """Move the extracted entry to its final name and record it.

    ``self.extractor.target`` is either a directory holding the extracted
    entry or the extracted file itself.  The entry is renamed to a
    destination chosen from the one-entry policy: the archive's content
    name (trailing '/' stripped) when the policy equals EXTRACT_HERE,
    otherwise ``self.extractor.basename()``.  The destination is passed
    through DirectoryChecker or FilenameChecker, depending on whether the
    entry is a directory, and the checked name is stored in
    ``self.target``.  When the extraction target was a directory it is
    removed after its entry has been moved out.
    """
    extracted = self.extractor.target
    extracted_is_dir = os.path.isdir(extracted)
    if extracted_is_dir:
        source = os.path.join(extracted, os.listdir(extracted)[0])
    else:
        # Listing a non-directory would raise, so the target itself is the
        # entry to move in this case.
        source = extracted
    if os.path.isdir(source):
        checker = DirectoryChecker
    else:
        checker = FilenameChecker
    if self.options.one_entry_policy == EXTRACT_HERE:
        destination = self.extractor.content_name.rstrip('/')
    else:
        destination = self.extractor.basename()
    self.target = checker(destination).check()
    os.rename(source, self.target)
    if extracted_is_dir:
        os.rmdir(extracted)
473 |
510 |
474 |
511 |
578 'deb': (DebExtractor, DebMetadataExtractor), |
615 'deb': (DebExtractor, DebMetadataExtractor), |
579 'rpm': (RPMExtractor, None), |
616 'rpm': (RPMExtractor, None), |
580 'cpio': (CpioExtractor, None), |
617 'cpio': (CpioExtractor, None), |
581 'gem': (GemExtractor, GemMetadataExtractor), |
618 'gem': (GemExtractor, GemMetadataExtractor), |
582 'compress': (CompressionExtractor, None), |
619 'compress': (CompressionExtractor, None), |
583 '7z': (SevenExtractor, None)} |
620 '7z': (SevenExtractor, None), |
|
621 'cab': (CABExtractor, None)} |
584 |
622 |
# Maps a full MIME type string to an archive-type key.  Each tuple below is
# (archive type, MIME type, ...); a MIME type written without a '/' is
# shorthand for 'application/<name>'.
mimetype_map = {}
for mapping in (
        ('tar', 'x-tar'),
        ('zip', 'x-msdos-program', 'zip'),
        ('deb', 'x-debian-package'),
        ('rpm', 'x-redhat-package-manager', 'x-rpm'),
        ('cpio', 'x-cpio'),
        ('gem', 'x-ruby-gem'),
        ('7z', 'x-7z-compressed'),
        ('cab', 'x-cab'),
):
    for mimetype in mapping[1:]:
        mimetype = (mimetype if '/' in mimetype
                    else 'application/' + mimetype)
        mimetype_map[mimetype] = mapping[0]
597 |
636 |
# Each tuple is (archive type, pattern, ...).  Every pattern is compiled and
# stored in magic_mime_map as a key pointing at its archive type.
for mapping in (
        ('deb', 'Debian binary package'),
        ('cpio', 'cpio archive'),
        ('tar', 'POSIX tar archive'),
        ('zip', 'Zip archive'),
        ('rpm', 'RPM'),
        ('7z', '7-zip archive'),
        ('cab', 'Microsoft Cabinet archive'),
):
    for pattern in mapping[1:]:
        magic_mime_map[re.compile(pattern)] = mapping[0]
607 |
647 |
608 magic_encoding_map = {} |
648 magic_encoding_map = {} |
609 for mapping in (('bzip2', 'bzip2 compressed'), |
649 for mapping in (('bzip2', 'bzip2 compressed'), |
618 ('zip', None, 'zip', 'exe'), |
658 ('zip', None, 'zip', 'exe'), |
619 ('deb', None, 'deb'), |
659 ('deb', None, 'deb'), |
620 ('rpm', None, 'rpm'), |
660 ('rpm', None, 'rpm'), |
621 ('cpio', None, 'cpio'), |
661 ('cpio', None, 'cpio'), |
622 ('gem', None, 'gem'), |
662 ('gem', None, 'gem'), |
623 ('compress', None, 'Z', 'gz', 'bz2', 'lzma'), |
663 ('compress', 'gzip', 'Z', 'gz'), |
624 ('7z', None, '7z')): |
664 ('compress', 'bzip2', 'bz2'), |
|
665 ('compress', 'lzma', 'lzma'), |
|
666 ('7z', None, '7z'), |
|
667 ('cab', None, 'cab', 'exe')): |
625 for extension in mapping[2:]: |
668 for extension in mapping[2:]: |
626 extension_map[extension] = mapping[:2] |
669 extension_map.setdefault(extension, []).append(mapping[:2]) |
627 |
670 |
def __init__(self, filename, options):
    """Store the archive filename and the options object on the instance."""
    self.filename, self.options = filename, options
631 |
674 |
637 extractor = extractors[0] |
680 extractor = extractors[0] |
638 return extractor(self.filename, encoding) |
681 return extractor(self.filename, encoding) |
639 |
682 |
def get_extractor(self):
    """Yield extractors built from each detection strategy in turn.

    The strategies are tried in the order mimetype, extension, magic by
    calling ``self.try_by_<name>(self.filename)``.  Each call returns an
    iterable of argument tuples, and every tuple is unpacked into
    ``self.build_extractor`` to produce one yielded extractor.
    """
    for func_name in ('mimetype', 'extension', 'magic'):
        logger.debug("getting extractors by %s" % (func_name,))
        detector = getattr(self, 'try_by_' + func_name)
        candidates = detector(self.filename)
        logger.debug("done getting extractors")
        for ext_args in candidates:
            logger.debug("trying %s extractor from %s" %
                         (ext_args, func_name))
            yield self.build_extractor(*ext_args)
648 |
693 |
def try_by_mimetype(cls, filename):
    """Return candidate (archive type, encoding) pairs guessed from the name.

    ``mimetypes.guess_type`` supplies a MIME type and an encoding.  A MIME
    type present in ``cls.mimetype_map`` gives one pair with the mapped
    archive type.  Otherwise a truthy encoding gives one
    ('compress', encoding) pair, and with neither the list is empty.
    """
    guessed_type, encoding = mimetypes.guess_type(filename)
    if guessed_type in cls.mimetype_map:
        return [(cls.mimetype_map[guessed_type], encoding)]
    if encoding:
        return [('compress', encoding)]
    return []
try_by_mimetype = classmethod(try_by_mimetype)
658 |
703 |
|
def magic_map_matches(cls, output, magic_map):
    """Return the values of ``magic_map`` whose compiled-pattern keys are
    found anywhere in ``output``, in the map's iteration order."""
    matches = []
    for regexp, result in magic_map.items():
        if regexp.search(output):
            matches.append(result)
    return matches
magic_map_matches = classmethod(magic_map_matches)
|
708 |
def try_by_magic(cls, filename):
    """Return candidate (archive type, encoding) pairs from ``file -z``.

    Runs ``file -z <filename>`` and examines the first line of its output,
    with a leading '<filename>: ' prefix removed.  The line is matched
    against ``cls.magic_mime_map`` and ``cls.magic_encoding_map`` through
    ``cls.magic_map_matches``.  The result is every combination of matched
    archive type and matched encoding.  When only archive types match, the
    encoding is None; when only encodings match, the archive type is
    'compress'.  A non-zero exit status from ``file`` gives an empty list.
    """
    process = subprocess.Popen(['file', '-z', filename],
                               stdout=subprocess.PIPE,
                               universal_newlines=True)
    # communicate() drains the pipe before waiting, so the child can never
    # block on a full pipe, and it closes the pipe on every path.
    # universal_newlines=True makes the captured output text rather than
    # bytes, which the string prefix check below requires.
    captured = process.communicate()[0]
    if process.returncode != 0:
        return []
    # Keep only the first line, including its newline if it has one.
    first_line, newline, _ = captured.partition('\n')
    output = first_line + newline
    if output.startswith('%s: ' % filename):
        output = output[len(filename) + 2:]
    mimes = cls.magic_map_matches(output, cls.magic_mime_map)
    encodings = cls.magic_map_matches(output, cls.magic_encoding_map)
    if mimes and not encodings:
        encodings = [None]
    elif encodings and not mimes:
        mimes = ['compress']
    return [(m, e) for m in mimes for e in encodings]
try_by_magic = classmethod(try_by_magic)
678 |
727 |
def try_by_extension(cls, filename):
    """Return every ``cls.extension_map`` entry matching the filename's
    trailing extensions.

    At most the last two dot-separated suffixes are considered.  The
    two-part extension (such as 'tar.gz') is looked up first, then the
    final extension alone, and all matching entries are collected in that
    order.
    """
    suffixes = filename.rsplit('.', 2)[1:]
    results = []
    for start in range(len(suffixes)):
        candidate = '.'.join(suffixes[start:])
        results.extend(cls.extension_map.get(candidate, []))
    return results
try_by_extension = classmethod(try_by_extension)
688 |
736 |
689 |
737 |
690 class BaseAction(object): |
738 class BaseAction(object): |
691 def __init__(self, options, filenames): |
739 def __init__(self, options, filenames): |
713 if extractor.content_type == ONE_ENTRY: |
761 if extractor.content_type == ONE_ENTRY: |
714 self.options.one_entry_policy.prep(self.current_filename, |
762 self.options.one_entry_policy.prep(self.current_filename, |
715 extractor.content_name) |
763 extractor.content_name) |
716 for handler in self.handlers: |
764 for handler in self.handlers: |
717 if handler.can_handle(extractor.content_type, self.options): |
765 if handler.can_handle(extractor.content_type, self.options): |
|
766 logger.debug("using %s handler" % (handler.__name__,)) |
718 self.current_handler = handler(extractor, self.options) |
767 self.current_handler = handler(extractor, self.options) |
719 break |
768 break |
720 |
769 |
721 def run(self, filename, extractor): |
770 def run(self, filename, extractor): |
722 self.current_filename = filename |
771 self.current_filename = filename |
787 parser.add_option('-n', '--noninteractive', dest='batch', |
836 parser.add_option('-n', '--noninteractive', dest='batch', |
788 action='store_true', default=False, |
837 action='store_true', default=False, |
789 help="don't ask how to handle special cases") |
838 help="don't ask how to handle special cases") |
790 parser.add_option('-m', '--metadata', dest='metadata', |
839 parser.add_option('-m', '--metadata', dest='metadata', |
791 action='store_true', default=False, |
840 action='store_true', default=False, |
792 help="extract metadata from a .deb/.gem/etc.") |
841 help="extract metadata from a .deb/.gem") |
793 self.options, filenames = parser.parse_args(arguments) |
842 self.options, filenames = parser.parse_args(arguments) |
794 if not filenames: |
843 if not filenames: |
795 parser.error("you did not list any archives") |
844 parser.error("you did not list any archives") |
796 self.options.one_entry_policy = OneEntryPolicy(self.options) |
845 self.options.one_entry_policy = OneEntryPolicy(self.options) |
797 self.options.recursion_policy = RecursionPolicy(self.options) |
846 self.options.recursion_policy = RecursionPolicy(self.options) |