546 |
547 |
# Handler classes available to the application.
# NOTE(review): presumably these are tried in list order when a handler is
# chosen for extracted contents -- confirm against get_handler.
handlers = [
    FlatHandler,
    OverwriteHandler,
    MatchHandler,
    EmptyHandler,
    BombHandler,
]
549 |
550 |
class ExtractorBuilder(object):
    """Pick extractor classes for a file and instantiate them.

    Two detection strategies are used, in order: the filename (through
    ``mimetypes.guess_type``) and the output of the ``file -z`` command.
    ``get_extractor`` lazily yields one extractor per distinct detection
    result so the caller can fall back to the next one on failure.
    """

    # Archive type name -> (content extractor, metadata extractor or None).
    extractor_map = {'tar': (TarExtractor, None),
                     'zip': (ZipExtractor, None),
                     'deb': (DebExtractor, DebMetadataExtractor),
                     'rpm': (RPMExtractor, None),
                     'cpio': (CpioExtractor, None),
                     'gem': (GemExtractor, GemMetadataExtractor),
                     'compress': (CompressionExtractor, None)}

    # MIME type -> archive type name.  Entries written without a '/' are
    # registered under the 'application/' top-level type.
    mimetype_map = {}
    for mapping in (('tar', 'x-tar'),
                    ('zip', 'x-msdos-program', 'zip'),
                    ('deb', 'x-debian-package'),
                    ('rpm', 'x-redhat-package-manager', 'x-rpm'),
                    ('cpio', 'x-cpio'),
                    ('gem', 'x-ruby-gem')):
        for mimetype in mapping[1:]:
            if '/' not in mimetype:
                mimetype = 'application/' + mimetype
            mimetype_map[mimetype] = mapping[0]

    # Compiled pattern searched for in `file` output -> archive type name.
    magic_mime_map = {}
    for mapping in (('deb', 'Debian binary package'),
                    ('cpio', 'cpio archive'),
                    ('tar', 'POSIX tar archive'),
                    ('zip', 'Zip archive'),
                    ('rpm', 'RPM')):
        for pattern in mapping[1:]:
            magic_mime_map[re.compile(pattern)] = mapping[0]

    # Compiled pattern searched for in `file` output -> encoding name.
    magic_encoding_map = {}
    for mapping in (('bzip2', 'bzip2 compressed'),
                    ('gzip', 'gzip compressed')):
        for pattern in mapping[1:]:
            magic_encoding_map[re.compile(pattern)] = mapping[0]

    def __init__(self, filename, options):
        """Remember the file to inspect and the parsed options."""
        self.filename = filename
        self.options = options

    def build_extractor(self, archive_type, encoding):
        """Instantiate the extractor registered for ``archive_type``.

        The metadata extractor is used when ``options.metadata`` is set and
        the archive type has one; otherwise the content extractor is used.
        Raises KeyError if ``archive_type`` is not in ``extractor_map``.
        """
        content_class, metadata_class = self.extractor_map[archive_type]
        if self.options.metadata and (metadata_class is not None):
            extractor = metadata_class
        else:
            extractor = content_class
        return extractor(self.filename, encoding)

    def get_extractor(self):
        """Yield an extractor for each distinct detection result.

        Strategies run lazily, so the ``file`` command is only invoked if
        the caller keeps iterating after the filename-based guess.  A result
        identical to one already yielded is skipped, since retrying the same
        extractor with the same encoding cannot give a different outcome.
        """
        tried = []
        for func_name in ('mimetype', 'magic'):
            archive_type, encoding = getattr(self, 'try_by_' + func_name)()
            if archive_type is None:
                continue
            candidate = (archive_type, encoding)
            if candidate in tried:
                continue
            tried.append(candidate)
            yield self.build_extractor(archive_type, encoding)

    def try_by_mimetype(self):
        """Guess (archive type, encoding) from the filename.

        Returns ``('compress', encoding)`` when only an encoding is
        recognized, and ``(None, None)`` when nothing is recognized.
        """
        mimetype, encoding = mimetypes.guess_type(self.filename)
        try:
            return self.mimetype_map[mimetype], encoding
        except KeyError:
            if encoding:
                return 'compress', encoding
        return None, None

    def try_by_magic(self):
        """Guess (archive type, encoding) from the output of ``file -z``.

        Returns ``(None, None)`` when ``file`` cannot be run, exits with a
        non-zero status, or reports nothing recognized.  As in
        ``try_by_mimetype``, a recognized encoding without a recognized
        archive type is reported as archive type ``'compress'``.
        """
        try:
            process = subprocess.Popen(['file', '-z', self.filename],
                                       stdout=subprocess.PIPE)
        except OSError:
            # `file` is not installed or not executable: no magic available.
            return None, None
        # communicate() drains and closes the pipe before waiting, so the
        # child can never block on a full pipe and no descriptor is leaked.
        output = process.communicate()[0]
        if process.returncode != 0:
            return None, None
        lines = output.splitlines()
        if not lines:
            return None, None
        # Only the first line is examined, as before.
        output = lines[0]
        prefix = '%s: ' % self.filename
        if output.startswith(prefix):
            output = output[len(prefix):]
        archive_type = self._search_magic_map(self.magic_mime_map, output)
        encoding = self._search_magic_map(self.magic_encoding_map, output)
        if archive_type is None and encoding is not None:
            archive_type = 'compress'
        return archive_type, encoding

    def _search_magic_map(self, magic_map, output):
        """Return the value of the first pattern in ``magic_map`` that
        matches ``output``, or None when no pattern matches."""
        for regexp, result in magic_map.items():
            if regexp.search(output):
                return result
        return None
586 |
632 |
587 |
633 |
588 class ExtractorApplication(object): |
634 class ExtractorApplication(object): |
589 def __init__(self, arguments): |
635 def __init__(self, arguments): |
590 self.parse_options(arguments) |
636 self.parse_options(arguments) |
681 self.successes.append(self.current_filename) |
723 self.successes.append(self.current_filename) |
682 else: |
724 else: |
683 self.failures.append(self.current_filename) |
725 self.failures.append(self.current_filename) |
684 |
726 |
def extract(self):
    """Extract the current archive and hand its contents to a handler.

    Runs three reported steps in order -- the extractor's ``extract``,
    ``get_handler``, then the chosen handler's ``handle`` -- stopping at the
    first one whose report is false.  ``recurse`` is called only when all
    three succeed.  Returns the result of the last step that was run.
    """
    outcome = self.report(self.current_extractor.extract)
    if outcome:
        outcome = self.report(self.get_handler)
    if outcome:
        # current_handler is only looked up after get_handler has run.
        outcome = self.report(self.current_handler.handle)
    if outcome:
        self.recurse()
    return outcome
699 |
734 |
def show_contents(self):
    """Print each filename reported by the current extractor, one per line."""
    for entry in self.current_extractor.get_filenames():
        print(entry)
703 |
738 |
def make_list(self):
    """Return a zero-argument callable that lists the current archive.

    When the first group of files in ``self.archives`` holds exactly one
    file, the callable just reports ``show_contents``.  Otherwise each call
    first writes a ``<current_filename>:`` header, with a blank line before
    every header except the first one written.  The callable returns the
    result of ``self.report(self.show_contents)``.
    """
    import sys

    # list() keeps this working whether values() is a list or a view, and
    # the emptiness check avoids an IndexError when there are no archives.
    file_groups = list(self.archives.values())
    if file_groups and len(file_groups[0]) == 1:
        def show_list():
            return self.report(self.show_contents)
        return show_list

    # Track "first header" explicitly rather than comparing names against
    # self.filenames[0]: a filename that appears more than once would
    # otherwise lose its separating blank line.  A dict is used because the
    # closure has to mutate it.
    state = {'first': True}

    def show_list():
        if state['first']:
            header = "%s:\n"
            state['first'] = False
        else:
            header = "\n%s:\n"
        sys.stdout.write(header % (self.current_filename,))
        return self.report(self.show_contents)
    return show_list
718 |
751 |
def run(self):
    """Process every queued archive and return the exit status.

    The action is either listing (``make_list()``) or ``extract``, chosen
    from ``options.show_list``.  For each file, the extractors offered by
    ``ExtractorBuilder`` are tried in turn until the action succeeds; the
    outcome is passed to ``record_status``.  Returns 1 if any failure was
    recorded, otherwise 0.
    """
    if not self.options.show_list:
        perform = self.extract
    else:
        perform = self.make_list()

    while self.archives:
        self.current_directory, self.filenames = self.archives.popitem()
        os.chdir(self.current_directory)
        for name in self.filenames:
            self.current_filename = name
            candidates = ExtractorBuilder(self.current_filename,
                                          self.options).get_extractor()
            handled = False
            for candidate in candidates:
                self.current_extractor = candidate
                result = perform()
                if result:
                    self.record_status(result)
                    handled = True
                    break
            if not handled:
                # Either no extractor was offered or every one failed.
                self.record_status(success=False)
        # NOTE(review): indentation was lost in the diff view this was
        # recovered from; this assignment is assumed to sit at while-loop
        # level, as in the earlier revision -- confirm.
        self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP

    if not self.failures:
        return 0
    return 1
727 |
775 |
728 |
776 |