def ok_to_recurse(self):
    """Return whether the current policy is one of the recursing policies.

    True when ``self.current_policy`` is RECURSE_ALWAYS or RECURSE_ONCE,
    False otherwise.
    """
    recursing_policies = (RECURSE_ALWAYS, RECURSE_ONCE)
    return self.current_policy in recursing_policies
731 |
731 |
732 |
732 |
class ExtractorBuilder(object):
    """Choose and construct an extractor for a given file.

    The class-level tables below are all derived from ``extractor_map``,
    which is the single place where an archive type is registered.
    """

    # One entry per archive type.  Recognised keys in each entry:
    #   'extractor'  - class used for normal extraction (always present)
    #   'metadata'   - class used instead when metadata output is requested
    #   'mimetypes'  - MIME types; a bare subtype gets 'application/' prepended
    #   'extensions' - filename extensions, without the leading dot
    #   'magic'      - regular expressions matched against magic descriptions
    extractor_map = {'tar': {'extractor': TarExtractor,
                             'mimetypes': ('x-tar',),
                             'extensions': ('tar',),
                             'magic': ('POSIX tar archive',)},
                     'zip': {'extractor': ZipExtractor,
                             'mimetypes': ('zip',),
                             'extensions': ('zip',),
                             'magic': ('(Zip|ZIP self-extracting) archive',)},
                     'rpm': {'extractor': RPMExtractor,
                             'mimetypes': ('x-redhat-package-manager',
                                           'x-rpm'),
                             'extensions': ('rpm',),
                             'magic': ('RPM',)},
                     'deb': {'extractor': DebExtractor,
                             'metadata': DebMetadataExtractor,
                             'mimetypes': ('x-debian-package',),
                             'extensions': ('deb',),
                             'magic': ('Debian binary package',)},
                     'cpio': {'extractor': CpioExtractor,
                              'mimetypes': ('x-cpio',),
                              'extensions': ('cpio',),
                              'magic': ('cpio archive',)},
                     'gem': {'extractor': GemExtractor,
                             'metadata': GemMetadataExtractor,
                             'mimetypes': ('x-ruby-gem',),
                             'extensions': ('gem',)},
                     '7z': {'extractor': SevenExtractor,
                            'mimetypes': ('x-7z-compressed',),
                            'extensions': ('7z',),
                            'magic': ('7-zip archive',)},
                     # The pattern is compiled case-sensitively, so it has to
                     # use the lowercase "archive" spelling that the previous
                     # table used for this type.
                     'cab': {'extractor': CABExtractor,
                             'mimetypes': ('x-cab',),
                             'extensions': ('cab',),
                             'magic': ('Microsoft Cabinet archive',)},
                     # NOTE: 'shield' claims the same MIME type and the 'cab'
                     # extension as 'cab' above.  In mimetype_map the entry
                     # processed last overwrites the other; in extension_map
                     # both are kept, in iteration order.
                     'shield': {'extractor': ShieldExtractor,
                                'mimetypes': ('x-cab',),
                                'extensions': ('cab', 'hdr'),
                                'magic': ('InstallShield CAB',)},
                     'compress': {'extractor': CompressionExtractor}
                     }

    # Derived lookup tables:
    #   mimetype_map:   full MIME type string -> archive type name
    #   magic_mime_map: compiled regex        -> archive type name
    #   extension_map:  extension -> list of (archive type name, encoding)
    mimetype_map = {}
    magic_mime_map = {}
    extension_map = {}
    for ext_name, ext_info in extractor_map.items():
        for mimetype in ext_info.get('mimetypes', ()):
            if '/' not in mimetype:
                mimetype = 'application/' + mimetype
            mimetype_map[mimetype] = ext_name
        for magic_re in ext_info.get('magic', ()):
            magic_mime_map[re.compile(magic_re)] = ext_name
        for extension in ext_info.get('extensions', ()):
            # Extensions registered through extractor_map carry no encoding.
            extension_map.setdefault(extension, []).append((ext_name, None))

    # Extensions that also imply an encoding cannot be expressed in
    # extractor_map, so they are added here as
    # (archive type, encoding, extension, ...) tuples.
    for mapping in (('tar', 'bzip2', 'tar.bz2'),
                    ('tar', 'gzip', 'tar.gz', 'tgz'),
                    ('compress', 'gzip', 'Z', 'gz'),
                    ('compress', 'bzip2', 'bz2'),
                    ('compress', 'lzma', 'lzma')):
        for extension in mapping[2:]:
            extension_map.setdefault(extension, []).append(mapping[:2])

    # magic_encoding_map: compiled regex -> encoding name.
    magic_encoding_map = {}
    for mapping in (('bzip2', 'bzip2 compressed'),
                    ('gzip', 'gzip compressed')):
        for pattern in mapping[1:]:
            magic_encoding_map[re.compile(pattern)] = mapping[0]
795 |
|
def __init__(self, filename, options):
    """Store the target filename and the options object on the instance."""
    self.filename, self.options = filename, options
799 |
804 |
def build_extractor(self, archive_type, encoding):
    """Instantiate the extractor registered for *archive_type*.

    The entry for *archive_type* is looked up in ``self.extractor_map``.
    When ``self.options.metadata`` is true and the entry provides a
    'metadata' class, that class is used; otherwise the entry's
    'extractor' class is used.

    Returns the result of calling the chosen class with
    ``(self.filename, encoding)``.

    Raises KeyError if *archive_type* is not in ``self.extractor_map``.
    """
    handlers = self.extractor_map[archive_type]
    # Use the ``in`` operator rather than dict.has_key(): has_key() was
    # removed in Python 3, while ``in`` behaves the same on both lines.
    if self.options.metadata and 'metadata' in handlers:
        extractor = handlers['metadata']
    else:
        extractor = handlers['extractor']
    return extractor(self.filename, encoding)
807 |
812 |
808 def get_extractor(self): |
813 def get_extractor(self): |
809 tried_types = set() |
814 tried_types = set() |
810 # As smart as it is, the magic test can't go first, because at least |
815 # As smart as it is, the magic test can't go first, because at least |