scripts/dtrx

branch
trunk
changeset 81
18f4fe62eff2
parent 80
df9b3428e28f
child 82
6db35db38795
equal deleted inserted replaced
80:df9b3428e28f 81:18f4fe62eff2
729 def ok_to_recurse(self): 729 def ok_to_recurse(self):
730 return self.current_policy in (RECURSE_ALWAYS, RECURSE_ONCE) 730 return self.current_policy in (RECURSE_ALWAYS, RECURSE_ONCE)
731 731
732 732
733 class ExtractorBuilder(object): 733 class ExtractorBuilder(object):
734 extractor_map = {'tar': (TarExtractor, None), 734 extractor_map = {'tar': {'extractor': TarExtractor,
735 'zip': (ZipExtractor, None), 735 'mimetypes': ('x-tar',),
736 'deb': (DebExtractor, DebMetadataExtractor), 736 'extensions': ('tar',),
737 'rpm': (RPMExtractor, None), 737 'magic': ('POSIX tar archive',)},
738 'cpio': (CpioExtractor, None), 738 'zip': {'extractor': ZipExtractor,
739 'gem': (GemExtractor, GemMetadataExtractor), 739 'mimetypes': ('zip',),
740 'compress': (CompressionExtractor, None), 740 'extensions': ('zip',),
741 '7z': (SevenExtractor, None), 741 'magic': ('(Zip|ZIP self-extracting) archive',)},
742 'cab': (CABExtractor, None), 742 'rpm': {'extractor': RPMExtractor,
743 'shield': (ShieldExtractor, None)} 743 'mimetypes': ('x-redhat-package-manager', 'x-rpm'),
744 'extensions': ('rpm',),
745 'magic': ('RPM',)},
746 'deb': {'extractor': DebExtractor,
747 'metadata': DebMetadataExtractor,
748 'mimetypes': ('x-debian-package',),
749 'extensions': ('deb',),
750 'magic': ('Debian binary package',)},
751 'cpio': {'extractor': CpioExtractor,
752 'mimetypes': ('x-cpio',),
753 'extensions': ('cpio',),
754 'magic': ('cpio archive',)},
755 'gem': {'extractor': GemExtractor,
756 'metadata': GemMetadataExtractor,
757 'mimetypes': ('x-ruby-gem',),
758 'extensions': ('gem',)},
759 '7z': {'extractor': SevenExtractor,
760 'mimetypes': ('x-7z-compressed',),
761 'extensions': ('7z',),
762 'magic': ('7-zip archive',)},
763 'cab': {'extractor': CABExtractor,
764 'mimetypes': ('x-cab',),
765 'extensions': ('cab',),
766 'magic': ('Microsoft Cabinet Archive',)},
767 'shield': {'extractor': ShieldExtractor,
768 'mimetypes': ('x-cab',),
769 'extensions': ('cab', 'hdr'),
770 'magic': ('InstallShield CAB',)},
771 'compress': {'extractor': CompressionExtractor}
772 }
744 773
745 mimetype_map = {} 774 mimetype_map = {}
746 for mapping in (('tar', 'x-tar'), 775 magic_mime_map = {}
747 ('zip', 'zip'), 776 extension_map = {}
748 ('deb', 'x-debian-package'), 777 for ext_name, ext_info in extractor_map.items():
749 ('rpm', 'x-redhat-package-manager', 'x-rpm'), 778 for mimetype in ext_info.get('mimetypes', ()):
750 ('cpio', 'x-cpio'),
751 ('gem', 'x-ruby-gem'),
752 ('7z', 'x-7z-compressed'),
753 ('cab', 'x-cab'),
754 ('shield', 'x-cab')):
755 for mimetype in mapping[1:]:
756 if '/' not in mimetype: 779 if '/' not in mimetype:
757 mimetype = 'application/' + mimetype 780 mimetype = 'application/' + mimetype
758 mimetype_map[mimetype] = mapping[0] 781 mimetype_map[mimetype] = ext_name
759 782 for magic_re in ext_info.get('magic', ()):
760 magic_mime_map = {} 783 magic_mime_map[re.compile(magic_re)] = ext_name
761 for mapping in (('deb', 'Debian binary package'), 784 for extension in ext_info.get('extensions', ()):
762 ('cpio', 'cpio archive'), 785 extension_map.setdefault(extension, []).append((ext_name, None))
763 ('tar', 'POSIX tar archive'), 786
764 ('zip', '(Zip|ZIP self-extracting) archive'), 787 for mapping in (('tar', 'bzip2', 'tar.bz2'),
765 ('rpm', 'RPM'), 788 ('tar', 'gzip', 'tar.gz', 'tgz'),
766 ('7z', '7-zip archive'), 789 ('compress', 'gzip', 'Z', 'gz'),
767 ('cab', 'Microsoft Cabinet archive'), 790 ('compress', 'bzip2', 'bz2'),
768 ('shield', 'InstallShield CAB')): 791 ('compress', 'lzma', 'lzma')):
769 for pattern in mapping[1:]: 792 for extension in mapping[2:]:
770 magic_mime_map[re.compile(pattern)] = mapping[0] 793 extension_map.setdefault(extension, []).append(mapping[:2])
771 794
772 magic_encoding_map = {} 795 magic_encoding_map = {}
773 for mapping in (('bzip2', 'bzip2 compressed'), 796 for mapping in (('bzip2', 'bzip2 compressed'),
774 ('gzip', 'gzip compressed')): 797 ('gzip', 'gzip compressed')):
775 for pattern in mapping[1:]: 798 for pattern in mapping[1:]:
776 magic_encoding_map[re.compile(pattern)] = mapping[0] 799 magic_encoding_map[re.compile(pattern)] = mapping[0]
777 800
778 extension_map = {}
779 for mapping in (('tar', 'bzip2', 'tar.bz2'),
780 ('tar', 'gzip', 'tar.gz', 'tgz'),
781 ('tar', None, 'tar'),
782 ('zip', None, 'zip'),
783 ('deb', None, 'deb'),
784 ('rpm', None, 'rpm'),
785 ('cpio', None, 'cpio'),
786 ('gem', None, 'gem'),
787 ('compress', 'gzip', 'Z', 'gz'),
788 ('compress', 'bzip2', 'bz2'),
789 ('compress', 'lzma', 'lzma'),
790 ('7z', None, '7z'),
791 ('cab', None, 'cab'),
792 ('shield', None, 'cab', 'hdr')):
793 for extension in mapping[2:]:
794 extension_map.setdefault(extension, []).append(mapping[:2])
795
796 def __init__(self, filename, options): 801 def __init__(self, filename, options):
797 self.filename = filename 802 self.filename = filename
798 self.options = options 803 self.options = options
799 804
800 def build_extractor(self, archive_type, encoding): 805 def build_extractor(self, archive_type, encoding):
801 extractors = self.extractor_map[archive_type] 806 extractors = self.extractor_map[archive_type]
802 if self.options.metadata and (extractors[1] is not None): 807 if self.options.metadata and extractors.has_key('metadata'):
803 extractor = extractors[1] 808 extractor = extractors['metadata']
804 else: 809 else:
805 extractor = extractors[0] 810 extractor = extractors['extractor']
806 return extractor(self.filename, encoding) 811 return extractor(self.filename, encoding)
807 812
808 def get_extractor(self): 813 def get_extractor(self):
809 tried_types = set() 814 tried_types = set()
810 # As smart as it is, the magic test can't go first, because at least 815 # As smart as it is, the magic test can't go first, because at least

mercurial