scripts/dtrx

branch
trunk
changeset 30
1015bbd6dc5e
parent 29
5fad99c17221
child 31
c3a2760d1c3a
equal deleted inserted replaced
29:5fad99c17221 30:1015bbd6dc5e
20 import errno 20 import errno
21 import logging 21 import logging
22 import mimetypes 22 import mimetypes
23 import optparse 23 import optparse
24 import os 24 import os
25 import re
25 import stat 26 import stat
26 import subprocess 27 import subprocess
27 import sys 28 import sys
28 import tempfile 29 import tempfile
29 import textwrap 30 import textwrap
546 547
547 handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler, 548 handlers = [FlatHandler, OverwriteHandler, MatchHandler, EmptyHandler,
548 BombHandler] 549 BombHandler]
549 550
550 class ExtractorBuilder(object): 551 class ExtractorBuilder(object):
551 extractor_map = {} 552 extractor_map = {'tar': (TarExtractor, None),
552 for args in ((TarExtractor, None, 'x-tar'), 553 'zip': (ZipExtractor, None),
553 (ZipExtractor, None, 'zip', 'x-msdos-program'), 554 'deb': (DebExtractor, DebMetadataExtractor),
554 (DebExtractor, DebMetadataExtractor, 'x-debian-package'), 555 'rpm': (RPMExtractor, None),
555 (RPMExtractor, None, 'x-redhat-package-manager', 'x-rpm'), 556 'cpio': (CpioExtractor, None),
556 (CpioExtractor, None, 'x-cpio'), 557 'gem': (GemExtractor, GemMetadataExtractor),
557 (GemExtractor, GemMetadataExtractor, 'x-ruby-gem')): 558 'compress': (CompressionExtractor, None)}
558 for entry in args[2:]: 559
559 if '/' not in entry: 560 mimetype_map = {}
560 entry = 'application/' + entry 561 for mapping in (('tar', 'x-tar'),
561 extractor_map[entry] = args[0:2] 562 ('zip', 'x-msdos-program', 'zip'),
563 ('deb', 'x-debian-package'),
564 ('rpm', 'x-redhat-package-manager', 'x-rpm'),
565 ('cpio', 'x-cpio'),
566 ('gem', 'x-ruby-gem')):
567 for mimetype in mapping[1:]:
568 if '/' not in mimetype:
569 mimetype = 'application/' + mimetype
570 mimetype_map[mimetype] = mapping[0]
571
572 magic_mime_map = {}
573 for mapping in (('deb', 'Debian binary package'),
574 ('cpio', 'cpio archive'),
575 ('tar', 'POSIX tar archive'),
576 ('zip', 'Zip archive'),
577 ('rpm', 'RPM')):
578 for pattern in mapping[1:]:
579 magic_mime_map[re.compile(pattern)] = mapping[0]
580
581 magic_encoding_map = {}
582 for mapping in (('bzip2', 'bzip2 compressed'),
583 ('gzip', 'gzip compressed')):
584 for pattern in mapping[1:]:
585 magic_encoding_map[re.compile(pattern)] = mapping[0]
562 586
563 def __init__(self, filename, options): 587 def __init__(self, filename, options):
564 self.filename = filename 588 self.filename = filename
565 self.options = options 589 self.options = options
566 self.mimetype, self.encoding = mimetypes.guess_type(self.filename) 590
591 def build_extractor(self, archive_type, encoding):
592 extractors = self.extractor_map[archive_type]
593 if self.options.metadata and (extractors[1] is not None):
594 extractor = extractors[1]
595 else:
596 extractor = extractors[0]
597 return extractor(self.filename, encoding)
567 598
568 def get_extractor(self): 599 def get_extractor(self):
569 extractor = self.find_extractor() 600 for func_name in ('mimetype', 'magic'):
570 if extractor is None: 601 archive_type, encoding = getattr(self, 'try_by_' + func_name)()
571 raise ExtractorError("not a known archive type") 602 if archive_type is not None:
572 return extractor(self.filename, self.encoding) 603 yield self.build_extractor(archive_type, encoding)
573 604
574 def find_extractor(self): 605 def try_by_mimetype(self):
575 extractor = None 606 mimetype, encoding = mimetypes.guess_type(self.filename)
576 try: 607 try:
577 extractors = self.extractor_map[self.mimetype] 608 return self.mimetype_map[mimetype], encoding
578 if self.options.metadata and (extractors[1] is not None):
579 extractor = extractors[1]
580 else:
581 extractor = extractors[0]
582 except KeyError: 609 except KeyError:
583 if self.encoding: 610 if encoding:
584 extractor = CompressionExtractor 611 return 'compress', encoding
585 return extractor 612 return None, None
613
614 def try_by_magic(self):
615 process = subprocess.Popen(['file', '-z', self.filename],
616 stdout=subprocess.PIPE)
617 status = process.wait()
618 if status != 0:
619 return None, None
620 output = process.stdout.readline()
621 process.stdout.close()
622 if output.startswith('%s: ' % self.filename):
623 output = output[len(self.filename) + 2:]
624 results = [None, None]
625 for index, mapping in enumerate((self.magic_mime_map,
626 self.magic_encoding_map)):
627 for regexp, result in mapping.items():
628 if regexp.search(output):
629 results[index] = result
630 break
631 return results
586 632
587 633
588 class ExtractorApplication(object): 634 class ExtractorApplication(object):
589 def __init__(self, arguments): 635 def __init__(self, arguments):
590 self.parse_options(arguments) 636 self.parse_options(arguments)
636 handler.setLevel(10 * (self.options.quiet - self.options.verbose)) 682 handler.setLevel(10 * (self.options.quiet - self.options.verbose))
637 formatter = logging.Formatter("dtrx: %(levelname)s: %(message)s") 683 formatter = logging.Formatter("dtrx: %(levelname)s: %(message)s")
638 handler.setFormatter(formatter) 684 handler.setFormatter(formatter)
639 self.logger.addHandler(handler) 685 self.logger.addHandler(handler)
640 686
641 def get_extractor(self):
642 builder = ExtractorBuilder(self.current_filename, self.options)
643 self.current_extractor = builder.get_extractor()
644
645 def get_handler(self): 687 def get_handler(self):
646 for var_name in ('type', 'name'): 688 for var_name in ('type', 'name'):
647 exec('content_%s = self.current_extractor.content_%s' % 689 exec('content_%s = self.current_extractor.content_%s' %
648 (var_name, var_name)) 690 (var_name, var_name))
649 if content_type == ONE_ENTRY: 691 if content_type == ONE_ENTRY:
681 self.successes.append(self.current_filename) 723 self.successes.append(self.current_filename)
682 else: 724 else:
683 self.failures.append(self.current_filename) 725 self.failures.append(self.current_filename)
684 726
685 def extract(self): 727 def extract(self):
686 while self.archives: 728 success = (self.report(self.current_extractor.extract) and
687 self.current_directory, filenames = self.archives.popitem() 729 self.report(self.get_handler) and
688 for filename in filenames: 730 self.report(self.current_handler.handle))
689 os.chdir(self.current_directory) 731 if success:
690 self.current_filename = filename 732 self.recurse()
691 success = (self.report(self.get_extractor) and 733 return success
692 self.report(self.current_extractor.extract) and
693 self.report(self.get_handler) and
694 self.report(self.current_handler.handle))
695 if success:
696 self.recurse()
697 self.record_status(success)
698 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP
699 734
700 def show_contents(self): 735 def show_contents(self):
701 for filename in self.current_extractor.get_filenames(): 736 for filename in self.current_extractor.get_filenames():
702 print filename 737 print filename
703 738
704 def show_list(self): 739 def make_list(self):
705 filenames = self.archives.values()[0] 740 if len(self.archives.values()[0]) == 1:
706 if len(filenames) > 1: 741 def show_list():
707 header = "%s:\n" 742 return self.report(self.show_contents)
708 else: 743 else:
709 header = None 744 def show_list():
710 for filename in filenames: 745 if self.current_filename == self.filenames[0]:
711 if header: 746 print "%s:\n" % (self.current_filename,),
712 print header % (filename,), 747 else:
713 header = "\n%s:\n" 748 print "\n%s:\n" % (self.current_filename,),
714 self.current_filename = filename 749 return self.report(self.show_contents)
715 success = (self.report(self.get_extractor) and 750 return show_list
716 self.report(self.show_contents))
717 self.record_status(success)
718 751
719 def run(self): 752 def run(self):
720 if self.options.show_list: 753 if self.options.show_list:
721 self.show_list() 754 action_function = self.make_list()
722 else: 755 else:
723 self.extract() 756 action_function = self.extract
757 while self.archives:
758 self.current_directory, self.filenames = self.archives.popitem()
759 os.chdir(self.current_directory)
760 for filename in self.filenames:
761 self.current_filename = filename
762 builder = ExtractorBuilder(self.current_filename, self.options)
763 for extractor in builder.get_extractor():
764 self.current_extractor = extractor
765 success = action_function()
766 if success:
767 self.record_status(success)
768 break
769 else:
770 self.record_status(success=False)
771 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP
724 if self.failures: 772 if self.failures:
725 return 1 773 return 1
726 return 0 774 return 0
727 775
728 776

mercurial