scripts/dtrx

branch
trunk
changeset 45
37d555407334
parent 43
4591a32eedc8
child 46
652871d804ab
equal deleted inserted replaced
44:69cd94403088 45:37d555407334
20 import logging 20 import logging
21 import mimetypes 21 import mimetypes
22 import optparse 22 import optparse
23 import os 23 import os
24 import re 24 import re
25 import shutil
25 import stat 26 import stat
26 import subprocess 27 import subprocess
27 import sys 28 import sys
28 import tempfile 29 import tempfile
29 import textwrap 30 import textwrap
30 import traceback 31 import traceback
31 32
32 from sets import Set 33 from sets import Set
33 34
34 VERSION = "5.0" 35 VERSION = "6.0"
35 VERSION_BANNER = """dtrx version %s 36 VERSION_BANNER = """dtrx version %s
36 Copyright (c) 2006, 2007 Brett Smith <brettcsmith@brettcsmith.org> 37 Copyright (c) 2006, 2007, 2008 Brett Smith <brettcsmith@brettcsmith.org>
37 38
38 This program is free software; you can redistribute it and/or modify it 39 This program is free software; you can redistribute it and/or modify it
39 under the terms of the GNU General Public License as published by the 40 under the terms of the GNU General Public License as published by the
40 Free Software Foundation; either version 3 of the License, or (at your 41 Free Software Foundation; either version 3 of the License, or (at your
41 option) any later version. 42 option) any later version.
118 119
119 120
120 class ExtractorError(Exception): 121 class ExtractorError(Exception):
121 pass 122 pass
122 123
124
125 class ExtractorUnusable(Exception):
126 pass
127
128
129 EXTRACTION_ERRORS = (ExtractorError, ExtractorUnusable, OSError, IOError)
123 130
124 class BaseExtractor(object): 131 class BaseExtractor(object):
125 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat', 132 decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat',
126 'lzma': 'lzcat'} 133 'lzma': 'lzcat'}
127 134
165 stdin = processes[-1].stdout 172 stdin = processes[-1].stdout
166 if index == last_pipe: 173 if index == last_pipe:
167 stdout = final_stdout 174 stdout = final_stdout
168 else: 175 else:
169 stdout = subprocess.PIPE 176 stdout = subprocess.PIPE
170 processes.append(subprocess.Popen(command, stdin=stdin, 177 try:
171 stdout=stdout, 178 processes.append(subprocess.Popen(command, stdin=stdin,
172 stderr=subprocess.PIPE)) 179 stdout=stdout,
180 stderr=subprocess.PIPE))
181 except OSError, error:
182 if error.errno == errno.ENOENT:
183 raise ExtractorUnusable("could not run %s" % (command[0],))
184 raise
173 exit_codes = [pipe.wait() for pipe in processes] 185 exit_codes = [pipe.wait() for pipe in processes]
174 self.archive.close() 186 self.archive.close()
175 for index in range(last_pipe): 187 for index in range(last_pipe):
176 processes[index].stdout.close() 188 processes[index].stdout.close()
177 processes[index].stderr.close() 189 processes[index].stderr.close()
228 os.chdir(self.target) 240 os.chdir(self.target)
229 try: 241 try:
230 self.archive.seek(0, 0) 242 self.archive.seek(0, 0)
231 self.extract_archive() 243 self.extract_archive()
232 self.check_contents() 244 self.check_contents()
233 except ExtractorError: 245 except EXTRACTION_ERRORS:
234 os.chdir(old_path) 246 os.chdir(old_path)
235 subprocess.call(['rm', '-rf', self.target]) 247 shutil.rmtree(self.target, ignore_errors=True)
236 raise 248 raise
237 os.chdir(old_path) 249 os.chdir(old_path)
238 250
239 def get_filenames(self): 251 def get_filenames(self):
240 self.run_pipes() 252 self.run_pipes()
246 return 258 return
247 yield line.rstrip('\n') 259 yield line.rstrip('\n')
248 260
249 261
250 class CompressionExtractor(BaseExtractor): 262 class CompressionExtractor(BaseExtractor):
263 file_type = 'compressed file'
251 name_checker = FilenameChecker 264 name_checker = FilenameChecker
252 265
253 def basename(self): 266 def basename(self):
254 pieces = os.path.basename(self.filename).split('.') 267 pieces = os.path.basename(self.filename).split('.')
255 extension = '.' + pieces[-1] 268 extension = '.' + pieces[-1]
267 output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.') 280 output_fd, self.target = tempfile.mkstemp(prefix='.dtrx-', dir='.')
268 except (OSError, IOError), error: 281 except (OSError, IOError), error:
269 raise ExtractorError("cannot extract here: %s" % (error.strerror,)) 282 raise ExtractorError("cannot extract here: %s" % (error.strerror,))
270 try: 283 try:
271 self.run_pipes(output_fd) 284 self.run_pipes(output_fd)
272 except ExtractorError: 285 except EXTRACTION_ERRORS:
273 os.close(output_fd) 286 os.close(output_fd)
274 os.unlink(self.target) 287 os.unlink(self.target)
275 raise 288 raise
276 os.close(output_fd) 289 os.close(output_fd)
277 290
278 291
279 class TarExtractor(BaseExtractor): 292 class TarExtractor(BaseExtractor):
293 file_type = 'tar file'
294
280 def get_filenames(self): 295 def get_filenames(self):
281 self.pipe(['tar', '-t'], "listing") 296 self.pipe(['tar', '-t'], "listing")
282 return BaseExtractor.get_filenames(self) 297 return BaseExtractor.get_filenames(self)
283 298
284 def extract_archive(self): 299 def extract_archive(self):
285 self.pipe(['tar', '-x']) 300 self.pipe(['tar', '-x'])
286 self.run_pipes() 301 self.run_pipes()
287 302
288 303
289 class CpioExtractor(BaseExtractor): 304 class CpioExtractor(BaseExtractor):
305 file_type = 'cpio file'
306
290 def get_filenames(self): 307 def get_filenames(self):
291 self.pipe(['cpio', '-t'], "listing") 308 self.pipe(['cpio', '-t'], "listing")
292 return BaseExtractor.get_filenames(self) 309 return BaseExtractor.get_filenames(self)
293 310
294 def extract_archive(self): 311 def extract_archive(self):
296 '--no-absolute-filenames']) 313 '--no-absolute-filenames'])
297 self.run_pipes() 314 self.run_pipes()
298 315
299 316
300 class RPMExtractor(CpioExtractor): 317 class RPMExtractor(CpioExtractor):
318 file_type = 'RPM'
319
301 def prepare(self): 320 def prepare(self):
302 self.pipe(['rpm2cpio', '-'], "rpm2cpio") 321 self.pipe(['rpm2cpio', '-'], "rpm2cpio")
303 322
304 def basename(self): 323 def basename(self):
305 pieces = os.path.basename(self.filename).split('.') 324 pieces = os.path.basename(self.filename).split('.')
318 self.check_included_archives(os.listdir('.')) 337 self.check_included_archives(os.listdir('.'))
319 self.content_type = BOMB 338 self.content_type = BOMB
320 339
321 340
322 class DebExtractor(TarExtractor): 341 class DebExtractor(TarExtractor):
342 file_type = 'Debian package'
343
323 def prepare(self): 344 def prepare(self):
324 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'], 345 self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
325 "data.tar.gz extraction") 346 "data.tar.gz extraction")
326 self.pipe(['zcat'], "data.tar.gz decompression") 347 self.pipe(['zcat'], "data.tar.gz decompression")
327 348
345 "control.tar.gz extraction") 366 "control.tar.gz extraction")
346 self.pipe(['zcat'], "control.tar.gz decompression") 367 self.pipe(['zcat'], "control.tar.gz decompression")
347 368
348 369
349 class GemExtractor(TarExtractor): 370 class GemExtractor(TarExtractor):
371 file_type = 'Ruby gem'
372
350 def prepare(self): 373 def prepare(self):
351 self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction") 374 self.pipe(['tar', '-xO', 'data.tar.gz'], "data.tar.gz extraction")
352 self.pipe(['zcat'], "data.tar.gz decompression") 375 self.pipe(['zcat'], "data.tar.gz decompression")
353 376
354 def check_contents(self): 377 def check_contents(self):
355 self.check_included_archives(os.listdir('.')) 378 self.check_included_archives(os.listdir('.'))
356 self.content_type = BOMB 379 self.content_type = BOMB
357 380
358 381
359 class GemMetadataExtractor(CompressionExtractor): 382 class GemMetadataExtractor(CompressionExtractor):
383 file_type = 'Ruby gem'
384
360 def prepare(self): 385 def prepare(self):
361 self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction") 386 self.pipe(['tar', '-xO', 'metadata.gz'], "metadata.gz extraction")
362 self.pipe(['zcat'], "metadata.gz decompression") 387 self.pipe(['zcat'], "metadata.gz decompression")
363 388
364 def basename(self): 389 def basename(self):
381 BaseExtractor.__init__(self, '/dev/null', None) 406 BaseExtractor.__init__(self, '/dev/null', None)
382 self.filename = os.path.realpath(filename) 407 self.filename = os.path.realpath(filename)
383 408
384 409
385 class ZipExtractor(NoPipeExtractor): 410 class ZipExtractor(NoPipeExtractor):
411 file_type = 'Zip file'
412
386 def get_filenames(self): 413 def get_filenames(self):
387 self.pipe(['zipinfo', '-1', self.filename], "listing") 414 self.pipe(['zipinfo', '-1', self.filename], "listing")
388 return BaseExtractor.get_filenames(self) 415 return BaseExtractor.get_filenames(self)
389 416
390 def extract_archive(self): 417 def extract_archive(self):
391 self.pipe(['unzip', '-q', self.filename]) 418 self.pipe(['unzip', '-q', self.filename])
392 self.run_pipes() 419 self.run_pipes()
393 420
394 421
395 class SevenExtractor(NoPipeExtractor): 422 class SevenExtractor(NoPipeExtractor):
423 file_type = '7z file'
396 border_re = re.compile('^[- ]+$') 424 border_re = re.compile('^[- ]+$')
397 425
398 def get_filenames(self): 426 def get_filenames(self):
399 self.pipe(['7z', 'l', self.filename], "listing") 427 self.pipe(['7z', 'l', self.filename], "listing")
400 self.run_pipes() 428 self.run_pipes()
414 self.pipe(['7z', 'x', self.filename]) 442 self.pipe(['7z', 'x', self.filename])
415 self.run_pipes() 443 self.run_pipes()
416 444
417 445
418 class CABExtractor(NoPipeExtractor): 446 class CABExtractor(NoPipeExtractor):
447 file_type = 'CAB archive'
419 border_re = re.compile(r'^[-\+]+$') 448 border_re = re.compile(r'^[-\+]+$')
420 449
421 def get_filenames(self): 450 def get_filenames(self):
422 self.pipe(['cabextract', '-l', self.filename], "listing") 451 self.pipe(['cabextract', '-l', self.filename], "listing")
423 self.run_pipes() 452 self.run_pipes()
778 self.target = None 807 self.target = None
779 808
780 def report(self, function, *args): 809 def report(self, function, *args):
781 try: 810 try:
782 error = function(*args) 811 error = function(*args)
783 except (ExtractorError, IOError, OSError), exception: 812 except EXTRACTION_ERRORS, exception:
784 error = str(exception) 813 error = str(exception)
785 logger.debug(''.join(traceback.format_exception(*sys.exc_info()))) 814 logger.debug(''.join(traceback.format_exception(*sys.exc_info())))
786 return error 815 return error
787 816
788 817
811 840
812 841
813 class ListAction(BaseAction): 842 class ListAction(BaseAction):
814 def __init__(self, options, filenames): 843 def __init__(self, options, filenames):
815 BaseAction.__init__(self, options, filenames) 844 BaseAction.__init__(self, options, filenames)
845 self.count = 0
816 846
817 def get_list(self, extractor): 847 def get_list(self, extractor):
818 # Note: The reason I'm getting all the filenames up front is 848 # Note: The reason I'm getting all the filenames up front is
819 # because if we run into trouble partway through the archive, we'll 849 # because if we run into trouble partway through the archive, we'll
820 # try another extractor. So before we display anything we have to 850 # try another extractor. So before we display anything we have to
821 # be sure this one is successful. We maybe don't have to be quite 851 # be sure this one is successful. We maybe don't have to be quite
822 # this conservative but this is the easy way out for now. 852 # this conservative but this is the easy way out for now.
823 self.filelist = list(extractor.get_filenames()) 853 self.filelist = list(extractor.get_filenames())
824 854
825 def show_list(self, filename): 855 def show_list(self, filename):
856 self.count += 1
826 if len(self.filenames) != 1: 857 if len(self.filenames) != 1:
827 if filename != self.filenames[0]: 858 if self.count > 1:
828 print 859 print
829 print "%s:" % (filename,) 860 print "%s:" % (filename,)
830 print '\n'.join(self.filelist) 861 print '\n'.join(self.filelist)
831 862
832 def run(self, filename, extractor): 863 def run(self, filename, extractor):
833 self.current_filename = filename
834 return (self.report(self.get_list, extractor) or 864 return (self.report(self.get_list, extractor) or
835 self.report(self.show_list, filename)) 865 self.report(self.show_list, filename))
836 866
837 867
838 class ExtractorApplication(object): 868 class ExtractorApplication(object):
907 return error.strerror 937 return error.strerror
908 if stat.S_ISDIR(result.st_mode): 938 if stat.S_ISDIR(result.st_mode):
909 return "cannot extract a directory" 939 return "cannot extract a directory"
910 940
911 def try_extractors(self, filename, builder): 941 def try_extractors(self, filename, builder):
912 last_error = "could not find a way to extract this" 942 errors = []
913 while True: 943 for extractor in builder:
914 try:
915 extractor = builder.next()
916 except StopIteration:
917 return last_error
918 except (IOError, OSError, ExtractorError), error:
919 return str(error)
920 error = self.action.run(filename, extractor) 944 error = self.action.run(filename, extractor)
921 if error: 945 if error:
922 logger.info("%s: %s" % (filename, error)) 946 errors.append((extractor.file_type, extractor.encoding, error))
923 last_error = error
924 else: 947 else:
925 self.recurse(filename, extractor, self.action) 948 self.recurse(filename, extractor, self.action)
926 return 949 return
950 logger.error("could not handle %s" % (filename,))
951 if not errors:
952 logger.error("not a known archive type")
953 return True
954 for file_type, encoding, error in errors:
955 message = ["treating as", file_type, "failed:", error]
956 if encoding:
957 message.insert(1, "%s-encoded" % (encoding,))
958 logger.error(' '.join(message))
959 return True
927 960
928 def run(self): 961 def run(self):
929 if self.options.show_list: 962 if self.options.show_list:
930 action = ListAction 963 action = ListAction
931 else: 964 else:
937 for filename in self.filenames: 970 for filename in self.filenames:
938 builder = ExtractorBuilder(filename, self.options) 971 builder = ExtractorBuilder(filename, self.options)
939 error = (self.check_file(filename) or 972 error = (self.check_file(filename) or
940 self.try_extractors(filename, builder.get_extractor())) 973 self.try_extractors(filename, builder.get_extractor()))
941 if error: 974 if error:
942 logger.error("%s: %s" % (filename, error)) 975 if error != True:
976 logger.error("%s: %s" % (filename, error))
943 self.failures.append(filename) 977 self.failures.append(filename)
944 else: 978 else:
945 self.successes.append(filename) 979 self.successes.append(filename)
946 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP 980 self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP
947 if self.failures: 981 if self.failures:

mercurial