44 |
44 |
# Content-layout codes.  At least BOMB is returned by a check_contents()
# method, and the codes are used as keys of handler_map to pick the handler
# class for an archive.
MATCHING_DIRECTORY = 1
# ONE_DIRECTORY = 2
BOMB = 3
EMPTY = 4
DECOMPRESSED = 5

# Register '.bz2' as a bzip2 encoding unless the mimetypes module already
# has an entry for that suffix.
mimetypes.encodings_map.setdefault('.bz2', 'bzip2')
# Force '.exe' to the MIME type that extractor_map routes to ZipExtractor.
mimetypes.types_map['.exe'] = 'application/x-msdos-program'
|
53 |
|
def run_command(command, description, stdout=None, stderr=None, stdin=None):
    """Run command to completion and describe any failure.

    command is the argument list given to subprocess.Popen; stdin, stdout
    and stderr are passed through to it unchanged.  description is only
    used to label the error message.

    Returns None when the process exits with status 0, otherwise a string
    naming the description, the joined command line and the exit status.
    """
    child = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                             stderr=stderr)
    exit_status = child.wait()
    # Either attribute may be something without a close() method (such as
    # None); the resulting AttributeError is deliberately ignored.
    for stream in (child.stdout, child.stderr):
        try:
            stream.close()
        except AttributeError:
            continue
    if exit_status == 0:
        return None
    command_line = ' '.join(command)
    return "%s error: '%s' returned status code %s" % (
        description, command_line, exit_status)
|
67 |
|
class FilenameChecker(object):
    """Pick an unused filesystem name based on a preferred name."""

    def __init__(self, original_name):
        # The preferred name; alternatives are formed by appending a suffix.
        self.original_name = original_name

    def _is_free(self, filename):
        """Return True when nothing exists at filename."""
        if os.path.exists(filename):
            return False
        return True

    def is_free(self, filename=None):
        """Report whether filename (default: the preferred name) is unused."""
        if filename is not None:
            return self._is_free(filename)
        return self._is_free(self.original_name)

    def check(self):
        """Return the first unused name among the preferred name and the
        preferred name followed by '.1' through '.9'.

        Raises ValueError when all ten candidates are taken.
        """
        suffixes = ['']
        suffixes.extend(['.%s' % (number,) for number in range(1, 10)])
        for suffix in suffixes:
            candidate = '%s%s' % (self.original_name, suffix)
            if not self.is_free(candidate):
                continue
            return candidate
        raise ValueError("all alternatives for name %s taken" %
                         (self.original_name,))
|
87 |
|
88 |
|
class DirectoryChecker(FilenameChecker):
    """FilenameChecker variant that tests a name by creating a directory."""

    def _is_free(self, filename):
        """Try to create the directory filename and report the outcome.

        Returns True after os.mkdir() succeeds, which means the directory
        now exists as a side effect.  Returns False when the name is
        already taken (EEXIST).  Any other OSError propagates.
        """
        try:
            os.mkdir(filename)
        # "except X as name" is accepted by Python 2.6+ and Python 3; the
        # older comma form is a syntax error on Python 3.
        except OSError as error:
            if error.errno == errno.EEXIST:
                return False
            raise
        return True
|
98 |
52 |
99 |
class ExtractorError(Exception):
    """Error raised by the extractors, e.g. when an external command fails."""
55 |
102 |
56 |
103 |
105 self.pipe([self.decoders[encoding]], "decoding") |
154 self.pipe([self.decoders[encoding]], "decoding") |
106 self.prepare() |
155 self.prepare() |
107 |
156 |
def run(self, command, description="extraction", stdout=None, stderr=None,
        stdin=None):
    """Execute command through run_command() and raise on failure.

    All arguments are forwarded to run_command().  When it returns an
    error message, that message is raised as an ExtractorError; otherwise
    this method returns None.
    """
    failure = run_command(command, description, stdout=stdout, stderr=stderr,
                          stdin=stdin)
    if not failure:
        return
    raise ExtractorError(failure)
|
121 |
162 |
122 def pipe(self, command, description, stderr=None): |
163 def pipe(self, command, description, stderr=None): |
123 output = tempfile.TemporaryFile() |
164 output = tempfile.TemporaryFile() |
124 self.run(command, description, output, stderr, self.archive) |
165 self.run(command, description, output, stderr, self.archive) |
125 self.archive.close() |
166 self.archive.close() |
164 mimetypes.suffix_map.has_key(extension)): |
205 mimetypes.suffix_map.has_key(extension)): |
165 pieces.pop() |
206 pieces.pop() |
166 return '.'.join(pieces) |
207 return '.'.join(pieces) |
167 |
208 |
def extract(self, path):
    """Unpack the archive inside the directory path.

    Changes into path, rewinds self.archive and calls
    self.extract_archive().  The previous working directory is restored
    afterwards, including when extraction raises.
    """
    old_path = os.path.realpath(os.curdir)
    os.chdir(path)
    try:
        self.archive.seek(0, 0)
        self.extract_archive()
    finally:
        # Without this, a failed extraction would leave the process inside
        # path and later relative filenames would resolve against it.
        os.chdir(old_path)
171 |
215 |
172 |
216 |
class TarExtractor(BaseExtractor):
    """Extractor that hands the archive stream to the tar command."""

    def get_filenames(self):
        """Return a ProcessStreamer for ``tar -t`` over self.archive."""
        listing_command = ['tar', '-t']
        return ProcessStreamer(listing_command, self.archive)

    def extract_archive(self):
        """Run ``tar -x`` with self.archive as its standard input."""
        unpack_command = ['tar', '-x']
        self.run(unpack_command, stdin=self.archive)
179 |
223 |
180 |
224 |
class ZipExtractor(BaseExtractor):
    """Extractor that runs zipinfo/unzip on the archive's file name."""

    def __init__(self, filename, mimetype, encoding):
        # Placeholder stream; the commands below are given the file name and
        # never read self.archive.
        self.archive = StringIO()
        self.included_archives = []
        self.encoding = encoding
        self.mimetype = mimetype
        # Resolved once up front; presumably so the path stays valid after
        # the working directory changes -- confirm against BaseExtractor.
        self.filename = os.path.realpath(filename)

    def get_filenames(self):
        """Return a ProcessStreamer for ``zipinfo -1`` on the file."""
        listing_command = ['zipinfo', '-1', self.filename]
        return ProcessStreamer(listing_command, None)

    def extract_archive(self):
        """Run ``unzip -q`` on the file."""
        unpack_command = ['unzip', '-q', self.filename]
        self.run(unpack_command)
194 |
238 |
195 |
239 |
196 class CpioExtractor(BaseExtractor): |
240 class CpioExtractor(BaseExtractor): |
197 def get_filenames(self): |
241 def get_filenames(self): |
198 return ProcessStreamer(['cpio', '-t'], self.archive, |
242 return ProcessStreamer(['cpio', '-t'], self.archive, |
def check_contents(self):
    """Run TarExtractor's content check, then always report BOMB."""
    # The inherited result is discarded.  NOTE(review): the call is
    # presumably kept for its side effects -- confirm.
    TarExtractor.check_contents(self)
    return BOMB
248 |
292 |
249 |
293 |
class CompressionExtractor(BaseExtractor):
    """Extractor for a compressed file that is not a known archive type.

    get_extractor() falls back to this class when a file has an encoding
    but its MIME type is not in extractor_map.  The decoded stream is
    written to a single output file.
    """
    # Targets are plain files, so free names are probed with FilenameChecker.
    name_checker = FilenameChecker

    def basename(self):
        """Return the file's base name without a trailing encoding suffix.

        The last dot-separated piece is dropped only when it is a suffix
        listed in mimetypes.encodings_map.
        """
        pieces = os.path.basename(self.filename).split('.')
        extension = '.' + pieces[-1]
        if extension in mimetypes.encodings_map:
            pieces.pop()
        return '.'.join(pieces)

    def suggest_target(self):
        """Return an unused output name derived from basename().

        Raises ValueError when every alternative name is taken.
        """
        # The checker takes the name in its constructor and check() takes no
        # argument; the previous FilenameChecker().check(name) call raised
        # TypeError.
        return self.name_checker(self.basename()).check()

    def check_contents(self):
        """Return the name of the single file this extractor produces."""
        return self.basename()

    def extract(self, path):
        """Write the decoded stream to the file path.

        Raises ExtractorError (via run) when the copy command fails.
        """
        # Binary mode: the decoded data is arbitrary bytes.
        output = open(path, 'wb')
        try:
            self.run(['cat'], "output write", stdin=self.archive,
                     stdout=output)
        finally:
            # Close the file even when run() raises.
            output.close()
|
314 |
|
315 |
|
class BaseHandler(object):
    """Common logic for choosing a target name and extracting into it.

    Subclasses supply overwrite() and safe_extract(); both are expected to
    return None on success or an error message on failure.
    """

    def __init__(self, extractor, contents, options):
        self.logger = logging.getLogger('x-log')
        self.extractor = extractor
        self.contents = contents
        self.options = options

    def extract(self):
        """Pick self.target and run the matching extraction strategy.

        When overwriting is requested or the preferred name is free, the
        preferred name is used with overwrite(); otherwise the checker
        picks an alternative name and safe_extract() is used.

        Returns whatever the strategy returns (None or an error message).
        Previously the result was dropped, so failures went unreported.
        """
        checker = self.extractor.name_checker(self.extractor.basename())
        if self.options.overwrite or checker.is_free():
            self.target = self.extractor.basename()
            return self.overwrite()
        self.target = checker.check()
        return self.safe_extract()

    def do_extract(self, directory):
        """Extract into directory; return None or the error message."""
        try:
            self.extractor.extract(directory)
        except ExtractorError as error:
            # ExtractorError is a plain Exception built from a message, so
            # it has no strerror attribute; str() yields the message.
            return str(error)
        return None

    def cleanup(self):
        """Give the owner access to everything under self.target.

        First makes every directory u+rwx, then makes everything u+rw.
        Returns None on success, otherwise a message naming the command
        that failed and its exit status.
        """
        command = 'find'
        status = subprocess.call(['find', self.target, '-type', 'd',
                                  '-exec', 'chmod', 'u+rwx', '{}', ';'])
        if status == 0:
            command = 'chmod'
            status = subprocess.call(['chmod', '-R', 'u+rw', self.target])
        if status != 0:
            return "%s returned with exit status %s" % (command, status)
        return None
272 |
347 |
273 |
348 |
class MatchHandler(BaseHandler):
    """Handler that lets the archive unpack under its own top-level name."""

    def overwrite(self):
        """Extract straight into the current directory."""
        return self.do_extract('.')

    def safe_extract(self):
        """Extract into a scratch directory, then move the result to target.

        Returns None on success or the error message from do_extract().
        """
        import shutil

        # The scratch directory is created inside the current directory so
        # the rename below stays on one filesystem; os.rename() can fail
        # when source and destination are on different filesystems.
        tempdir = tempfile.mkdtemp(prefix='.extract-',
                                   dir=os.path.abspath(os.curdir))
        result = self.do_extract(tempdir)
        if result is None:
            os.rename(os.path.join(tempdir, self.extractor.basename()),
                      self.target)
            os.rmdir(tempdir)
        else:
            # Best-effort removal so a failed extraction leaves no scratch
            # directory behind.
            shutil.rmtree(tempdir, ignore_errors=True)
        return result
362 |
class BombHandler(BaseHandler):
    """Handler that extracts into self.target itself."""

    def safe_extract(self):
        """Extract into the target name already chosen by extract()."""
        destination = self.target
        return self.do_extract(destination)

    def overwrite(self):
        """Reset the target to the extractor's base name and extract there."""
        destination = self.extractor.basename()
        self.target = destination
        return self.do_extract(destination)
|
301 |
|
302 |
371 |
class EmptyHandler(object):
    """Handler whose every step does nothing (mapped to EMPTY)."""

    def __init__(self, extractor, contents, options):
        """Accept the standard handler arguments and ignore them."""

    def extract(self):
        """Do nothing and return None."""
        return None

    def cleanup(self):
        """Do nothing and return None."""
        return None
|
376 |
307 |
377 |
# MIME type (as guessed by the mimetypes module) -> extractor class.
extractor_map = {'application/x-tar': TarExtractor,
                 'application/zip': ZipExtractor,
                 'application/x-msdos-program': ZipExtractor,
                 'application/x-debian-package': DebExtractor,
                 'application/x-redhat-package-manager': RPMExtractor,
                 'application/x-rpm': RPMExtractor,
                 'application/x-cpio': CpioExtractor}

# check_contents() result -> handler class.  get_extractor() looks results
# up with BombHandler as the default, so any unlisted result also gets
# BombHandler.
handler_map = {EMPTY: EmptyHandler,
               MATCHING_DIRECTORY: MatchHandler,
               DECOMPRESSED: BombHandler}
318 |
389 |
319 class ExtractorApplication(object): |
390 class ExtractorApplication(object): |
320 def __init__(self, arguments): |
391 def __init__(self, arguments): |
321 self.parse_options(arguments) |
392 self.parse_options(arguments) |
322 self.setup_logger() |
393 self.setup_logger() |
336 action='count', default=3, |
407 action='count', default=3, |
337 help='suppress warning/error messages') |
408 help='suppress warning/error messages') |
338 parser.add_option('-v', '--verbose', dest='verbose', |
409 parser.add_option('-v', '--verbose', dest='verbose', |
339 action='count', default=0, |
410 action='count', default=0, |
340 help='be verbose/print debugging information') |
411 help='be verbose/print debugging information') |
|
412 parser.add_option('-o', '--overwrite', dest='overwrite', |
|
413 action='store_true', default=False, |
|
414 help='overwrite any existing target directory') |
|
415 ## parser.add_option('-f', '--flat', '--no-directory', dest='flat', |
|
416 ## action='store_true', default=False, |
|
417 ## help="don't put contents in their own directory") |
|
418 ## parser.add_option('-n', '--noninteractive', dest='batch', |
|
419 ## action='store_true', default=False, |
|
420 ## help="don't ask how to handle special cases") |
341 self.options, filenames = parser.parse_args(arguments) |
421 self.options, filenames = parser.parse_args(arguments) |
342 if not filenames: |
422 if not filenames: |
343 parser.error("you did not list any archives") |
423 parser.error("you did not list any archives") |
344 self.archives = {os.path.realpath(os.curdir): filenames} |
424 self.archives = {os.path.realpath(os.curdir): filenames} |
345 |
425 |
def get_extractor(self):
    """Set up the extractor and handler for self.current_filename.

    The extractor class comes from extractor_map, keyed by the guessed
    MIME type.  A file with an unknown MIME type but a recognised encoding
    falls back to CompressionExtractor.  The handler class is chosen from
    handler_map by the extractor's check_contents() result, defaulting to
    BombHandler.

    On success sets self.current_extractor and self.current_handler and
    returns None.  Returns an error message when the type is unknown or an
    ExtractorError is raised while inspecting the archive.
    """
    mimetype, encoding = mimetypes.guess_type(self.current_filename)
    extractor = extractor_map.get(mimetype)
    if extractor is None:
        if not encoding:
            return "not a known archive type"
        extractor = CompressionExtractor
    try:
        self.current_extractor = extractor(self.current_filename, mimetype,
                                           encoding)
        content = self.current_extractor.check_contents()
        handler = handler_map.get(content, BombHandler)
        self.current_handler = handler(self.current_extractor, content,
                                       self.options)
    # "except X as name" is accepted by Python 2.6+ and Python 3; the older
    # comma form is a syntax error on Python 3.
    except ExtractorError as error:
        return str(error)
    return None
369 |
453 |
def recurse(self):
    """Queue archives found inside the current archive for extraction.

    Does nothing unless self.options.recursive is set.  Each included
    archive is appended to self.archives under the directory it occupies
    below the current handler's target.
    """
    if self.options.recursive:
        for archive_path in self.current_extractor.included_archives:
            subdirectory, archive_name = os.path.split(archive_path)
            location = os.path.join(self.current_directory,
                                    self.current_handler.target,
                                    subdirectory)
            queued = self.archives.setdefault(location, [])
            queued.append(archive_name)
378 |
462 |
379 def report(self, function, *args): |
463 def report(self, function, *args): |
380 error = function(*args) |
464 error = function(*args) |
381 if error: |
465 if error: |