Sun, 29 Apr 2007 15:25:04 -0400
[svn] Little DRY cleanups.
1 | 1 | #!/usr/bin/env python |
2 | # | |
19 | 3 | # dtrx -- Intelligently extract various archive types. |
23
039dd321a7d0
[svn] If an archive contains other archives, and the user didn't specify that
brett
parents:
22
diff
changeset
|
4 | # Copyright (c) 2006, 2007 Brett Smith <brettcsmith@brettcsmith.org>. |
1 | 5 | # |
6 | # This program is free software; you can redistribute it and/or modify it | |
7 | # under the terms of the GNU General Public License as published by the | |
8 | # Free Software Foundation; either version 2 of the License, or (at your | |
9 | # option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but | |
12 | # WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General | |
14 | # Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along | |
17 | # with this program; if not, write to the Free Software Foundation, Inc., | |
18 | # 51 Franklin Street, 5th Floor, Boston, MA, 02111. | |
19 | ||
5 | 20 | import errno |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
21 | import logging |
1 | 22 | import mimetypes |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
23 | import optparse |
1 | 24 | import os |
15
28dbd52a8bb8
[svn] Add a -f/--flat option, which will extract the archive contents into the
brett
parents:
14
diff
changeset
|
25 | import stat |
1 | 26 | import subprocess |
27 | import sys | |
28 | import tempfile | |
20
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
29 | import textwrap |
1 | 30 | |
31 | from cStringIO import StringIO | |
32 | ||
# Release number; interpolated into VERSION_BANNER below.
VERSION = "4.0"

# Banner layout.  The single %s slot on the first line receives VERSION.
_BANNER_TEMPLATE = """dtrx version %s
Copyright (c) 2006, 2007 Brett Smith <brettcsmith@brettcsmith.org>

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
Public License for more details."""

# Version and licensing text with the release number filled in.
VERSION_BANNER = _BANNER_TEMPLATE % (VERSION,)
1 | 46 | |
# Archive layout classifications returned by the extractors' check_contents():
#   MATCHING_DIRECTORY - every entry sits under one top-level name that equals
#                        the archive's basename
#   ONE_ENTRY          - a single entry, or one shared top-level name that
#                        differs from the archive's basename
#   BOMB               - entries do not all share the first entry's prefix
#   EMPTY              - the listing produced no entries
#   ONE_ENTRY_KNOWN    - NOTE(review): not used in this section; confirm
#                        its meaning against the rest of the file
MATCHING_DIRECTORY, ONE_ENTRY, BOMB, EMPTY, ONE_ENTRY_KNOWN = range(1, 6)

# Extraction policy choices.  The code that consumes them is outside this
# section -- presumably the answers to the "wrong directory name" prompt.
EXTRACT_HERE, EXTRACT_WRAP, EXTRACT_RENAME = range(1, 4)

# Recursion policy choices for archives found inside archives.  The code
# that consumes them is outside this section.
RECURSE_ALWAYS, RECURSE_ONCE, RECURSE_NOT_NOW, RECURSE_NEVER = range(1, 5)

# Teach the mimetypes tables about bzip2 (unless an entry already exists)
# and force the MIME type reported for .exe files.
mimetypes.encodings_map.setdefault('.bz2', 'bzip2')
mimetypes.types_map['.exe'] = 'application/x-msdos-program'
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
64 | |
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
def run_command(command, description, stdout=None, stderr=None, stdin=None):
    """Run an external command to completion and describe any failure.

    command is the argument list handed to subprocess.Popen; description
    is a short label used as the prefix of the error text.  stdout, stderr
    and stdin are passed straight through to Popen.

    Returns None when the command exits with status 0.  Otherwise returns
    a string of the form
    "<description> error: '<command>' returned status code <status>".
    """
    process = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                               stderr=stderr)
    # communicate() drains any stream opened with subprocess.PIPE while it
    # waits, and closes those pipes afterwards.  A bare wait() would hang
    # forever once the child filled an unread pipe's buffer.  The captured
    # output is discarded; only the exit status matters here.
    process.communicate()
    status = process.returncode
    if status != 0:
        return ("%s error: '%s' returned status code %s" %
                (description, ' '.join(command), status))
    return None
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
78 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
class FilenameChecker(object):
    """Find an unused filesystem name based on a preferred name.

    The preferred name is tried first, followed by the same name with the
    suffixes ".1" through ".9".
    """

    def __init__(self, original_name):
        self.original_name = original_name

    def is_free(self, filename):
        """Return True when nothing exists at filename."""
        taken = os.path.exists(filename)
        return not taken

    def check(self):
        """Return the first candidate name that is_free() accepts.

        Raises ValueError when the preferred name and all nine numbered
        alternatives are rejected.
        """
        suffixes = ['']
        suffixes.extend(['.%s' % (number,) for number in range(1, 10)])
        for suffix in suffixes:
            candidate = '%s%s' % (self.original_name, suffix)
            if not self.is_free(candidate):
                continue
            return candidate
        raise ValueError("all alternatives for name %s taken" %
                         (self.original_name,))
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
93 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
94 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
class DirectoryChecker(FilenameChecker):
    """Name checker that claims a directory name by creating it."""

    def is_free(self, filename):
        """Try to create filename as a directory.

        Returns True when the directory was created, which means the name
        was free and is now reserved on disk.  Returns False when
        something already exists at that path.  Any other OSError (for
        example a permissions problem) is re-raised unchanged.

        Creating the directory instead of testing for it first avoids a
        window in which something else could take the name.
        """
        try:
            os.mkdir(filename)
        except OSError as error:
            if error.errno == errno.EEXIST:
                return False
            raise
        return True
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
104 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
105 | |
class ExtractorError(Exception):
    """Raised when an archive cannot be opened or an external command fails."""
108 | ||
109 | ||
class ProcessStreamer(object):
    """Iterate over the lines an external command writes to its stdout.

    The command is started as soon as the object is created.  Iterating
    yields each output line without its trailing newline.  Call stop()
    when finished to drain the remaining output, reap the process and
    check its exit status.
    """

    def __init__(self, command, stdin, description="checking contents",
                 stderr=None):
        # bufsize=1 asks for line buffering; stdout is always captured so
        # the lines can be read back one at a time.
        self.process = subprocess.Popen(command, bufsize=1, stdin=stdin,
                                        stdout=subprocess.PIPE, stderr=stderr)
        self.command = ' '.join(command)
        self.description = description

    def __iter__(self):
        return self

    def next(self):
        """Return the next output line, or raise StopIteration at EOF."""
        line = self.process.stdout.readline()
        if line:
            return line.rstrip('\n')
        else:
            raise StopIteration

    def stop(self):
        """Drain stdout, wait for the command and verify that it succeeded.

        Raises ExtractorError when the command exits with a non-zero
        status.
        """
        # Consume whatever output is left so the child cannot block on a
        # full pipe while we wait for it.
        while self.process.stdout.readline():
            pass
        self.process.stdout.close()
        status = self.process.wait()
        # Close stderr before reporting a failure; raising first would
        # leave the pipe open.  It is None unless the caller asked for
        # stderr to be captured.
        if self.process.stderr is not None:
            self.process.stderr.close()
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (self.description, self.command, status))
140 | ||
141 | ||
class BaseExtractor(object):
    """Common machinery shared by the archive extractors.

    Opens the archive, runs it through a decompressor when an encoding is
    given, classifies the layout of its contents and extracts it into a
    chosen directory.  Subclasses provide get_filenames(), which returns
    an iterator of entry names that also has a stop() method, and
    extract_archive(), which unpacks into the current directory.  They may
    override prepare() to transform the archive stream first.
    """

    # Maps an encoding name to the command that decodes it from stdin to
    # stdout.
    decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'}

    # Class used to pick a free output name for this kind of archive.
    name_checker = DirectoryChecker

    def __init__(self, filename, mimetype, encoding):
        """Open filename and prepare it for listing and extraction.

        Raises ValueError for an encoding with no known decoder and
        ExtractorError when the file cannot be opened or decoding fails.
        """
        if encoding and (encoding not in self.decoders):
            raise ValueError("unrecognized encoding %s" % (encoding,))
        self.filename = os.path.realpath(filename)
        self.mimetype = mimetype
        self.encoding = encoding
        self.included_archives = []
        try:
            # Archives are binary data; text mode could alter the bytes on
            # platforms that translate line endings.
            self.archive = open(filename, 'rb')
        except (IOError, OSError) as error:
            raise ExtractorError("could not open %s: %s" %
                                 (filename, error.strerror))
        if encoding:
            self.pipe([self.decoders[encoding]], "decoding")
        self.prepare()

    def run(self, command, description="extraction", stdout=None, stderr=None,
            stdin=None):
        """Run command, raising ExtractorError if it reports a failure."""
        error = run_command(command, description, stdout, stderr, stdin)
        if error:
            raise ExtractorError(error)

    def pipe(self, command, description, stderr=None):
        """Filter the archive through command and keep the output.

        The current archive is fed to the command's stdin and its stdout
        is collected in an anonymous temporary file, which then replaces
        self.archive.
        """
        output = tempfile.TemporaryFile()
        try:
            self.run(command, description, output, stderr, self.archive)
        except Exception:
            # Do not leave the temporary file open when the command fails.
            output.close()
            raise
        self.archive.close()
        self.archive = output
        self.archive.flush()

    def prepare(self):
        """Hook for subclasses; called once at the end of __init__."""
        pass

    def check_included_archive(self, filename):
        """Record filename if its guessed MIME type has an extractor."""
        if mimetypes.guess_type(filename)[0] in extractor_map:
            self.included_archives.append(filename)

    def check_first_filename(self, filenames):
        """Read the first entry name and work out its leading prefix.

        Returns (first_filename, prefix), where prefix is the first path
        component followed by '/' (the first two components when the
        first one is '.').  Returns (None, None) when there are no
        entries, after stopping the iterator.
        """
        try:
            first_filename = filenames.next()
        except StopIteration:
            filenames.stop()
            return (None, None)
        self.check_included_archive(first_filename)
        parts = first_filename.split('/')
        first_part = [parts[0]]
        # An entry that is exactly '.' has no second component to append.
        if parts[0] == '.' and len(parts) > 1:
            first_part.append(parts[1])
        return (first_filename, '/'.join(first_part + ['']))

    def check_second_filename(self, filenames, first_part, first_filename):
        """Classify the archive as far as the second entry allows.

        Returns (ONE_ENTRY, first_filename) when there is no second entry,
        (BOMB, None) when the second entry does not start with first_part,
        and (None, first_part) when the question is still open.
        """
        try:
            filename = filenames.next()
        except StopIteration:
            return ONE_ENTRY, first_filename
        self.check_included_archive(filename)
        if not filename.startswith(first_part):
            return BOMB, None
        return None, first_part

    def check_contents(self):
        """Classify the layout of the archive's entries.

        Returns an (archive_type, type_info) pair:
          (EMPTY, None)                     no entries at all
          (ONE_ENTRY, first_filename)       exactly one entry
          (BOMB, None)                      some entry lies outside the
                                            first entry's prefix
          (MATCHING_DIRECTORY, first_part)  one shared prefix that equals
                                            basename()
          (ONE_ENTRY, first_part)           one shared prefix with any
                                            other name
        Every entry is also passed to check_included_archive().
        """
        filenames = self.get_filenames()
        first_filename, first_part = self.check_first_filename(filenames)
        if first_filename is None:
            return (EMPTY, None)
        archive_type, type_info = self.check_second_filename(filenames,
                                                             first_part,
                                                             first_filename)
        # Keep reading after a verdict is reached so that every nested
        # archive is still recorded.
        for filename in filenames:
            self.check_included_archive(filename)
            if (archive_type != BOMB) and (not filename.startswith(first_part)):
                archive_type = BOMB
                type_info = None
        filenames.stop()
        if archive_type is None:
            # first_part ends with '/'; drop it for the comparison.
            if self.basename() == first_part[:-1]:
                archive_type = MATCHING_DIRECTORY
            else:
                archive_type = ONE_ENTRY
        return archive_type, type_info

    def basename(self):
        """Return the archive's file name without its archive extensions.

        Removes one trailing encoding extension (such as '.gz') and then
        one trailing extension known to the mimetypes tables.  An
        extension is only removed while at least one other
        dot-separated piece would remain, so a file named 'gz' or
        'tar.gz' keeps a usable name.
        """
        pieces = os.path.basename(self.filename).split('.')
        extension = '.' + pieces[-1]
        if len(pieces) > 1 and extension in mimetypes.encodings_map:
            pieces.pop()
            extension = '.' + pieces[-1]
        if len(pieces) > 1 and (extension in mimetypes.types_map or
                                extension in mimetypes.common_types or
                                extension in mimetypes.suffix_map):
            pieces.pop()
        return '.'.join(pieces)

    def extract(self, path):
        """Extract the archive inside the directory path.

        The working directory is switched to path for the duration of the
        extraction and restored afterwards, even if extraction fails.
        """
        old_path = os.path.realpath(os.curdir)
        os.chdir(path)
        try:
            self.archive.seek(0, 0)
            self.extract_archive()
        finally:
            os.chdir(old_path)
1 | 245 | |
246 | ||
class TarExtractor(BaseExtractor):
    """Extractor that lists and unpacks archives with the tar command."""

    def get_filenames(self):
        """Rewind the archive and stream the entry names from 'tar -t'."""
        archive = self.archive
        archive.seek(0, 0)
        listing = ProcessStreamer(['tar', '-t'], archive)
        return listing

    def extract_archive(self):
        """Unpack with 'tar -x', feeding the archive on stdin."""
        unpack_command = ['tar', '-x']
        self.run(unpack_command, stdin=self.archive)
254 | ||
255 | ||
class ZipExtractor(BaseExtractor):
    """Extractor that hands the zip file's path to zipinfo and unzip.

    Both tools receive the file name as an argument, so the archive is
    never opened here and BaseExtractor.__init__ is deliberately not
    called.
    """

    def __init__(self, filename, mimetype, encoding):
        # Empty in-memory placeholder so the seek() calls made on
        # self.archive still work.
        self.archive = StringIO()
        self.included_archives = []
        self.encoding = encoding
        self.mimetype = mimetype
        self.filename = os.path.realpath(filename)

    def get_filenames(self):
        """Stream the entry names printed by 'zipinfo -1'."""
        self.archive.seek(0, 0)
        list_command = ['zipinfo', '-1', self.filename]
        return ProcessStreamer(list_command, None)

    def extract_archive(self):
        """Unpack with 'unzip -q'."""
        unpack_command = ['unzip', '-q', self.filename]
        self.run(unpack_command)
1 | 270 | |
271 | ||
class CpioExtractor(BaseExtractor):
    """Extractor that lists and unpacks archives with the cpio command.

    stderr is routed to a pipe for both operations rather than inherited
    from this process.
    """

    def get_filenames(self):
        """Rewind the archive and stream the entry names from 'cpio -t'."""
        archive = self.archive
        archive.seek(0, 0)
        listing = ProcessStreamer(['cpio', '-t'], archive,
                                  stderr=subprocess.PIPE)
        return listing

    def extract_archive(self):
        """Unpack with 'cpio -i', feeding the archive on stdin."""
        unpack_command = ['cpio', '-i']
        unpack_command.append('--make-directories')
        unpack_command.append('--no-absolute-filenames')
        self.run(unpack_command, stderr=subprocess.PIPE, stdin=self.archive)
282 | ||
283 | ||
class RPMExtractor(CpioExtractor):
    """Extractor for RPM packages, handled as cpio after rpm2cpio."""

    def prepare(self):
        """Replace the archive stream with the output of 'rpm2cpio -'."""
        self.pipe(['rpm2cpio', '-'], "rpm2cpio")

    def basename(self):
        """Derive an output name from the package's file name.

        A name with no dots is returned unchanged.  A name that does not
        end in '.rpm' is handled by BaseExtractor.basename().  Otherwise
        the 'rpm' piece is removed and, if more than one piece remains and
        the last one is shorter than eight characters, that piece is
        removed as well (presumably the architecture tag).
        """
        name_parts = os.path.basename(self.filename).split('.')
        if len(name_parts) == 1:
            return name_parts[0]
        if name_parts[-1] != 'rpm':
            return BaseExtractor.basename(self)
        stem = name_parts[:-1]
        if len(stem) > 1 and len(stem[-1]) < 8:
            stem = stem[:-1]
        return '.'.join(stem)

    def check_contents(self):
        """Scan the entries, then always classify the package as a bomb.

        The inherited scan is still run so that nested archives are
        recorded in included_archives.
        """
        CpioExtractor.check_contents(self)
        verdict = (BOMB, None)
        return verdict
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
304 | |
1 | 305 | |
class DebExtractor(TarExtractor):
    """Extractor for Debian packages, handled as the tar data member."""

    def prepare(self):
        """Pull data.tar.gz out of the package and decompress it.

        'ar p' writes the member to the replacement archive stream, which
        is then rewound and passed through zcat.
        """
        member_command = ['ar', 'p', self.filename, 'data.tar.gz']
        self.pipe(member_command, "data.tar.gz extraction")
        self.archive.seek(0, 0)
        self.pipe(['zcat'], "data.tar.gz decompression")

    def basename(self):
        """Derive an output name from the package's file name.

        A name with no underscores is returned unchanged.  When the last
        underscore-separated field ends in '.deb' and is at most ten
        characters long, it is dropped and the remaining fields are
        rejoined.  Anything else is handled by BaseExtractor.basename().
        """
        fields = os.path.basename(self.filename).split('_')
        if len(fields) == 1:
            return fields[0]
        leading, tail = fields[:-1], fields[-1]
        if len(tail) <= 10 and tail.endswith('.deb'):
            return '_'.join(leading)
        return BaseExtractor.basename(self)

    def check_contents(self):
        """Scan the entries, then always classify the package as a bomb.

        The inherited scan is still run so that nested archives are
        recorded in included_archives.
        """
        TarExtractor.check_contents(self)
        verdict = (BOMB, None)
        return verdict
1 | 325 | |
326 | ||
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
class CompressionExtractor(BaseExtractor):
    """Extractor for a file that is only compressed, not an archive.

    The result is always a single entry named after the input file with
    its compression suffix removed.
    """

    name_checker = FilenameChecker

    def basename(self):
        """Return the file's base name without a known compression suffix.

        The last dot-separated piece is dropped when mimetypes lists it as
        an encoding suffix (for example '.gz').  The suffix is kept when
        dropping it would leave an empty name, e.g. for a file called
        'gz' or '.gz'.
        """
        name = os.path.basename(self.filename)
        pieces = name.split('.')
        if len(pieces) > 1:
            extension = '.' + pieces[-1]
            if extension in mimetypes.encodings_map:
                stripped = '.'.join(pieces[:-1])
                if stripped:
                    return stripped
        return name

    def get_filenames(self):
        """Yield the one file name this extractor produces."""
        yield self.basename()

    def check_contents(self):
        """Report a single entry whose name is already known."""
        return (ONE_ENTRY_KNOWN, self.basename())

    def extract(self, path):
        """Copy the contents of self.archive into the file at path.

        The output file is closed even when the copy fails, so a failed
        run does not leak the file handle.
        """
        output = open(path, 'w')
        try:
            self.archive.seek(0, 0)
            self.run(['cat'], "output write", stdin=self.archive,
                     stdout=output)
        finally:
            output.close()
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
348 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
349 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
class BaseHandler(object):
    """Common behavior for the objects that decide where an archive goes.

    Subclasses set self.target to the path the extractor should write to.
    extract() and cleanup() both report failure by returning a message
    string and return None on success.
    """

    def __init__(self, extractor, contents, content_name, options):
        """Store the extractor, its content classification and the options."""
        self.logger = logging.getLogger('dtrx-log')
        self.extractor = extractor
        self.contents = contents
        self.content_name = content_name
        self.options = options
        self.target = None

    def extract(self):
        """Extract into self.target; return an error message or None."""
        try:
            self.extractor.extract(self.target)
        except (ExtractorError, IOError, OSError):
            # sys.exc_info() avoids the version-specific syntax for binding
            # the caught exception to a name.
            return str(sys.exc_info()[1])

    def cleanup(self):
        """Give the user read/write access to everything under self.target.

        Directories get u+rwx through find, then the whole tree gets u+rw
        through chmod -R.  Returns None on success (or when there is no
        target), otherwise a message naming the command that failed.
        """
        if self.target is None:
            return
        target = self.target
        if target.startswith('-'):
            # A leading dash would be parsed by find/chmod as an option.
            target = os.path.join(os.curdir, target)
        steps = [('find', ['find', target, '-type', 'd',
                           '-exec', 'chmod', 'u+rwx', '{}', ';']),
                 ('chmod', ['chmod', '-R', 'u+rw', target])]
        for command, argv in steps:
            try:
                status = subprocess.call(argv)
            except OSError:
                # The program could not be started at all; report it the
                # same way a failing exit status is reported.
                return "could not run %s: %s" % (command, sys.exc_info()[1])
            if status != 0:
                return "%s returned with exit status %s" % (command, status)
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
376 | |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
377 | |
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
# The "where to extract" table, with options and archive types.
# This dictates the contents of each can_handle method.
#
#         Flat        Overwrite   None
# File    basename    basename    FilenameChecked
# Match   .           .           tempdir + checked
# Bomb    .           basename    DirectoryChecked
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
385 | |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
class FlatHandler(BaseHandler):
    """Handler that extracts straight into the current directory."""

    def can_handle(contents, options):
        """Tell whether this handler applies to the given contents/options."""
        return ((options.flat and (contents != ONE_ENTRY_KNOWN)) or
                (options.overwrite and (contents == MATCHING_DIRECTORY)))
    can_handle = staticmethod(can_handle)

    def __init__(self, extractor, contents, content_name, options):
        """Initialize the base handler and target the current directory."""
        BaseHandler.__init__(self, extractor, contents, content_name, options)
        self.target = '.'

    def cleanup(self):
        """Add user read/write (and execute, for directories) permission
        to every entry the extractor lists.

        Symbolic links are skipped: stat and chmod follow links, so a
        dangling link would raise OSError and a live one would change the
        permissions of a file that is not part of the extraction.
        """
        for filename in self.extractor.get_filenames():
            stat_info = os.lstat(filename)
            if stat.S_ISLNK(stat_info.st_mode):
                continue
            perms = stat.S_IRUSR | stat.S_IWUSR
            if stat.S_ISDIR(stat_info.st_mode):
                perms |= stat.S_IXUSR
            os.chmod(filename, stat_info.st_mode | perms)
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
403 | |
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
404 | |
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
class OverwriteHandler(BaseHandler):
    """Handler that extracts to the archive's base name without running
    it through a name checker.
    """

    def can_handle(contents, options):
        """Tell whether this handler applies to the given contents/options."""
        flat_known_entry = options.flat and (contents == ONE_ENTRY_KNOWN)
        if flat_known_entry:
            return flat_known_entry
        return options.overwrite and (contents != MATCHING_DIRECTORY)
    can_handle = staticmethod(can_handle)

    def __init__(self, extractor, contents, content_name, options):
        """Initialize the base handler and target the extractor's basename."""
        BaseHandler.__init__(self, extractor, contents, content_name, options)
        archive_basename = self.extractor.basename()
        self.target = archive_basename
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
414 | |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
415 | |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
class MatchHandler(BaseHandler):
    """Handler that extracts into a temporary directory and then moves
    the archive's single top-level entry to a checked destination name.
    """

    def can_handle(contents, options):
        """Tell whether this handler applies to the given contents/options."""
        return ((contents == MATCHING_DIRECTORY) or
                ((contents == ONE_ENTRY) and
                 options.one_entry_policy.ok_for_match()))
    can_handle = staticmethod(can_handle)

    def extract(self):
        """Extract via a temporary directory; return an error message or None.

        entry_name is what the archive creates inside the temporary
        directory; destination is the name requested for the final result.
        On any failure self.target is left pointing at a path that exists,
        so a later cleanup() still has something valid to work on.
        """
        if self.contents == MATCHING_DIRECTORY:
            entry_name = destination = self.extractor.basename()
        elif self.options.one_entry_policy == EXTRACT_HERE:
            entry_name = destination = self.content_name.rstrip('/')
        else:
            entry_name = self.content_name.rstrip('/')
            destination = self.extractor.basename()
        self.target = tempdir = tempfile.mkdtemp(dir='.')
        result = BaseHandler.extract(self)
        if result is not None:
            return result
        checker = self.extractor.name_checker(destination)
        final_target = checker.check()
        try:
            os.rename(os.path.join(tempdir, entry_name), final_target)
            # Only retarget once the entry has really been moved.
            self.target = final_target
            os.rmdir(tempdir)
        except OSError:
            # Report filesystem failures the same way extraction failures
            # are reported, instead of letting them propagate.
            return str(sys.exc_info()[1])
        return None
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
439 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
440 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
class EmptyHandler(object):
    """Handler for archives whose contents are EMPTY: every step is a no-op."""

    def can_handle(contents, options):
        """Apply only when the contents were classified as EMPTY."""
        return contents == EMPTY
    can_handle = staticmethod(can_handle)

    def __init__(self, extractor, contents, content_name, options):
        """Accept the standard handler arguments and ignore them."""
        pass

    def extract(self):
        """Do nothing; there is nothing to extract."""
        pass

    def cleanup(self):
        """Do nothing; nothing was extracted."""
        pass
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
449 | |
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
450 | |
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
class BombHandler(BaseHandler):
    """Fallback handler: extracts into a name derived from the archive's
    base name and approved by the extractor's name checker.
    """

    def can_handle(contents, options):
        """Always applicable."""
        return True
    can_handle = staticmethod(can_handle)

    def __init__(self, extractor, contents, content_name, options):
        """Initialize the base handler and pick the checked target name."""
        BaseHandler.__init__(self, extractor, contents, content_name, options)
        wanted_name = self.extractor.basename()
        name_checker = self.extractor.name_checker(wanted_name)
        self.target = name_checker.check()
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
460 | |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
461 | |
25
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
class BasePolicy(object):
    """Base class for decisions that may require asking the user.

    Subclasses provide three class attributes:
      answers -- dict mapping a lower-cased reply to a policy value; the
                 '' key holds the default
      choices -- list of lines printed after the question
      prompt  -- string shown when reading the reply
    """

    def __init__(self, options):
        """Start with no current policy; in batch mode the default answer
        becomes the permanent policy so no question is ever asked.
        """
        self.current_policy = None
        if options.batch:
            self.permanent_policy = self.answers['']
        else:
            self.permanent_policy = None

    def ask_question(self, question):
        """Print the question and choices until a known answer is given.

        Returns the policy value for the reply.  End-of-file on input
        selects the default answer.  An unrecognized reply prints a blank
        line and asks again.
        """
        text = "\n".join(textwrap.wrap(question) + self.choices)
        while True:
            print(text)
            try:
                answer = raw_input(self.prompt)
            except EOFError:
                return self.answers['']
            try:
                return self.answers[answer.lower()]
            except KeyError:
                # Unknown reply: leave a blank line, then repeat the question.
                print("")

    def __cmp__(self, other):
        """Compare as the current policy value, so a policy object can be
        tested directly against a policy constant.
        """
        return cmp(self.current_policy, other)
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
485 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
486 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
class OneEntryPolicy(BasePolicy):
    """Policy for an archive that contains exactly one top-level entry."""

    answers = {'h': EXTRACT_HERE, 'i': EXTRACT_WRAP, 'r': EXTRACT_RENAME,
               '': EXTRACT_WRAP}
    choices = ["You can:",
               " * extract it Inside another directory",
               " * extract it and Rename the directory",
               " * extract it Here"]
    prompt = "What do you want to do? (I/r/h) "

    def prep(self, archive_filename, entry_name):
        """Settle the policy for this archive, asking the user if needed.

        A permanent policy is used as-is.  It is tested against None
        rather than for truth, so a policy constant that happens to be
        falsy (such as 0) is still honored.
        """
        if self.permanent_policy is not None:
            self.current_policy = self.permanent_policy
            return
        question = ("%s contains one entry: %s." %
                    (archive_filename, entry_name))
        self.current_policy = self.ask_question(question)

    def ok_for_match(self):
        """Tell whether the current policy is rename or extract-here."""
        return self.current_policy in (EXTRACT_RENAME, EXTRACT_HERE)
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
504 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
505 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
506 | class RecursionPolicy(BasePolicy): |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
507 | answers = {'o': RECURSE_ONCE, 'a': RECURSE_ALWAYS, 'n': RECURSE_NOT_NOW, |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
508 | 'v': RECURSE_NEVER, '': RECURSE_NOT_NOW} |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
509 | choices = ["You can:", |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
510 | " * Always extract included archives", |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
511 | " * extract included archives this Once", |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
512 | " * choose Not to extract included archives", |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
513 | " * neVer extract included archives"] |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
514 | prompt = "What do you want to do? (a/o/N/v) " |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
515 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
516 | def __init__(self, options): |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
517 | BasePolicy.__init__(self, options) |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
518 | if options.recursive: |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
519 | self.permanent_policy = RECURSE_ALWAYS |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
520 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
def prep(self, current_filename, included_archives):
    """Decide the recursion policy for the archive just extracted.

    current_filename is the archive that was processed and
    included_archives is the list of archive files found inside it.
    The decision is stored in self.current_policy; nothing is returned.

    A permanent policy, once set, is applied without asking.  With no
    permanent policy and no included archives, the policy is
    RECURSE_NOT_NOW.  Otherwise the user is asked via ask_question(),
    and an "always"/"never" answer becomes the permanent policy.
    """
    # Test against None explicitly: a policy constant may legitimately be
    # falsy, and "permanent_policy or RECURSE_NOT_NOW" would discard it.
    if self.permanent_policy is not None:
        self.current_policy = self.permanent_policy
        return
    archive_count = len(included_archives)
    if archive_count == 0:
        self.current_policy = RECURSE_NOT_NOW
        return
    if archive_count > 1:
        question = ("%s contains %s other archive files." %
                    (current_filename, archive_count))
    else:
        question = ("%s contains another archive: %s." %
                    (current_filename, included_archives[0]))
    self.current_policy = self.ask_question(question)
    if self.current_policy in (RECURSE_ALWAYS, RECURSE_NEVER):
        self.permanent_policy = self.current_policy
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
535 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
def ok_to_recurse(self):
    """Return True when the current policy allows extracting included
    archives (RECURSE_ALWAYS or RECURSE_ONCE), False otherwise."""
    policy = self.current_policy
    return (policy == RECURSE_ALWAYS) or (policy == RECURSE_ONCE)
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
538 | |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
539 | |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
# Maps the MIME type guessed for an archive's filename to the extractor
# class used to open it.
extractor_map = {
    'application/x-tar': TarExtractor,
    'application/zip': ZipExtractor,
    'application/x-msdos-program': ZipExtractor,
    'application/x-debian-package': DebExtractor,
    'application/x-redhat-package-manager': RPMExtractor,
    'application/x-rpm': RPMExtractor,
    'application/x-cpio': CpioExtractor,
}

# Handler classes in the order they are tried; the first one whose
# can_handle() accepts the archive contents is used.
handlers = [
    FlatHandler,
    OverwriteHandler,
    MatchHandler,
    EmptyHandler,
    BombHandler,
]
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
550 | |
5 | 551 | class ExtractorApplication(object): |
552 | def __init__(self, arguments): | |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
553 | self.parse_options(arguments) |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
554 | self.setup_logger() |
5 | 555 | self.successes = [] |
556 | self.failures = [] | |
557 | ||
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
558 | def parse_options(self, arguments): |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
559 | parser = optparse.OptionParser( |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
560 | usage="%prog [options] archive [archive2 ...]", |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
561 | description="Intelligent archive extractor", |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
562 | version=VERSION_BANNER |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
563 | ) |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
564 | parser.add_option('-r', '--recursive', dest='recursive', |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
565 | action='store_true', default=False, |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
566 | help='extract archives contained in the ones listed') |
13
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
567 | parser.add_option('-q', '--quiet', dest='quiet', |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
568 | action='count', default=3, |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
569 | help='suppress warning/error messages') |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
570 | parser.add_option('-v', '--verbose', dest='verbose', |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
571 | action='count', default=0, |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
572 | help='be verbose/print debugging information') |
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
573 | parser.add_option('-o', '--overwrite', dest='overwrite', |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
574 | action='store_true', default=False, |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
575 | help='overwrite any existing target directory') |
15
28dbd52a8bb8
[svn] Add a -f/--flat option, which will extract the archive contents into the
brett
parents:
14
diff
changeset
|
576 | parser.add_option('-f', '--flat', '--no-directory', dest='flat', |
28dbd52a8bb8
[svn] Add a -f/--flat option, which will extract the archive contents into the
brett
parents:
14
diff
changeset
|
577 | action='store_true', default=False, |
28dbd52a8bb8
[svn] Add a -f/--flat option, which will extract the archive contents into the
brett
parents:
14
diff
changeset
|
578 | help="don't put contents in their own directory") |
19 | 579 | parser.add_option('-l', '-t', '--list', '--table', dest='show_list', |
580 | action='store_true', default=False, | |
581 | help="list contents of archives on standard output") | |
20
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
582 | parser.add_option('-n', '--noninteractive', dest='batch', |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
583 | action='store_true', default=False, |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
584 | help="don't ask how to handle special cases") |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
585 | self.options, filenames = parser.parse_args(arguments) |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
586 | if not filenames: |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
587 | parser.error("you did not list any archives") |
25
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
588 | self.options.one_entry_policy = OneEntryPolicy(self.options) |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
589 | self.options.recursion_policy = RecursionPolicy(self.options) |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
590 | self.archives = {os.path.realpath(os.curdir): filenames} |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
591 | |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
592 | def setup_logger(self): |
19 | 593 | self.logger = logging.getLogger('dtrx-log') |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
594 | handler = logging.StreamHandler() |
13
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
595 | # WARNING is the default. |
0a3ef1b9f6d4
[svn] Add options to tweak the logging level to taste.
brett
parents:
12
diff
changeset
|
596 | handler.setLevel(10 * (self.options.quiet - self.options.verbose)) |
19 | 597 | formatter = logging.Formatter("dtrx: %(levelname)s: %(message)s") |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
598 | handler.setFormatter(formatter) |
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
599 | self.logger.addHandler(handler) |
1 | 600 | |
5 | 601 | def get_extractor(self): |
602 | mimetype, encoding = mimetypes.guess_type(self.current_filename) | |
1 | 603 | try: |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
604 | extractor = extractor_map[mimetype] |
1 | 605 | except KeyError: |
14
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
606 | if encoding: |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
607 | extractor = CompressionExtractor |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
608 | else: |
6f9e1bb59719
[svn] Add support for just decompressing files that are compressed. So, if you
brett
parents:
13
diff
changeset
|
609 | return "not a known archive type" |
5 | 610 | try: |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
611 | self.current_extractor = extractor(self.current_filename, mimetype, |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
612 | encoding) |
20
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
613 | except ExtractorError, error: |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
614 | return str(error) |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
615 | |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
616 | def get_handler(self): |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
617 | try: |
22
b240777ae53e
[svn] Improve the way we check archive contents. If all the entries look like
brett
parents:
20
diff
changeset
|
618 | content, content_name = self.current_extractor.check_contents() |
25
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
619 | if content == ONE_ENTRY: |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
620 | self.options.one_entry_policy.prep(self.current_filename, |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
621 | content_name) |
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
622 | for handler in handlers: |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
623 | if handler.can_handle(content, self.options): |
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
624 | self.current_handler = handler(self.current_extractor, |
22
b240777ae53e
[svn] Improve the way we check archive contents. If all the entries look like
brett
parents:
20
diff
changeset
|
625 | content, content_name, |
b240777ae53e
[svn] Improve the way we check archive contents. If all the entries look like
brett
parents:
20
diff
changeset
|
626 | self.options) |
16
29794d4d41aa
[svn] There's now an entirely new object hierarchy for handlers, because the
brett
parents:
15
diff
changeset
|
627 | break |
5 | 628 | except ExtractorError, error: |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
629 | return str(error) |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
630 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
631 | def recurse(self): |
25
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
632 | archives = self.current_extractor.included_archives |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
633 | self.options.recursion_policy.prep(self.current_filename, archives) |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
634 | if self.options.recursion_policy.ok_to_recurse(): |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
635 | for filename in archives: |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
636 | tail_path, basename = os.path.split(filename) |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
637 | directory = os.path.join(self.current_directory, |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
638 | self.current_handler.target, tail_path) |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
639 | self.archives.setdefault(directory, []).append(basename) |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
640 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
641 | def report(self, function, *args): |
17
481a2b4be471
[svn] Lots of tests for various boundary cases, and slightly better handling for
brett
parents:
16
diff
changeset
|
642 | try: |
481a2b4be471
[svn] Lots of tests for various boundary cases, and slightly better handling for
brett
parents:
16
diff
changeset
|
643 | error = function(*args) |
481a2b4be471
[svn] Lots of tests for various boundary cases, and slightly better handling for
brett
parents:
16
diff
changeset
|
644 | except (ExtractorError, IOError, OSError), exception: |
481a2b4be471
[svn] Lots of tests for various boundary cases, and slightly better handling for
brett
parents:
16
diff
changeset
|
645 | error = str(exception) |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
646 | if error: |
12
5d202467c589
[svn] Introduce a real logging system. Right now all this really gets us is the
brett
parents:
11
diff
changeset
|
647 | self.logger.error("%s: %s", self.current_filename, error) |
5 | 648 | return False |
649 | return True | |
650 | ||
19 | 651 | def record_status(self, success): |
652 | if success: | |
653 | self.successes.append(self.current_filename) | |
654 | else: | |
655 | self.failures.append(self.current_filename) | |
656 | ||
657 | def extract(self): | |
23
039dd321a7d0
[svn] If an archive contains other archives, and the user didn't specify that
brett
parents:
22
diff
changeset
|
658 | first_run = True |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
659 | while self.archives: |
25
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
660 | if not first_run: |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
661 | self.options.one_entry_policy.permanent_policy = EXTRACT_WRAP |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
662 | else: |
ef62f2f55eb8
[svn] Move policy-handling code into a dedicated set of classes. This makes
brett
parents:
23
diff
changeset
|
663 | first_run = False |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
664 | self.current_directory, filenames = self.archives.popitem() |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
665 | for filename in filenames: |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
666 | os.chdir(self.current_directory) |
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
667 | self.current_filename = filename |
20
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
668 | success = (self.report(self.get_extractor) and |
69c93c3e6972
[svn] If the archive contains one directory with the "wrong" name, ask the user
brett
parents:
19
diff
changeset
|
669 | self.report(self.get_handler)) |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
670 | if success: |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
671 | for name in 'extract', 'cleanup': |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
672 | success = (self.report(getattr(self.current_handler, |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
673 | name)) and success) |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
674 | self.recurse() |
19 | 675 | self.record_status(success) |
676 | ||
677 | def show_contents(self): | |
678 | for filename in self.current_extractor.get_filenames(): | |
679 | print filename | |
680 | ||
681 | def show_list(self): | |
682 | filenames = self.archives.values()[0] | |
683 | if len(filenames) > 1: | |
684 | header = "%s:\n" | |
685 | else: | |
686 | header = None | |
687 | for filename in filenames: | |
688 | if header: | |
689 | print header % (filename,), | |
690 | header = "\n%s:\n" | |
691 | self.current_filename = filename | |
692 | success = (self.report(self.get_extractor) and | |
693 | self.report(self.show_contents)) | |
694 | self.record_status(success) | |
695 | ||
696 | def run(self): | |
697 | if self.options.show_list: | |
698 | self.show_list() | |
699 | else: | |
700 | self.extract() | |
5 | 701 | if self.failures: |
702 | return 1 | |
703 | return 0 | |
704 | ||
1 | 705 | |
if __name__ == '__main__':
    # Run the application and use its result as the exit status.
    app = ExtractorApplication(sys.argv[1:])
    exit_status = app.run()
    sys.exit(exit_status)