Mon, 13 Nov 2006 23:06:30 -0500
[svn] Make ExtractorApplication suck less. Now the strategies for handling
different archive types are out in their own classes, and polymorphism
takes care of everything for us. This is way cleaner.
While I was at it I changed the behavior in the case where an archive
contains one directory that doesn't match the basename. I now treat that
the same as a bomb. This can lead to silly directory structures but
ensures that there's no "data" loss nor unexpected results.
1 | 1 | #!/usr/bin/env python |
2 | # | |
3 | # x -- Intelligently extract various archive types. | |
4 | # Copyright (c) 2006 Brett Smith <brettcsmith@brettcsmith.org>. | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it | |
7 | # under the terms of the GNU General Public License as published by the | |
8 | # Free Software Foundation; either version 2 of the License, or (at your | |
9 | # option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but | |
12 | # WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General | |
14 | # Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along | |
17 | # with this program; if not, write to the Free Software Foundation, Inc., | |
18 | # 51 Franklin Street, 5th Floor, Boston, MA, 02111. | |
19 | ||
5 | 20 | import errno |
1 | 21 | import mimetypes |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
22 | import optparse |
1 | 23 | import os |
24 | import subprocess | |
25 | import sys | |
26 | import tempfile | |
27 | ||
28 | from cStringIO import StringIO | |
29 | ||
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
# Program version, interpolated into the banner printed for --version.
VERSION = "1.1"
VERSION_BANNER = """x version %s
Copyright (c) 2006 Brett Smith <brettcsmith@brettcsmith.org>

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
Public License for more details.""" % (VERSION,)

# Codes returned by the extractors' check_contents() methods to describe
# how an archive's members are laid out.
MATCHING_DIRECTORY = 1
# ONE_DIRECTORY = 2
BOMB = 3
EMPTY = 4

# Teach mimetypes about bzip2 compression (unless it already knows) and
# give .exe files a MIME type of their own.
mimetypes.encodings_map.setdefault('.bz2', 'bzip2')
mimetypes.types_map['.exe'] = 'application/x-msdos-program'
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
51 | |
class ExtractorError(Exception):
    """Raised when an archive cannot be opened, inspected, or extracted."""
54 | ||
55 | ||
class ProcessStreamer(object):
    """Iterate over the lines an external command writes to its stdout.

    The command is started immediately.  Each iteration step yields one
    line of output with its trailing newline removed.  Call stop() once
    finished to reap the process and check its exit status.
    """

    def __init__(self, command, stdin, description="checking contents",
                 stderr=None):
        """Start command (an argument list) reading from stdin.

        description is only used to label the error raised by stop();
        stderr is passed straight through to subprocess.Popen.
        """
        self.process = subprocess.Popen(command, bufsize=1, stdin=stdin,
                                        stdout=subprocess.PIPE, stderr=stderr)
        self.command = ' '.join(command)
        self.description = description

    def __iter__(self):
        return self

    def next(self):
        """Return the next output line, or raise StopIteration at EOF."""
        line = self.process.stdout.readline()
        if line:
            return line.rstrip('\n')
        else:
            raise StopIteration

    def stop(self):
        """Drain remaining output, reap the process, and check its status.

        Raises ExtractorError if the command exited with a non-zero status.
        """
        # Consume whatever output is left so the child is never blocked
        # writing to a full stdout pipe while we wait for it.
        while self.process.stdout.readline():
            pass
        self.process.stdout.close()
        status = self.process.wait()
        # Release the stderr pipe (if there is one) before reporting a
        # failure, so the descriptor is not leaked on the error path.
        try:
            self.process.stderr.close()
        except AttributeError:
            pass
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (self.description, self.command, status))
86 | ||
87 | ||
class BaseExtractor(object):
    """Common machinery for handling one archive with external programs.

    Subclasses provide get_filenames() (an iterator with a stop() method
    listing the archive's members) and extract_archive(); they may also
    override prepare(), basename() and check_contents().
    """

    # Maps a mimetypes encoding name to the program that decodes it.
    decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'}

    def __init__(self, filename, mimetype, encoding):
        """Open filename, undo its encoding (if any), and call prepare().

        Raises ValueError for an encoding with no known decoder and
        ExtractorError if the file cannot be opened or decoded.
        """
        if encoding and (encoding not in self.decoders):
            raise ValueError("unrecognized encoding %s" % (encoding,))
        self.filename = filename
        self.mimetype = mimetype
        self.encoding = encoding
        # Members of this archive that look like archives themselves;
        # filled in by check_contents().
        self.included_archives = []
        try:
            self.archive = open(filename, 'r')
        except (IOError, OSError):
            error = sys.exc_info()[1]
            raise ExtractorError("could not open %s: %s" %
                                 (filename, error.strerror))
        if encoding:
            self.pipe([self.decoders[encoding]], "decoding")
        self.prepare()

    def run(self, command, description="extraction", stdout=None, stderr=None,
            stdin=None):
        """Run command to completion.

        Raises ExtractorError, labelled with description, if the command
        exits with a non-zero status.
        """
        process = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                                   stderr=stderr)
        # communicate() drains and closes any PIPE streams while waiting,
        # so a chatty child cannot block forever on a full pipe and the
        # pipes are released whether or not the command succeeded.
        process.communicate()
        status = process.returncode
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (description, ' '.join(command), status))

    def pipe(self, command, description, stderr=None):
        """Feed the current archive through command.

        The command's output is collected in a temporary file, which then
        replaces self.archive.
        """
        output = tempfile.TemporaryFile()
        self.run(command, description, output, stderr, self.archive)
        self.archive.close()
        self.archive = output
        self.archive.flush()

    def prepare(self):
        """Hook for subclasses; called at the end of __init__."""
        pass

    def _note_archive(self, filename):
        """Record filename if its guessed MIME type has an extractor."""
        if mimetypes.guess_type(filename)[0] in extractor_map:
            self.included_archives.append(filename)

    def check_contents(self):
        """Classify how the archive's members are laid out.

        Returns EMPTY if there are no members, BOMB if the members do not
        all start with the first member's leading path component,
        MATCHING_DIRECTORY if that component equals basename(), and
        otherwise the component itself (with a trailing slash).  Members
        that look like archives are collected in included_archives.
        """
        self.archive.seek(0, 0)
        archive_type = None
        filenames = self.get_filenames()
        try:
            filename = filenames.next()
        except StopIteration:
            filenames.stop()
            return EMPTY
        self._note_archive(filename)
        first_part = filename.split('/', 1)[0] + '/'
        # Keep reading after a bomb is detected: every member still has to
        # be checked for nested archives.
        for filename in filenames:
            self._note_archive(filename)
            if (archive_type is None) and (not filename.startswith(first_part)):
                archive_type = BOMB
        filenames.stop()
        if archive_type:
            return archive_type
        if self.basename() == first_part[:-1]:
            return MATCHING_DIRECTORY
        return first_part

    def basename(self):
        """Return the file's name without its encoding and type suffixes.

        For example 'foo-1.0.tar.gz' gives 'foo-1.0'.  A suffix is only
        removed while something would be left in front of it, so a file
        named just 'gz' or 'tar.gz' never produces an empty name.
        """
        pieces = os.path.basename(self.filename).split('.')
        if len(pieces) > 1 and ('.' + pieces[-1]) in mimetypes.encodings_map:
            pieces.pop()
        if len(pieces) > 1:
            extension = '.' + pieces[-1]
            if (extension in mimetypes.types_map or
                extension in mimetypes.common_types or
                extension in mimetypes.suffix_map):
                pieces.pop()
        return '.'.join(pieces)

    def extract(self, path):
        """Rewind the archive and extract it via extract_archive().

        path is accepted for interface compatibility but not used here.
        """
        self.archive.seek(0, 0)
        self.extract_archive()
170 | ||
171 | ||
class TarExtractor(BaseExtractor):
    """Extractor that drives tar, feeding it the archive on stdin."""

    def get_filenames(self):
        """Return a ProcessStreamer over the member names (tar -t)."""
        listing_command = ['tar', '-t']
        return ProcessStreamer(listing_command, self.archive)

    def extract_archive(self):
        """Unpack the archive into the current directory (tar -x)."""
        unpack_command = ['tar', '-x']
        self.run(unpack_command, stdin=self.archive)
178 | ||
179 | ||
class ZipExtractor(BaseExtractor):
    """Extractor that drives zipinfo/unzip, which take a file name."""

    def __init__(self, filename, mimetype, encoding):
        """Record the arguments without opening the file.

        BaseExtractor.__init__ is intentionally not called; an empty
        StringIO stands in for the archive file object.
        """
        self.archive = StringIO()
        self.included_archives = []
        self.filename = filename
        self.mimetype = mimetype
        self.encoding = encoding

    def get_filenames(self):
        """Return a ProcessStreamer over the member names (zipinfo -1)."""
        listing_command = ['zipinfo', '-1', self.filename]
        return ProcessStreamer(listing_command, None)

    def extract(self, path):
        """Unpack the archive, found relative to path, with unzip -q."""
        location = os.path.join(path, self.filename)
        self.run(['unzip', '-q', location])
193 | ||
194 | ||
class CpioExtractor(BaseExtractor):
    """Extractor that drives cpio, feeding it the archive on stdin."""

    def get_filenames(self):
        """Return a ProcessStreamer over the member names (cpio -t).

        cpio's stderr is sent to a pipe rather than the terminal.
        """
        listing_command = ['cpio', '-t']
        return ProcessStreamer(listing_command, self.archive,
                               stderr=subprocess.PIPE)

    def extract_archive(self):
        """Unpack into the current directory, creating directories as
        needed and refusing absolute member names."""
        unpack_command = ['cpio', '-i', '--make-directories',
                          '--no-absolute-filenames']
        self.run(unpack_command, stderr=subprocess.PIPE, stdin=self.archive)
204 | ||
205 | ||
class RPMExtractor(CpioExtractor):
    """Extractor for RPM packages, handled as cpio after rpm2cpio."""

    def prepare(self):
        """Convert the package to a cpio stream with rpm2cpio."""
        self.pipe(['rpm2cpio', '-'], "rpm2cpio")

    def basename(self):
        """Return the package file name without '.rpm'.

        A component of fewer than six characters directly before '.rpm'
        is dropped as well (e.g. 'foo-1.0-1.i386.rpm' gives 'foo-1.0-1').
        Names that do not end in '.rpm' fall back to
        BaseExtractor.basename().
        """
        parts = os.path.basename(self.filename).rsplit('.', 2)
        if len(parts) == 1:
            return parts[0]
        if parts[-1] != 'rpm':
            return BaseExtractor.basename(self)
        stem = parts[:-1]
        if len(stem) > 1 and len(stem[-1]) < 6:
            stem = stem[:-1]
        return '.'.join(stem)

    def check_contents(self):
        """Scan the members for nested archives, then always report BOMB."""
        # The inherited result is discarded; the call is made for its side
        # effect of filling in included_archives.
        CpioExtractor.check_contents(self)
        return BOMB
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
226 | |
1 | 227 | |
class DebExtractor(TarExtractor):
    """Extractor for Debian packages, handled as the tar inside them."""

    def prepare(self):
        """Replace the archive with the decompressed data.tar.gz member."""
        fetch_member = ['ar', 'p', self.filename, 'data.tar.gz']
        self.pipe(fetch_member, "data.tar.gz extraction")
        # Rewind so zcat reads the extracted member from its beginning.
        self.archive.seek(0, 0)
        self.pipe(['zcat'], "data.tar.gz decompression")

    def basename(self):
        """Return the file name up to its last underscore.

        That applies when the text after the underscore is at most ten
        characters long and ends in '.deb'; a name without an underscore
        is returned whole, and anything else falls back to
        BaseExtractor.basename().
        """
        name = os.path.basename(self.filename)
        if '_' not in name:
            return name
        stem, tail = name.rsplit('_', 1)
        if len(tail) <= 10 and tail.endswith('.deb'):
            return stem
        return BaseExtractor.basename(self)

    def check_contents(self):
        """Scan the members for nested archives, then always report BOMB."""
        # The inherited result is discarded; the call is made for its side
        # effect of filling in included_archives.
        TarExtractor.check_contents(self)
        return BOMB
246 | ||
247 | ||
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
class MatchHandler(object):
    """Extraction strategy that unpacks straight into the current directory.

    self.directory is the extractor's basename(); cleanup() fixes up
    permissions under that path.
    """

    def __init__(self, extractor, contents):
        """Remember the extractor and its check_contents() result."""
        self.extractor = extractor
        self.contents = contents
        self.directory = extractor.basename()

    def extract(self, directory='.'):
        """Run the extraction, passing directory on to the extractor.

        Returns None on success, or the error message as a string if the
        extractor raised ExtractorError.
        """
        try:
            self.extractor.extract(directory)
        except ExtractorError:
            # ExtractorError is a plain Exception and has no strerror
            # attribute, so report its message via str().
            return str(sys.exc_info()[1])
        return None

    def cleanup(self):
        """Make the extracted tree readable, writable and traversable.

        Runs 'chmod -R u+rw' on self.directory and then, if that worked,
        adds u+x to every directory below it.  Returns None on success or
        a message naming the command that failed.
        """
        command = 'chmod'
        status = subprocess.call(['chmod', '-R', 'u+rw', self.directory])
        if status == 0:
            command = 'find'
            status = subprocess.call(['find', self.directory, '-type', 'd',
                                      '-exec', 'chmod', 'u+x', '{}', ';'])
        if status != 0:
            return "%s returned with exit status %s" % (command, status)
        return None
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
269 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
270 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
class BombHandler(MatchHandler):
    """Extraction strategy that unpacks into a freshly created directory."""

    def __init__(self, extractor, contents):
        """Create the extraction directory.

        The extractor's basename is tried first, then the same name with
        '.1' through '.9' appended; the first one that can be created
        becomes self.directory.  Raises ValueError if a directory cannot
        be created or every candidate name already exists.
        """
        MatchHandler.__init__(self, extractor, contents)
        stem = self.directory
        tails = [''] + ['.%s' % (number,) for number in range(1, 10)]
        for tail in tails:
            self.directory = '%s%s' % (stem, tail)
            try:
                os.mkdir(self.directory)
            except OSError:
                error = sys.exc_info()[1]
                if error.errno != errno.EEXIST:
                    raise ValueError("could not make extraction directory %s: %s" %
                                     (error.filename, error.strerror))
                # The name is taken; fall through to the next candidate.
            else:
                # NOTE: the original carried a disabled hook here that
                # would report the chosen directory when tail is not ''.
                break
        else:
            raise ValueError("all good names for an extraction directory taken")

    def extract(self):
        """Change into the new directory and extract relative to '..'."""
        os.chdir(self.directory)
        return MatchHandler.extract(self, '..')

    def cleanup(self):
        """Change back to the parent directory, then fix permissions."""
        os.chdir('..')
        return MatchHandler.cleanup(self)
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
297 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
298 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
class EmptyHandler(object):
    """Extraction strategy for an archive with no members: do nothing."""

    def __init__(self, extractor, contents):
        """Accept and ignore the usual handler arguments."""

    def extract(self):
        """Nothing to extract; returns None."""
        return None

    def cleanup(self):
        """Nothing to clean up; returns None."""
        return None
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
303 | |
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
# Maps a MIME type, as guessed by mimetypes, to the extractor class that
# handles archives of that type.
extractor_map = {
    'application/x-tar': TarExtractor,
    'application/zip': ZipExtractor,
    'application/x-msdos-program': ZipExtractor,
    'application/x-debian-package': DebExtractor,
    'application/x-redhat-package-manager': RPMExtractor,
    'application/x-rpm': RPMExtractor,
    'application/x-cpio': CpioExtractor,
}
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
311 | |
8
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
# Maps a check_contents() result to the handler class used for it.
# Results that are not listed here are looked up with BombHandler as the
# default.
handler_map = {
    EMPTY: EmptyHandler,
    MATCHING_DIRECTORY: MatchHandler,
}
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
314 | |
5 | 315 | class ExtractorApplication(object): |
def __init__(self, arguments):
    """Parse the command-line arguments and start with empty result lists."""
    self.parse_options(arguments)
    # Both lists start out empty; nothing in this method fills them.
    self.successes, self.failures = [], []
320 | ||
6
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
def parse_options(self, arguments):
    """Parse arguments into self.options and self.archives.

    self.archives starts as a one-entry dict mapping the real path of the
    current directory to the archive names given on the command line.
    Exits through the parser's error() if no archives were listed.
    """
    parser = optparse.OptionParser(
        usage="%prog [options] archive [archive2 ...]",
        description="Intelligent archive extractor",
        version=VERSION_BANNER,
    )
    parser.add_option(
        '-r', '--recursive',
        dest='recursive',
        action='store_true',
        default=False,
        help='extract archives contained in the ones listed',
    )
    options, filenames = parser.parse_args(arguments)
    self.options = options
    if not filenames:
        parser.error("you did not list any archives")
    start_directory = os.path.realpath(os.curdir)
    self.archives = {start_directory: filenames}
77043f4e6a9f
[svn] The big thing here is recursive extraction. Find archive files in the
brett
parents:
5
diff
changeset
|
334 | |
def show_error(self, message):
    """Write message to stderr, prefixed with the current archive's name."""
    sys.stderr.write("%s: %s\n" % (self.current_filename, message))
1 | 337 | |
def get_extractor(self):
    """Set up the extractor and handler for self.current_filename.

    On success, self.current_extractor and self.current_handler are set
    and None is returned.  Otherwise an error message string is returned.
    """
    mimetype, encoding = mimetypes.guess_type(self.current_filename)
    if mimetype not in extractor_map:
        return "not a known archive type"
    extractor_class = extractor_map[mimetype]
    # NOTE(review): only ExtractorError is turned into a message here; a
    # ValueError raised while constructing the extractor or handler would
    # propagate to the caller -- confirm that is intended.
    try:
        self.current_extractor = extractor_class(self.current_filename,
                                                 mimetype, encoding)
        content = self.current_extractor.check_contents()
        handler_class = handler_map.get(content, BombHandler)
        self.current_handler = handler_class(self.current_extractor, content)
    except ExtractorError:
        return str(sys.exc_info()[1])
    return None
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
352 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
def recurse(self):
    """Queue the archives found inside the one just handled.

    Does nothing unless the recursive option is set.  Each nested archive
    is added to self.archives under the directory where it ends up: the
    current directory, the outer archive's own directory part, the
    handler's extraction directory, and the member's directory part.
    """
    if self.options.recursive:
        outer_directory = os.path.split(self.current_filename)[0]
        for member in self.current_extractor.included_archives:
            member_directory, member_name = os.path.split(member)
            location = os.path.join(self.current_directory, outer_directory,
                                    self.current_handler.directory,
                                    member_directory)
            self.archives.setdefault(location, []).append(member_name)
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
362 | |
97388f5ff770
[svn] Make ExtractorApplication suck less. Now the strategies for handling
brett
parents:
7
diff
changeset
|
def report(self, function, *args):
    """Call ``function(*args)`` and report whether it succeeded.

    ``function`` signals failure by returning a truthy error value, which
    is passed to ``self.show_error``.  Returns True when no error came
    back and False otherwise.
    """
    problem = function(*args)
    if not problem:
        return True
    self.show_error(problem)
    return False
369 | ||
def run(self):
    """Process every queued archive and return the exit status.

    ``self.archives`` maps a directory to the filenames to handle there;
    entries are popped until the mapping is empty (``recurse`` may add
    more while the loop runs).  Each filename ends up in either
    ``self.successes`` or ``self.failures``.  Returns 1 if anything
    failed and 0 otherwise.
    """
    while self.archives:
        self.current_directory, pending = self.archives.popitem()
        for archive_name in pending:
            os.chdir(self.current_directory)
            self.current_filename = archive_name
            self.cleanup_actions = []
            if self.report(self.get_extractor):
                # Cleanup is attempted even when extraction reported an
                # error; the file only counts as a success if both pass.
                extracted = self.report(self.current_handler.extract)
                cleaned = self.report(self.current_handler.cleanup)
                self.recurse()
                succeeded = extracted and cleaned
            else:
                succeeded = False
            if succeeded:
                self.successes.append(self.current_filename)
            else:
                self.failures.append(self.current_filename)
    exit_status = 0
    if self.failures:
        exit_status = 1
    return exit_status
390 | ||
1 | 391 | |
if __name__ == '__main__':
    # Script entry point: hand the command-line arguments to the
    # application and exit with the status that run() returns.
    sys.exit(ExtractorApplication(sys.argv[1:]).run())