Sun, 29 Oct 2006 20:03:12 -0500
[svn] Fix a small bug that would crash the program if an archive was empty.
Make basename calculation more robust.
Go ahead and run .exe files through the ZipExtractor. Need to cope more
gracefully when it doesn't work, but I've lost momentum for tonight.
1 | 1 | #!/usr/bin/env python |
2 | # | |
3 | # x -- Intelligently extract various archive types. | |
4 | # Copyright (c) 2006 Brett Smith <brettcsmith@brettcsmith.org>. | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it | |
7 | # under the terms of the GNU General Public License as published by the | |
8 | # Free Software Foundation; either version 2 of the License, or (at your | |
9 | # option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but | |
12 | # WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General | |
14 | # Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along | |
17 | # with this program; if not, write to the Free Software Foundation, Inc., | |
18 | # 51 Franklin Street, 5th Floor, Boston, MA, 02111. | |
19 | ||
20 | import mimetypes | |
21 | import os | |
22 | import subprocess | |
23 | import sys | |
24 | import tempfile | |
25 | ||
26 | from cStringIO import StringIO | |
27 | ||
# Make sure the '.bz2' suffix is recognised as a bzip2 encoding even when
# this Python's mimetypes tables do not already list it.
if '.bz2' not in mimetypes.encodings_map:
    mimetypes.encodings_map['.bz2'] = 'bzip2'

# Result codes returned by the extractors' check_contents() methods:
#   MATCHING_DIRECTORY - every entry lives under one top-level directory
#                        whose name equals the extractor's basename()
#   ONE_DIRECTORY      - every entry lives under one top-level directory
#   BOMB               - entries do not share a single top-level directory
#   EMPTY              - the archive listing produced no entries
MATCHING_DIRECTORY, ONE_DIRECTORY, BOMB, EMPTY = range(1, 5)
34 | ||
class ExtractorError(Exception):
    """Raised when an external command run for an archive exits non-zero."""
37 | ||
38 | ||
class ProcessStreamer(object):
    """Iterate over the lines a child process writes to standard output.

    The command is started as soon as the object is created.  Call stop()
    when finished with the output, whether or not it was read to the end.
    """

    def __init__(self, command, stdin, description="checking contents",
                 stderr=None):
        """Start *command* (an argument list) with its stdout piped to us.

        stdin and stderr are handed to subprocess.Popen unchanged.
        description is only used to label the error raised by stop().
        """
        self.process = subprocess.Popen(command, bufsize=1, stdin=stdin,
                                        stdout=subprocess.PIPE, stderr=stderr)
        self.command = ' '.join(command)
        self.description = description

    def __iter__(self):
        return self

    def next(self):
        """Return the next output line without its trailing newline."""
        line = self.process.stdout.readline()
        if not line:
            raise StopIteration
        return line.rstrip('\n')

    def stop(self):
        """Discard unread output, reap the process and check its status.

        Raises ExtractorError when the command exited with a non-zero
        status.  The stderr pipe, if one was requested, is closed in either
        case so a failing command does not leak a descriptor.
        """
        try:
            # Drain stdout so the child is never left blocked on a full pipe.
            while self.process.stdout.readline():
                pass
            self.process.stdout.close()
            status = self.process.wait()
        finally:
            # NOTE: stderr is never read here; only closed.
            if self.process.stderr is not None:
                self.process.stderr.close()
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (self.description, self.command, status))
69 | ||
70 | ||
class BaseExtractor(object):
    """Common machinery for extractors that feed an archive to a command.

    Subclasses provide get_filenames() (used by check_contents()) and
    extract_archive() (used by extract()), and may override prepare() to
    transform the archive before it is inspected.
    """

    # Maps a mimetypes encoding name to the command that decodes it.
    decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'}

    def __init__(self, filename, mimetype, encoding):
        """Open *filename*, undo its encoding (if any) and call prepare()."""
        self.filename = filename
        self.mimetype = mimetype
        self.encoding = encoding
        # Archives are binary data; never let the platform translate them.
        self.archive = open(filename, 'rb')
        if encoding:
            self.pipe([self.decoders[encoding]], "decoding")
        self.prepare()

    def run(self, command, description="extraction", stdout=None, stderr=None,
            stdin=None, string_output=True):
        """Run *command* to completion and return its output.

        stdin, stdout and stderr are passed to subprocess.Popen.  With
        string_output true the return value is whatever was captured from a
        piped stdout (None when stdout was not a pipe); otherwise it is the
        process's stdout attribute.  Raises ExtractorError, labelled with
        *description*, when the command exits with a non-zero status.
        """
        process = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                                   stderr=stderr)
        if string_output:
            # communicate() reads the pipes while waiting, so a command that
            # writes more than a pipe buffer cannot block forever, and it
            # closes the pipes when done.
            output = process.communicate()[0]
            status = process.returncode
        else:
            status = process.wait()
            output = process.stdout
            if process.stderr is not None:
                process.stderr.close()
            if status != 0 and output is not None:
                # The caller never receives the stream on failure.
                output.close()
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (description, ' '.join(command), status))
        return output

    def pipe(self, command, description, stderr=None):
        """Replace self.archive with the result of filtering it via *command*.

        The command reads the current archive on stdin and writes into a
        temporary file, which becomes the new archive, rewound to its start.
        """
        output = tempfile.TemporaryFile()
        finished = False
        try:
            self.run(command, description, output, stderr, self.archive, False)
            finished = True
        finally:
            if not finished:
                output.close()
        self.archive.close()
        self.archive = output
        self.archive.flush()
        # The child wrote through the shared descriptor and left the offset
        # at the end; rewind so the next reader starts from the beginning.
        self.archive.seek(0, 0)

    def prepare(self):
        """Hook called at the end of __init__; does nothing by default."""
        pass

    def check_contents(self):
        """Classify the archive's layout from its file listing.

        Returns EMPTY when there are no entries, BOMB when some entry does
        not start with the first entry's leading path component,
        MATCHING_DIRECTORY when that component equals basename(), and
        ONE_DIRECTORY otherwise.
        """
        self.archive.seek(0, 0)
        filenames = self.get_filenames()
        try:
            first_part = filenames.next().split('/', 1)[0] + '/'
        except StopIteration:
            filenames.stop()
            return EMPTY
        for filename in filenames:
            if not filename.startswith(first_part):
                filenames.stop()
                return BOMB
        filenames.stop()
        if self.basename() == first_part[:-1]:
            return MATCHING_DIRECTORY
        return ONE_DIRECTORY

    def basename(self):
        """Return the archive's file name without directory or extensions.

        At most one encoding suffix (e.g. '.gz') and then one type suffix
        known to mimetypes (e.g. '.tar') are removed.  The last remaining
        piece is never removed, so 'tar.gz' yields 'tar', not ''.
        """
        pieces = os.path.basename(self.filename).split('.')
        if len(pieces) > 1 and ('.' + pieces[-1]) in mimetypes.encodings_map:
            pieces.pop()
        if len(pieces) > 1:
            extension = '.' + pieces[-1]
            if (extension in mimetypes.types_map or
                    extension in mimetypes.common_types or
                    extension in mimetypes.suffix_map):
                pieces.pop()
        return '.'.join(pieces)

    def extract(self, path):
        """Rewind the archive and unpack it via extract_archive().

        *path* is not used here; subclasses that override extract() may
        use it.
        """
        self.archive.seek(0, 0)
        self.extract_archive()
146 | ||
147 | ||
class TarExtractor(BaseExtractor):
    """Extractor that feeds the archive to tar on standard input."""

    def get_filenames(self):
        """Return a ProcessStreamer over the names listed by ``tar -t``."""
        listing_command = ['tar', '-t']
        return ProcessStreamer(listing_command, self.archive)

    def extract_archive(self):
        """Unpack the archive by running ``tar -x`` with it on stdin."""
        unpack_command = ['tar', '-x']
        self.run(unpack_command, stdin=self.archive)
154 | ||
155 | ||
class ZipExtractor(BaseExtractor):
    """Extractor that passes the archive's file name to zipinfo and unzip."""

    def __init__(self, filename, mimetype, encoding):
        """Record the arguments without opening the file.

        BaseExtractor.__init__ is not called: the zip tools are given the
        file name rather than a stream, so self.archive is only an empty
        in-memory placeholder for code that seeks on it.
        """
        self.filename, self.mimetype, self.encoding = (filename, mimetype,
                                                       encoding)
        self.archive = StringIO()

    def get_filenames(self):
        """Return a ProcessStreamer over the names from ``zipinfo -1``."""
        listing_command = ['zipinfo', '-1', self.filename]
        return ProcessStreamer(listing_command, None)

    def extract(self, path):
        """Run ``unzip -q`` on the archive, located by joining *path* and
        the file name."""
        archive_path = os.path.join(path, self.filename)
        self.run(['unzip', '-q', archive_path])
168 | ||
169 | ||
class CpioExtractor(BaseExtractor):
    """Extractor that feeds the archive to cpio on standard input."""

    def get_filenames(self):
        """Return a ProcessStreamer over the names listed by ``cpio -t``.

        cpio's stderr is sent to a pipe instead of the terminal.
        """
        listing_command = ['cpio', '-t']
        return ProcessStreamer(listing_command, self.archive,
                               stderr=subprocess.PIPE)

    def extract_archive(self):
        """Unpack the archive with cpio, creating directories as needed and
        refusing absolute file names; stderr is sent to a pipe."""
        unpack_command = ['cpio', '-i', '--make-directories',
                          '--no-absolute-filenames']
        self.run(unpack_command, stderr=subprocess.PIPE, stdin=self.archive)
179 | ||
180 | ||
class RPMExtractor(CpioExtractor):
    """Extractor for RPM packages: converts to cpio, then acts like cpio."""

    def prepare(self):
        """Replace the archive with the output of ``rpm2cpio -``."""
        self.pipe(['rpm2cpio', '-'], "rpm2cpio")

    def basename(self):
        """Return the package's file name without directory, '.rpm' or a
        short trailing component.

        Names that do not end in '.rpm' are handled by
        BaseExtractor.basename().
        """
        # Work on the file name only, so directories in the path (and any
        # dots they contain) never end up in the result.
        pieces = os.path.basename(self.filename).rsplit('.', 2)
        if len(pieces) == 1:
            return pieces[0]
        elif pieces[-1] != 'rpm':
            return BaseExtractor.basename(self)
        pieces.pop()
        if len(pieces) == 1:
            return pieces[0]
        elif len(pieces[-1]) < 6:
            # Drops a short final component such as 'i386' or 'src'.
            # NOTE(review): six-character components such as 'x86_64' and
            # 'noarch' are kept -- confirm whether that is intended.
            pieces.pop()
        return '.'.join(pieces)

    def check_contents(self):
        """Always report BOMB, without inspecting the package contents."""
        return BOMB
200 | ||
201 | ||
class DebExtractor(TarExtractor):
    """Extractor for Debian packages: pulls out data.tar.gz, then acts
    like tar."""

    def prepare(self):
        """Replace the archive with the decompressed data.tar.gz member."""
        self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
                  "data.tar.gz extraction")
        # Make sure zcat reads the extracted member from its beginning.
        self.archive.seek(0, 0)
        self.pipe(['zcat'], "data.tar.gz decompression")

    def basename(self):
        """Return the file name up to its last underscore, without directory.

        Falls back to BaseExtractor.basename() when the part after the last
        underscore is longer than ten characters or does not end in '.deb'.
        """
        # Work on the file name only, so an underscore in a directory name
        # cannot be mistaken for the package-name separator.
        pieces = os.path.basename(self.filename).rsplit('_', 1)
        if len(pieces) == 1:
            return pieces[0]
        elif (len(pieces[-1]) > 10) or (not pieces[-1].endswith('.deb')):
            return BaseExtractor.basename(self)
        return pieces[0]

    def check_contents(self):
        """Always report BOMB, without inspecting the package contents."""
        return BOMB
219 | ||
220 | ||
# Maps the MIME type guessed from a file name to the class that extracts it.
# 'application/x-shar' is listed but has no extractor class (its value is
# None).
extractor_map = dict([
    ('application/x-tar', TarExtractor),
    ('application/zip', ZipExtractor),
    ('application/x-msdos-program', ZipExtractor),
    ('application/x-debian-package', DebExtractor),
    ('application/x-redhat-package-manager', RPMExtractor),
    ('application/x-shar', None),
    ('application/x-cpio', CpioExtractor),
])
228 | ||
def show_error(filename, message):
    """Write ``filename: message`` and a newline to standard error."""
    # sys.stderr.write behaves the same on every Python version, unlike the
    # Python 2-only "print >>" form.
    sys.stderr.write("%s: %s\n" % (filename, message))
231 | ||
def _extract_archive(handler, filename, mimetype, encoding):
    """Unpack one archive, using a new directory when check_contents()
    reports BOMB.

    Raises ExtractorError when a command fails or the directory cannot be
    created.  The working directory is always restored before returning.
    """
    extractor = handler(filename, mimetype, encoding)
    if extractor.check_contents() != BOMB:
        extractor.extract('.')
        return
    directory = extractor.basename()
    start_dir = os.getcwd()
    try:
        os.mkdir(directory)
    except OSError:
        # sys.exc_info() works on every Python version, unlike either
        # spelling of "except OSError <name>".
        error = sys.exc_info()[1]
        raise ExtractorError("could not create %s: %s" %
                             (error.filename, error.strerror))
    os.chdir(directory)
    try:
        # Pass the absolute starting directory, so a relative archive name
        # still resolves after the chdir however deep the new directory is.
        extractor.extract(start_dir)
    finally:
        os.chdir(start_dir)


def main(arguments):
    """Extract every archive named in *arguments*.

    A problem with one file is reported on standard error through
    show_error() and the remaining files are still processed.
    """
    for filename in arguments:
        mimetype, encoding = mimetypes.guess_type(filename)
        if mimetype not in extractor_map:
            show_error(filename, "doesn't look like an archive")
            continue
        handler = extractor_map[mimetype]
        if handler is None:
            # Known archive type with no extractor class registered.
            show_error(filename, "no extractor available for %s" % mimetype)
            continue
        try:
            _extract_archive(handler, filename, mimetype, encoding)
        except (ExtractorError, EnvironmentError):
            # Covers failed commands, unreadable files and missing tools.
            show_error(filename, sys.exc_info()[1])
256 | ||
# When run as a script, pass every command-line argument to main().
if __name__ == '__main__':
    archive_names = sys.argv[1:]
    main(archive_names)