Wed, 01 Nov 2006 22:03:46 -0500
[svn] Additions.
1 | 1 | #!/usr/bin/env python |
2 | # | |
3 | # x -- Intelligently extract various archive types. | |
4 | # Copyright (c) 2006 Brett Smith <brettcsmith@brettcsmith.org>. | |
5 | # | |
6 | # This program is free software; you can redistribute it and/or modify it | |
7 | # under the terms of the GNU General Public License as published by the | |
8 | # Free Software Foundation; either version 2 of the License, or (at your | |
9 | # option) any later version. | |
10 | # | |
11 | # This program is distributed in the hope that it will be useful, but | |
12 | # WITHOUT ANY WARRANTY; without even the implied warranty of | |
13 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General | |
14 | # Public License for more details. | |
15 | # | |
16 | # You should have received a copy of the GNU General Public License along | |
17 | # with this program; if not, write to the Free Software Foundation, Inc., | |
18 | # 51 Franklin Street, 5th Floor, Boston, MA, 02111. | |
19 | ||
20 | import mimetypes | |
21 | import os | |
22 | import subprocess | |
23 | import sys | |
24 | import tempfile | |
25 | ||
26 | from cStringIO import StringIO | |
27 | ||
# Make sure '.bz2' is recognised as the bzip2 encoding even when the
# running Python's mimetypes table has no entry for it.
mimetypes.encodings_map.setdefault('.bz2', 'bzip2')

# Possible results of an extractor's check_contents().
MATCHING_DIRECTORY, ONE_DIRECTORY, BOMB, EMPTY = range(1, 5)
34 | ||
class ExtractorError(Exception):
    """Signals that an external archive command exited with an error."""
37 | ||
38 | ||
class ProcessStreamer(object):
    """Iterate over the lines a child process writes to standard output.

    The command is started as soon as the object is created.  Iterating
    yields each output line with its trailing newline removed; call stop()
    afterwards to reap the process and check its exit status.

    NOTE(review): while iterating, only stdout is read.  If stderr is a
    pipe and the child writes a lot to it, the child may block until
    stop() is called -- confirm this is acceptable for the commands used.
    """

    def __init__(self, command, stdin, description="checking contents",
                 stderr=None):
        """Start `command` (an argument list) with its stdout captured.

        stdin and stderr are handed to subprocess.Popen unchanged.
        description is only used as the prefix of the error message
        raised by stop().
        """
        self.process = subprocess.Popen(command, bufsize=1, stdin=stdin,
                                        stdout=subprocess.PIPE, stderr=stderr)
        self.command = ' '.join(command)
        self.description = description

    def __iter__(self):
        return self

    def next(self):
        """Return the next line of output without its trailing newline.

        Raises StopIteration once the child has closed its stdout.
        """
        line = self.process.stdout.readline()
        if not line:
            raise StopIteration
        if not isinstance(line, str):
            # Python 3 pipes deliver bytes; callers compare the result
            # with ordinary strings, so decode it here.
            line = line.decode(sys.getfilesystemencoding(), 'replace')
        return line.rstrip('\n')

    # Python 3 spells the iterator protocol method __next__.
    __next__ = next

    def stop(self):
        """Discard any remaining output and wait for the child to exit.

        Raises ExtractorError if the exit status is not zero.
        """
        # communicate() drains and closes every pipe we opened (stdout and,
        # if requested, stderr) before waiting, so the pipes are released
        # even when the command failed and a full stderr pipe cannot keep
        # the child from exiting.
        self.process.communicate()
        status = self.process.returncode
        if status != 0:
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (self.description, self.command, status))
69 | ||
70 | ||
class BaseExtractor(object):
    """Shared machinery for extractors that feed an archive to a command.

    The archive is opened when the object is created and, if the file
    name indicated a compression encoding, decoded into a temporary file.
    Subclasses provide get_filenames() and extract_archive(), and may
    override prepare() to convert the archive into another format first.
    """

    # Maps a mimetypes encoding name to the command that decodes it.
    decoders = {'bzip2': 'bzcat', 'gzip': 'zcat', 'compress': 'zcat'}

    def __init__(self, filename, mimetype, encoding):
        """Open `filename`, undo `encoding` if any, then call prepare().

        Raises ExtractorError if the encoding has no known decoder or a
        helper command fails; errors from open() propagate unchanged.
        """
        self.filename = filename
        self.mimetype = mimetype
        self.encoding = encoding
        # Archives are binary data, so never open them in text mode.
        self.archive = open(filename, 'rb')
        try:
            if encoding:
                try:
                    decoder = self.decoders[encoding]
                except KeyError:
                    raise ExtractorError(
                        "decoding error: unsupported encoding '%s'" %
                        encoding)
                self.pipe([decoder], "decoding")
            self.prepare()
        except Exception:
            # Do not leave the archive (or its decoded copy) open when
            # construction fails.
            self.archive.close()
            raise

    def run(self, command, description="extraction", stdout=None, stderr=None,
            stdin=None, string_output=True):
        """Run `command` to completion and return what it wrote to stdout.

        stdin, stdout and stderr are passed to subprocess.Popen.  The
        return value is the captured output when stdout is
        subprocess.PIPE and string_output is true, the still-open pipe
        when stdout is subprocess.PIPE and string_output is false, and
        None when stdout is not a pipe.

        Raises ExtractorError if the command exits with a non-zero status.
        """
        process = subprocess.Popen(command, stdin=stdin, stdout=stdout,
                                   stderr=stderr)
        live_pipe = process.stdout is not None and not string_output
        if live_pipe:
            # The caller wants to read the pipe itself, so it must stay
            # unread here; only stderr can be released.
            status = process.wait()
            output = process.stdout
            if process.stderr is not None:
                process.stderr.close()
        else:
            # communicate() reads stdout and stderr while waiting, so a
            # child producing more than a pipe buffer of output cannot
            # block forever, and the pipes are closed even on failure.
            output = process.communicate()[0]
            status = process.returncode
        if status != 0:
            if live_pipe:
                output.close()
            raise ExtractorError("%s error: '%s' returned status code %s" %
                                 (description, ' '.join(command), status))
        return output

    def pipe(self, command, description, stderr=None):
        """Replace self.archive with the output of `command` run on it.

        The current archive is the command's stdin and a fresh temporary
        file receives its stdout.  Raises ExtractorError if the command
        fails, in which case self.archive is left untouched.
        """
        output = tempfile.TemporaryFile()
        # The child reads from the shared file offset, so rewind first;
        # otherwise a second conversion would start at end-of-file.
        self.archive.seek(0, 0)
        try:
            self.run(command, description, output, stderr, self.archive,
                     False)
        except Exception:
            output.close()
            raise
        self.archive.close()
        self.archive = output
        self.archive.flush()

    def prepare(self):
        """Hook for subclasses that must convert the archive first."""
        pass

    def get_filenames(self):
        """Return an iterable of member names that also has a stop()."""
        raise NotImplementedError("subclasses must define get_filenames")

    def extract_archive(self):
        """Unpack self.archive into the current directory."""
        raise NotImplementedError("subclasses must define extract_archive")

    def _top_level(self, filename):
        """Return the first real path component of a member name.

        Empty and '.' components are skipped so './foo/bar' and
        '/foo/bar' both yield 'foo'.  Returns None for names such as
        './' that have no real component.
        """
        for part in filename.split('/'):
            if part not in ('', '.'):
                return part
        return None

    def check_contents(self):
        """Classify how the archive's members are laid out.

        Returns EMPTY when no members are listed, BOMB when they do not
        all share one top-level name, MATCHING_DIRECTORY when that name
        equals basename(), and ONE_DIRECTORY otherwise.
        """
        self.archive.seek(0, 0)
        filenames = self.get_filenames()
        top = None
        is_bomb = False
        for filename in filenames:
            component = self._top_level(filename)
            if component is None:
                continue
            if top is None:
                top = component
            elif component != top:
                is_bomb = True
                break
        filenames.stop()
        if is_bomb:
            return BOMB
        if top is None:
            return EMPTY
        if self.basename() == top:
            return MATCHING_DIRECTORY
        return ONE_DIRECTORY

    def basename(self):
        """Return the archive's file name without directory or extensions.

        One encoding suffix (per mimetypes.encodings_map) and then one
        type suffix (per the mimetypes type, common-type and suffix maps)
        are removed, so '/tmp/foo-1.0.tar.gz' gives 'foo-1.0'.
        """
        # Only the final path component names the archive; keeping the
        # directory part would make callers create and leave the wrong
        # directory.
        pieces = os.path.basename(self.filename).split('.')
        if len(pieces) > 1 and ('.' + pieces[-1]) in mimetypes.encodings_map:
            pieces.pop()
        if len(pieces) > 1:
            extension = '.' + pieces[-1]
            if (extension in mimetypes.types_map or
                    extension in mimetypes.common_types or
                    extension in mimetypes.suffix_map):
                pieces.pop()
        return '.'.join(pieces)

    def extract(self, path):
        """Rewind the archive and unpack it into the current directory.

        `path` is not used here; ZipExtractor's override reads it.
        """
        self.archive.seek(0, 0)
        self.extract_archive()
146 | ||
147 | ||
class TarExtractor(BaseExtractor):
    """Extractor that hands the (decoded) archive to the tar command."""

    # '-f -' makes tar use stdin explicitly; without it tar falls back to
    # $TAPE or its compiled-in default device, which need not be stdin.

    def get_filenames(self):
        """Stream the member names reported by 'tar -t'."""
        return ProcessStreamer(['tar', '-t', '-f', '-'], self.archive)

    def extract_archive(self):
        """Unpack the archive into the current directory with 'tar -x'."""
        self.run(['tar', '-x', '-f', '-'], stdin=self.archive)
154 | ||
155 | ||
class ZipExtractor(BaseExtractor):
    """Extractor that drives zipinfo and unzip on the file by name."""

    def __init__(self, filename, mimetype, encoding):
        """Record the arguments; the base class constructor is not run."""
        # The zip tools are given the file name, so nothing is opened.
        # An empty in-memory file stands in for the archive so the
        # inherited check_contents() can still seek on it.
        self.archive = StringIO()
        self.encoding = encoding
        self.mimetype = mimetype
        self.filename = filename

    def get_filenames(self):
        """Stream the member names printed by 'zipinfo -1'."""
        listing_command = ['zipinfo', '-1', self.filename]
        return ProcessStreamer(listing_command, None)

    def extract(self, path):
        """Run 'unzip -q' on the archive, located relative to `path`."""
        archive_path = os.path.join(path, self.filename)
        self.run(['unzip', '-q', archive_path])
168 | ||
169 | ||
class CpioExtractor(BaseExtractor):
    """Extractor that hands the (decoded) archive to the cpio command."""

    # Both commands get stderr=subprocess.PIPE, and nothing here ever
    # displays what arrives on that pipe -- presumably to keep cpio's
    # summary output off the terminal.

    def get_filenames(self):
        """Stream the member names reported by 'cpio -t'."""
        listing_command = ['cpio', '-t']
        return ProcessStreamer(listing_command, self.archive,
                               stderr=subprocess.PIPE)

    def extract_archive(self):
        """Unpack the archive into the current directory with 'cpio -i'."""
        extract_command = ['cpio', '-i', '--make-directories',
                           '--no-absolute-filenames']
        self.run(extract_command, stderr=subprocess.PIPE,
                 stdin=self.archive)
179 | ||
180 | ||
class RPMExtractor(CpioExtractor):
    """Extractor for RPM packages: convert to cpio, then treat as cpio."""

    def prepare(self):
        """Replace the archive with the output of 'rpm2cpio -'."""
        self.pipe(['rpm2cpio', '-'], "rpm2cpio")

    def basename(self):
        """Return the package's file name without directory or suffixes.

        For names ending in '.rpm', that suffix is dropped, along with
        the component before it when it is shorter than six characters
        (so 'foo-1.0-1.i386.rpm' gives 'foo-1.0-1').  Other dotted names
        are handled by BaseExtractor.basename().
        """
        # Work on the final path component only: a directory part would
        # end up in the result, and a dot inside it would confuse rsplit.
        pieces = os.path.basename(self.filename).rsplit('.', 2)
        if len(pieces) == 1:
            return pieces[0]
        if pieces[-1] != 'rpm':
            return BaseExtractor.basename(self)
        pieces.pop()
        if len(pieces) > 1 and len(pieces[-1]) < 6:
            pieces.pop()
        return '.'.join(pieces)

    def check_contents(self):
        """Always report BOMB, without inspecting the package."""
        return BOMB
200 | ||
201 | ||
class DebExtractor(TarExtractor):
    """Extractor for Debian packages: pull out data.tar.gz, then untar."""

    def prepare(self):
        """Replace the archive with the decompressed data.tar.gz member.

        NOTE(review): the member name is fixed as 'data.tar.gz'; packages
        that store their data member under another name would make the
        'ar' step fail -- confirm whether those need support.
        """
        self.pipe(['ar', 'p', self.filename, 'data.tar.gz'],
                  "data.tar.gz extraction")
        self.archive.seek(0, 0)
        self.pipe(['zcat'], "data.tar.gz decompression")

    def basename(self):
        """Return the package's file name without directory or arch suffix.

        When the text after the last underscore ends in '.deb' and is at
        most ten characters long it is dropped, so 'pkg_1.0-1_i386.deb'
        gives 'pkg_1.0-1'.  Other names containing an underscore are
        handled by BaseExtractor.basename().
        """
        # Work on the final path component only, so an underscore in a
        # directory name cannot be mistaken for the package separator.
        pieces = os.path.basename(self.filename).rsplit('_', 1)
        if len(pieces) == 1:
            return pieces[0]
        suffix = pieces[-1]
        if len(suffix) > 10 or not suffix.endswith('.deb'):
            return BaseExtractor.basename(self)
        return pieces[0]

    def check_contents(self):
        """Always report BOMB, without inspecting the package."""
        return BOMB
219 | ||
220 | ||
# Maps the MIME type guessed from a file name to the class that extracts
# it.  The shar entry is None: the type is listed but has no extractor
# class here.
extractor_map = {
    'application/x-tar': TarExtractor,
    'application/zip': ZipExtractor,
    'application/x-msdos-program': ZipExtractor,
    'application/x-debian-package': DebExtractor,
    'application/x-redhat-package-manager': RPMExtractor,
    'application/x-shar': None,
    'application/x-cpio': CpioExtractor,
}
228 | ||
def show_error(filename, message):
    """Write '<filename>: <message>' and a newline to standard error."""
    # sys.stderr.write behaves the same on Python 2 and 3, unlike the
    # 'print >>' statement form.
    sys.stderr.write("%s: %s\n" % (filename, message))
231 | ||
def main(arguments):
    """Extract every archive named in `arguments`.

    Each file's type is guessed from its name.  When check_contents()
    reports BOMB, a directory named after the archive is created and the
    archive is unpacked inside it; otherwise it is unpacked in the
    current directory.  A problem with one file is reported on standard
    error and the remaining files are still processed.
    """
    start_dir = os.getcwd()
    for filename in arguments:
        mimetype, encoding = mimetypes.guess_type(filename)
        try:
            handler = extractor_map[mimetype]
        except KeyError:
            show_error(filename, "doesn't look like an archive")
            continue
        if handler is None:
            # The type is listed in extractor_map but has no extractor.
            show_error(filename,
                       "archive type %s is not supported" % mimetype)
            continue
        # sys.exc_info() is used instead of binding the exception in the
        # except clause so the same code parses on Python 2 and 3.
        try:
            extractor = handler(filename, mimetype, encoding)
            contents = extractor.check_contents()
        except (ExtractorError, EnvironmentError):
            show_error(filename, str(sys.exc_info()[1]))
            continue
        path = '.'
        if contents == BOMB:
            directory = extractor.basename()
            try:
                os.mkdir(directory)
            except OSError:
                error = sys.exc_info()[1]
                show_error(filename, "could not create %s: %s" %
                           (error.filename, error.strerror))
                continue
            os.chdir(directory)
            # Relative archive names must now be resolved against the
            # directory we started in.
            path = start_dir
        try:
            try:
                extractor.extract(path)
            except (ExtractorError, EnvironmentError):
                show_error(filename, str(sys.exc_info()[1]))
        finally:
            # Always return to the starting directory so later archives
            # are looked up and unpacked in the right place.
            if contents == BOMB:
                os.chdir(start_dir)
256 | ||
if __name__ == '__main__':
    # Every command-line argument is treated as an archive to extract.
    main(sys.argv[1:])