|
1 ''' |
|
2 Download / prepare / process XMPP DOAP files for the software list |
|
3 Requires: Pillow, python-slugify |
|
4 ''' |
|
5 from typing import Any |
|
6 from typing import Optional |
|
7 from typing import Union |
|
8 |
|
9 import json |
|
10 import os |
|
11 import re |
|
12 import shutil |
|
13 from datetime import date |
|
14 from pathlib import Path |
|
15 from urllib.parse import urlparse |
|
16 |
|
17 from colorama import Fore |
|
18 from colorama import Style |
|
19 from defusedxml.ElementTree import parse |
|
20 from defusedxml.ElementTree import ParseError |
|
21 from PIL import Image |
|
22 from PIL import UnidentifiedImageError |
|
23 from PIL.Image import Resampling |
|
24 from slugify import slugify |
|
25 |
|
26 from util import download_file |
|
27 from util import initialize_directory |
|
28 |
|
# Input/output locations; all of them are relative paths
SOFTWARE_PATH = Path('content/software')
DATA_PATH = Path('data')
DOWNLOAD_PATH = Path('downloads')
STATIC_PATH = Path('static')
STATIC_DOAP_PATH = STATIC_PATH / 'doap'
LOGOS_PATH = STATIC_PATH / 'images' / 'packages'

# XML namespaces and the ElementTree search paths built from them.
# The tripled braces in the f-strings yield a literal '{namespace}tag'
# qualified name; the './/' prefix searches all descendants.
DOAP_NS = 'http://usefulinc.com/ns/doap#'
XMPP_NS = 'https://linkmauve.fr/ns/xmpp-doap#'
SCHEMA_NS = 'https://schema.org/'
RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
DOAP_NAME = f'.//{{{DOAP_NS}}}name'
DOAP_SHORTDESC = f'.//{{{DOAP_NS}}}shortdesc'
DOAP_HOMEPAGE = f'.//{{{DOAP_NS}}}homepage'
DOAP_OS = f'.//{{{DOAP_NS}}}os'
DOAP_PROGRAMMING_LANGUAGE = f'.//{{{DOAP_NS}}}programming-language'
# The logo element lives in the schema.org namespace, not the DOAP one
DOAP_LOGO = f'.//{{{SCHEMA_NS}}}logo'
DOAP_IMPLEMENTS = f'.//{{{DOAP_NS}}}implements'
DOAP_SUPPORTED_XEP = f'.//{{{XMPP_NS}}}SupportedXep'
DOAP_XEP_NUMBER = f'.//{{{XMPP_NS}}}xep'
DOAP_XEP_VERSION = f'.//{{{XMPP_NS}}}version'
DOAP_XEP_STATUS = f'.//{{{XMPP_NS}}}status'

# Patterns for finding an 'rfcNNNN' / 'xep-NNNN' identifier inside a URI
RFC_REGEX = r'rfc\d{1,4}'
XEP_REGEX = r'xep-\d{1,4}'

# Header lines written into the DOAP files below STATIC_DOAP_PATH
XML_DECLARATION = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'
XMPP_XSL = '<?xml-stylesheet href=\"/doap/xmpp-style.xsl\" type=\"text/xsl\"?>'
|
57 |
|
# Template for the generated package pages: front matter followed by the
# package-details shortcode. Filled in with %-formatting using the keys
# 'title', 'date', 'type' and 'name_slug'.
# NOTE(review): leading whitespace inside this literal could not be
# recovered from the source as received - confirm the indentation of
# the alias list item.
MD_FRONTMATTER = '''---
title: "%(title)s"
date: %(date)s
layout: packages
aliases:
- "/software/%(type)s/%(name_slug)s"
---

{{< package-details name_slug="%(name_slug)s" package_type="%(type)s" >}}
'''
|
68 |
|
# Software category names, in singular form
SOFTWARE_CATEGORIES: list[str] = [
    'client',
    'component',
    'library',
    'server',
    'tool',
]
# Platform names (not referenced by the functions in this section of the
# file; presumably used elsewhere - verify before changing)
PLATFORMS: list[str] = [
    'Android',
    'iOS',
    'Browser',
    'Windows',
    'macOS',
    'Linux',
]
|
84 |
|
85 |
|
def parse_doap_infos(doap_file: str
                     ) -> Optional[dict[str, Union[str, list[str], list[dict[str, str]], None]]]:
    '''
    Parse DOAP file and return infos

    doap_file is the file name without extension; the file is read from
    DOWNLOAD_PATH / 'doap_files' / '<doap_file>.doap'.

    Returns None (after printing the error) if the file does not exist
    or cannot be parsed. Otherwise returns a dict with these keys:
    'name', 'homepage', 'shortdesc', 'logo' (each None if the element
    is absent), 'platforms', 'programming_lang' (lists of element
    texts), 'rfcs' (list of RFC numbers as strings) and 'xeps' (list of
    dicts with the keys 'ref', 'number', 'version' and 'status').
    '''
    try:
        doap = parse(
            DOWNLOAD_PATH / f'doap_files/{doap_file}.doap')
    except (FileNotFoundError, ParseError) as err:
        print('Error while trying to parse DOAP file:', doap_file, err)
        return None

    info: dict[str, Union[str, list[str], list[dict[str, str]], None]] = {}

    doap_name = doap.find(DOAP_NAME)
    info['name'] = doap_name.text if doap_name is not None else None

    # Homepage and logo carry their value in the rdf:resource attribute,
    # not in the element text
    doap_homepage = doap.find(DOAP_HOMEPAGE)
    info['homepage'] = (doap_homepage.attrib.get(RDF_RESOURCE)
                        if doap_homepage is not None else None)

    doap_shortdesc = doap.find(DOAP_SHORTDESC)
    info['shortdesc'] = (doap_shortdesc.text
                         if doap_shortdesc is not None else None)

    info['platforms'] = [
        entry.text for entry in doap.findall(DOAP_OS)]
    info['programming_lang'] = [
        entry.text for entry in doap.findall(DOAP_PROGRAMMING_LANGUAGE)]

    doap_logo = doap.find(DOAP_LOGO)
    info['logo'] = (doap_logo.attrib.get(RDF_RESOURCE)
                    if doap_logo is not None else None)

    rfcs: list[str] = []
    xeps: list[dict[str, str]] = []
    for entry in doap.findall(DOAP_IMPLEMENTS):
        # An <implements/> element either points at an RFC through its
        # rdf:resource attribute or wraps a <SupportedXep/> element
        rfc = entry.attrib.get(RDF_RESOURCE)
        if rfc is not None:
            match = re.search(RFC_REGEX, rfc)
            if match:
                # Strip the 'rfc' prefix, keep the digits
                rfcs.append(match.group()[3:])

        supported_xep = entry.find(DOAP_SUPPORTED_XEP)
        if supported_xep is None:
            continue

        # Look the <xep/> element up once and guard every access to it:
        # a SupportedXep without an <xep/> child must not abort the run
        ref = None
        number = None
        xep_element = supported_xep.find(DOAP_XEP_NUMBER)
        if xep_element is not None:
            ref = xep_element.text
            number = xep_element.attrib.get(RDF_RESOURCE)
            match = re.search(XEP_REGEX, number or '')
            if match:
                # Strip the 'xep-' prefix, keep the digits. Without a
                # match the raw attribute value is kept as is.
                number = match.group()[4:]

        version = supported_xep.find(DOAP_XEP_VERSION)
        if version is not None:
            version = version.text

        status = supported_xep.find(DOAP_XEP_STATUS)
        if status is not None:
            status = status.text

        xeps.append({
            'ref': ref,
            'number': number,
            'version': version,
            'status': status,
        })

    info['rfcs'] = rfcs
    info['xeps'] = xeps

    return info
|
165 |
|
166 |
|
def check_image_file(file_path: Path, extension: str) -> bool:
    '''
    Check if file size is greater than 300,000 bytes and if so, shrink
    the image in place to a width of 400 pixels (keeping aspect ratio)

    extension is the lower-case file extension without the leading dot.
    SVG files are never resized. Images which are already at most
    400 pixels wide are left untouched, since resizing them would
    enlarge them.

    Returns success: False if the file size cannot be determined or the
    image cannot be resized, True otherwise
    '''
    if extension == 'svg':
        # No need to resize SVG files
        return True

    try:
        file_size = os.path.getsize(file_path)
    except OSError as error:
        print('An error occurred while trying to open logo:', error)
        return False

    if file_size <= 300000:
        # Small enough, no need to resize image
        return True

    try:
        with Image.open(file_path) as img:
            width, height = img.size
            new_width = 400
            if width <= new_width:
                # Resizing to 400 pixels would upscale the image
                return True
            # Extremely wide images would otherwise round down to a
            # height of 0, which cannot be resized to
            new_height = max(1, int(new_width * height / width))
            resized = img.resize(
                (new_width, new_height), Resampling.LANCZOS)
            resized.save(file_path)
            print(f' Logo at {file_path} '
                  f'(file size: {file_size / (1<<10):,.0f} KB) '
                  f'too big, had to be resized')
    except (ValueError, OSError, UnidentifiedImageError) as error:
        print('An error occurred while trying to resize logo:', error)
        return False

    return True
|
202 |
|
203 |
|
def process_logo(package_name: str, uri: str) -> Optional[str]:
    '''
    Download the logo at uri, check (and possibly shrink) it and copy
    it to LOGOS_PATH

    The file is named after package_name, with the extension taken from
    the path component of uri.

    Returns the site-relative logo URI, or None if the download or the
    image check failed
    '''
    extension = os.path.splitext(urlparse(uri).path)[1]
    file_name = f'{package_name}{extension}'
    downloaded_logo = DOWNLOAD_PATH / file_name

    # download_file() receives the bare file name; the result is then
    # expected at DOWNLOAD_PATH / file_name
    if not download_file(uri, Path(file_name)):
        return None

    # Hand over the extension without its leading dot, lower-cased
    if not check_image_file(downloaded_logo, extension[1:].lower()):
        return None

    shutil.copyfile(downloaded_logo, Path(LOGOS_PATH / file_name))
    return f'/images/packages/{package_name}{extension}'
|
226 |
|
227 |
|
def prepare_package_data() -> None:
    '''
    Download and prepare package data (software.json) for
    rendering with Hugo

    For every package in data/software.json which has a DOAP file, the
    file is fetched (or copied, if hosted at xmpp.org), parsed, its
    logo processed and a package page created. The collected infos are
    written to data/software_list_doap.json. Packages without DOAP
    file, or whose DOAP file cannot be parsed, are skipped.
    '''
    # Keep copies of the two hand-written pages while SOFTWARE_PATH is
    # re-initialized, then put them back (initialize_directory()
    # presumably empties the directory - verify against util)
    shutil.copy(SOFTWARE_PATH / '_index.md',
                DOWNLOAD_PATH / 'software_index.md')
    shutil.copy(SOFTWARE_PATH / 'software-comparison.md',
                DOWNLOAD_PATH / 'software-comparison.md')
    initialize_directory(SOFTWARE_PATH)
    shutil.copy(DOWNLOAD_PATH / 'software_index.md',
                SOFTWARE_PATH / '_index.md')
    shutil.copy(DOWNLOAD_PATH / 'software-comparison.md',
                SOFTWARE_PATH / 'software-comparison.md')

    with open(DATA_PATH / 'software.json', 'rb') as json_file:
        xsf_package_list = json.load(json_file)

    package_infos: dict[str, Any] = {}

    number_of_doap_packages = 0

    for package in xsf_package_list:
        if package['doap'] is None:
            print(f'{Fore.YELLOW}DOAP n/a'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            continue

        # DOAP is available
        number_of_doap_packages += 1
        package_name_slug = slugify(
            package['name'],
            replacements=[['+', 'plus']])

        doap_url = package['doap']
        if doap_url.startswith('/hosted-doap'):
            # DOAP file is hosted at xmpp.org
            print(f'{Fore.LIGHTCYAN_EX}DOAP by xmpp.org'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            shutil.copyfile(
                f'{STATIC_PATH}{doap_url}',
                Path(f'{DOWNLOAD_PATH}/doap_files/{package_name_slug}.doap'))
        else:
            print(f'{Fore.LIGHTBLUE_EX}DOAP by vendor'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            # A failed download is not handled here: the missing file
            # makes parse_doap_infos() return None below
            download_file(
                doap_url,
                Path(f'doap_files/{package_name_slug}.doap'))

        parsed_package_infos = parse_doap_infos(package_name_slug)
        if parsed_package_infos is None:
            continue

        logo_uri = None
        logo = parsed_package_infos['logo']
        if logo is not None and isinstance(logo, str):
            logo_uri = process_logo(
                package_name_slug, logo)

        package_infos[package['name']] = {
            'categories': package['categories'],
            'name_slug': package_name_slug,
            'homepage': parsed_package_infos['homepage'],
            'logo': logo_uri,
            'shortdesc': parsed_package_infos['shortdesc'],
            'platforms': parsed_package_infos['platforms'],
            'programming_lang': parsed_package_infos['programming_lang'],
            'rfcs': parsed_package_infos['rfcs'],
            'xeps': parsed_package_infos['xeps'],
        }

        for category in package['categories']:
            # Page types use the plural form of the category name
            if category == 'library':
                page_type = 'libraries'
            else:
                page_type = f'{category}s'
            # The page path depends on the slug only, so for a package
            # with several categories the last category's page wins
            create_package_page(page_type, package_name_slug, package['name'])

    print(f'Number of packages:\n'
          f'total: {len(xsf_package_list)} '
          f'(with DOAP: {number_of_doap_packages}), '
          f'\n{42 * "="}')
    with open(DATA_PATH / 'software_list_doap.json',
              'w',
              encoding='utf-8') as package_data_file:
        json.dump(package_infos, package_data_file, indent=4)
|
323 |
|
324 |
|
def add_doap_data_to_xeplist() -> None:
    '''
    Add implementation infos from the parsed DOAP data to xeplist.json

    Every entry of data/xeplist.json gets an 'implementations' list
    with one item per package of data/software_list_doap.json which
    declares support for that XEP. A declaration matches if its number
    equals the zero-padded (four digits) XEP number, or if its ref
    equals the XEP's url. xeplist.json is rewritten in place.
    '''
    with open(DATA_PATH / 'software_list_doap.json',
              encoding='utf-8') as software_list:
        software_data = json.load(software_list)
    with open(DATA_PATH / 'xeplist.json', encoding='utf-8') as xep_list:
        xep_data = json.load(xep_list)

    for xep in xep_data:
        xep['implementations'] = []

        # DOAP data holds XEP numbers as zero-padded strings
        xep_number = None
        if xep['number'] is not None:
            xep_number = f'{xep["number"]:04d}'

        for name, package_data in software_data.items():
            if not package_data['xeps']:
                continue
            for supported_xep in package_data['xeps']:
                number_matches = (
                    xep_number is not None
                    and supported_xep['number'] == xep_number)
                # A missing ref must never count as a match
                ref_matches = (
                    supported_xep['ref'] is not None
                    and supported_xep['ref'] == xep['url'])
                if number_matches or ref_matches:
                    xep['implementations'].append({
                        'package_name': name,
                        'package_name_slug': package_data['name_slug'],
                        'package_categories': package_data['categories'],
                        'implemented_version': supported_xep['version'],
                        'implementation_status': supported_xep['status']
                    })
                    # List every package at most once per XEP
                    break

    with open(DATA_PATH / 'xeplist.json',
              'w',
              encoding='utf-8') as xep_list:
        json.dump(xep_data, xep_list, indent=4)
|
352 |
|
def create_package_page(package_type: str, name_slug: str, name: str) -> None:
    '''
    Write SOFTWARE_PATH/<name_slug>.md for a package

    The page consists of MD_FRONTMATTER filled with the capitalized
    package type and package name as title, today's date, and the
    package type and slug for the alias and the package-details
    shortcode. An existing page with the same slug is overwritten.
    '''
    date_formatted = date.today().strftime('%Y-%m-%d')
    page_path = SOFTWARE_PATH / f'{name_slug}.md'
    with open(page_path, 'w', encoding='utf8') as md_file:
        frontmatter_values = {
            'title': f'XMPP {package_type.capitalize()}: {name}',
            'date': date_formatted,
            'type': package_type,
            'name_slug': name_slug,
        }
        md_file.write(MD_FRONTMATTER % frontmatter_values)
|
371 |
|
372 |
|
def prepare_doap_files() -> None:
    '''
    Copy DOAP files to /static/doap/ and replace the
    xml-stylesheet with our stylesheet (or add it, if there is none)

    Sources are DOWNLOAD_PATH/doap_files and STATIC_PATH/hosted-doap.
    Only files ending in '.doap' are rewritten. A file with neither a
    stylesheet instruction nor an XML declaration is emptied, after
    printing a warning.
    '''
    for entry in os.scandir(DOWNLOAD_PATH / 'doap_files'):
        shutil.copy(DOWNLOAD_PATH / 'doap_files' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    for entry in os.scandir(STATIC_PATH / 'hosted-doap'):
        shutil.copy(STATIC_PATH / 'hosted-doap' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    xml_declaration_pattern = r'<\?xml version.+?\?>'
    stylesheet_pattern = r'<\?xml-stylesheet.+?\?>'
    for entry in os.scandir(STATIC_DOAP_PATH):
        if not entry.name.endswith('.doap'):
            continue

        with open(STATIC_DOAP_PATH / entry.name,
                  'r+',
                  encoding='utf-8') as doap_file:
            content = doap_file.read()

            # Count substitutions instead of comparing strings: a file
            # which already carries exactly our stylesheet line must
            # count as handled, not as "no stylesheet found".
            # count/flags are passed by keyword; passing them
            # positionally is deprecated since Python 3.13.
            result, replaced = re.subn(
                stylesheet_pattern,
                XMPP_XSL,
                content,
                count=0,
                flags=re.MULTILINE)

            if not replaced:
                # No custom stylesheet found, add ours right after
                # the XML declaration
                result, replaced = re.subn(
                    xml_declaration_pattern,
                    f'{XML_DECLARATION}\n{XMPP_XSL}',
                    content,
                    count=0,
                    flags=re.MULTILINE)

            if not replaced:
                print('WARNING: Could not alter XML header of', entry.name)
                # Remove content entirely, since we can't
                # control what would be rendered
                result = ''

            doap_file.seek(0)
            doap_file.truncate()
            doap_file.write(result)
|
427 |
|
428 |
|
if __name__ == '__main__':
    # Set up the working directories. mkdir() is called without
    # exist_ok, so 'doap_files' must not exist at this point
    # (presumably initialize_directory() leaves DOWNLOAD_PATH empty -
    # verify against util).
    initialize_directory(DOWNLOAD_PATH)
    initialize_directory(LOGOS_PATH)
    (DOWNLOAD_PATH / 'doap_files').mkdir(parents=True)

    # Fetch and parse the DOAP files, then merge the result into the
    # XEP list
    prepare_package_data()
    add_doap_data_to_xeplist()

    # Publish the DOAP files with our stylesheet
    initialize_directory(STATIC_DOAP_PATH)
    prepare_doap_files()