Mon, 20 Mar 2023 11:40:59 +0000
Fix field name of implementation status in the JSON
0 | 1 | ''' |
2 | Download / prepare / process XMPP DOAP files for the software list | |
3 | Requires: Pillow, python-slugify | |
4 | ''' | |
5 | from typing import Any | |
6 | from typing import Optional | |
7 | from typing import Union | |
8 | ||
9 | import json | |
10 | import os | |
11 | import re | |
12 | import shutil | |
13 | from datetime import date | |
14 | from pathlib import Path | |
15 | from urllib.parse import urlparse | |
16 | ||
17 | from colorama import Fore | |
18 | from colorama import Style | |
19 | from defusedxml.ElementTree import parse | |
20 | from defusedxml.ElementTree import ParseError | |
21 | from PIL import Image | |
22 | from PIL import UnidentifiedImageError | |
23 | from PIL.Image import Resampling | |
24 | from slugify import slugify | |
25 | ||
26 | from util import download_file | |
27 | from util import initialize_directory | |
28 | ||
# --- Directory layout -------------------------------------------------------
SOFTWARE_PATH = Path('content/software')
DATA_PATH = Path('data')
DOWNLOAD_PATH = Path('downloads')
STATIC_PATH = Path('static')
STATIC_DOAP_PATH = STATIC_PATH / 'doap'
LOGOS_PATH = STATIC_PATH / 'images' / 'packages'

# --- XML namespaces and ElementPath expressions used to query DOAP files ----
DOAP_NS = 'http://usefulinc.com/ns/doap#'
XMPP_NS = 'https://linkmauve.fr/ns/xmpp-doap#'
SCHEMA_NS = 'https://schema.org/'
RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'


def _descendant_path(namespace: str, tag: str) -> str:
    '''
    Build an ElementPath expression matching any descendant element
    named `tag` in `namespace` (i.e. ".//{namespace}tag")
    '''
    return f'.//{{{namespace}}}{tag}'


DOAP_NAME = _descendant_path(DOAP_NS, 'name')
DOAP_SHORTDESC = _descendant_path(DOAP_NS, 'shortdesc')
DOAP_HOMEPAGE = _descendant_path(DOAP_NS, 'homepage')
DOAP_OS = _descendant_path(DOAP_NS, 'os')
DOAP_PROGRAMMING_LANGUAGE = _descendant_path(DOAP_NS, 'programming-language')
DOAP_LOGO = _descendant_path(SCHEMA_NS, 'logo')
DOAP_IMPLEMENTS = _descendant_path(DOAP_NS, 'implements')
DOAP_SUPPORTED_XEP = _descendant_path(XMPP_NS, 'SupportedXep')
DOAP_XEP_NUMBER = _descendant_path(XMPP_NS, 'xep')
DOAP_XEP_VERSION = _descendant_path(XMPP_NS, 'version')
DOAP_XEP_STATUS = _descendant_path(XMPP_NS, 'status')

# Patterns used to pull the RFC / XEP identifier out of a resource URI
RFC_REGEX = r'rfc\d{1,4}'
XEP_REGEX = r'xep-\d{1,4}'

# Processing instructions written at the top of the published DOAP files
XML_DECLARATION = '<?xml version="1.0" encoding="UTF-8"?>'
XMPP_XSL = '<?xml-stylesheet href="/doap/xmpp-style.xsl" type="text/xsl"?>'

# Template for the generated per-package markdown page; filled with the
# %-operator using the keys: title, date, type, name_slug.
# NOTE(review): the indentation of the list item below "aliases:" could not
# be recovered from the flattened source and is reproduced as shown there;
# please compare with the repository copy.
MD_FRONTMATTER = '''---
title: "%(title)s"
date: %(date)s
layout: packages
aliases:
- "/software/%(type)s/%(name_slug)s"
---

{{< package-details name_slug="%(name_slug)s" package_type="%(type)s" >}}
'''

# Singular category names as they appear in the package list
SOFTWARE_CATEGORIES: list[str] = [
    'client',
    'component',
    'library',
    'server',
    'tool',
]
PLATFORMS: list[str] = [
    'Android',
    'iOS',
    'Browser',
    'Windows',
    'macOS',
    'Linux',
]
84 | ||
85 | ||
def parse_doap_infos(doap_file: str
                     ) -> Optional[dict[str, Union[str, list[str], list[dict[str, str]], None]]]:
    '''
    Parse DOAP file and return infos

    `doap_file` is the file name without extension; the file is read from
    DOWNLOAD_PATH / 'doap_files' / '<doap_file>.doap'.

    Returns None (after printing the error) if the file is missing or is
    not well-formed XML. Otherwise returns a dict with the keys 'name',
    'homepage', 'shortdesc', 'platforms', 'programming_lang', 'logo',
    'rfcs' and 'xeps'. Scalar values are None when the corresponding
    element is absent.
    '''
    try:
        doap = parse(
            DOWNLOAD_PATH / f'doap_files/{doap_file}.doap')
    except (FileNotFoundError, ParseError) as err:
        print('Error while trying to parse DOAP file:', doap_file, err)
        return None

    # Elements must be compared against None explicitly: an Element
    # without children is falsy.
    doap_name = doap.find(DOAP_NAME)
    doap_homepage = doap.find(DOAP_HOMEPAGE)
    doap_shortdesc = doap.find(DOAP_SHORTDESC)
    doap_logo = doap.find(DOAP_LOGO)

    info: dict[str, Union[str, list[str], list[dict[str, str]], None]] = {
        'name': doap_name.text if doap_name is not None else None,
        'homepage': (doap_homepage.attrib.get(RDF_RESOURCE)
                     if doap_homepage is not None else None),
        'shortdesc': (doap_shortdesc.text
                      if doap_shortdesc is not None else None),
        'platforms': [entry.text for entry in doap.findall(DOAP_OS)],
        'programming_lang': [
            entry.text for entry in doap.findall(DOAP_PROGRAMMING_LANGUAGE)],
        'logo': (doap_logo.attrib.get(RDF_RESOURCE)
                 if doap_logo is not None else None),
    }

    rfcs: list[str] = []
    xeps: list[dict[str, str]] = []
    for entry in doap.findall(DOAP_IMPLEMENTS):
        # <implements rdf:resource="...rfcNNNN"/> declares an RFC
        rfc = entry.attrib.get(RDF_RESOURCE)
        if rfc is not None:
            match = re.search(RFC_REGEX, rfc)
            if match:
                # Strip the leading 'rfc', keep the digits
                rfcs.append(match.group()[3:])

        supported_xep = entry.find(DOAP_SUPPORTED_XEP)
        if supported_xep is None:
            continue

        # Look the <xep> element up once and tolerate its absence, so a
        # SupportedXep without it does not raise AttributeError.
        ref = None
        number = None
        xep_element = supported_xep.find(DOAP_XEP_NUMBER)
        if xep_element is not None:
            # NOTE(review): 'ref' is the element's text content, which is
            # None for an empty <xep rdf:resource="..."/> element. It is
            # later compared with a XEP URL, so the rdf:resource value may
            # have been intended here - confirm.
            ref = xep_element.text
            number = xep_element.attrib.get(RDF_RESOURCE)
            match = re.search(XEP_REGEX, number or '')
            if match:
                # Strip the leading 'xep-', keep the digits; if the
                # pattern does not match, the raw resource value is kept.
                number = match.group()[4:]

        version = supported_xep.find(DOAP_XEP_VERSION)
        if version is not None:
            version = version.text

        status = supported_xep.find(DOAP_XEP_STATUS)
        if status is not None:
            status = status.text

        xeps.append({
            'ref': ref,
            'number': number,
            'version': version,
            'status': status,
        })

    info['rfcs'] = rfcs
    info['xeps'] = xeps

    return info
165 | ||
166 | ||
def check_image_file(file_path: Path, extension: str) -> bool:
    '''
    Make sure the logo at `file_path` is not too large.

    SVG files and files of at most 300000 bytes are accepted as they are.
    Larger images are scaled to a width of 400 pixels (keeping the aspect
    ratio) and written back to `file_path`.
    Returns True on success, False if the file could not be read or resized.
    '''
    # Vector graphics are never resized
    if extension == 'svg':
        return True

    try:
        size_in_bytes = os.path.getsize(file_path)
    except OSError as error:
        print('An error occurred while trying to open logo:', error)
        return False

    if size_in_bytes > 300000:
        try:
            with Image.open(file_path) as original:
                original_width, original_height = original.size
                target_width = 400
                target_height = int(
                    target_width * original_height / original_width)
                resized = original.resize(
                    (target_width, target_height), Resampling.LANCZOS)
                # Overwrite the downloaded file with the smaller version
                resized.save(file_path)
            print(f' Logo at {file_path} '
                  f'(file size: {size_in_bytes / (1<<10):,.0f} KB) '
                  f'too big, had to be resized')
        except (ValueError, OSError, UnidentifiedImageError) as error:
            print('An error occurred while trying to resize logo:', error)
            return False

    return True
202 | ||
203 | ||
def process_logo(package_name: str, uri: str) -> Optional[str]:
    '''
    Fetch the logo at `uri`, make sure it is small enough and copy it
    to LOGOS_PATH.

    The file is named after `package_name` plus the extension found in
    the path of `uri`. Returns the site-relative logo URI
    ('/images/packages/<file name>'), or None if downloading or
    checking the image failed.
    '''
    extension = os.path.splitext(urlparse(uri).path)[1]
    file_name = f'{package_name}{extension}'

    # download_file receives a relative path; the result is expected below
    # DOWNLOAD_PATH (presumably download_file prepends it - see util).
    if not download_file(uri, Path(file_name)):
        return None

    downloaded_logo = DOWNLOAD_PATH / file_name
    # extension includes the leading dot, check_image_file wants it bare
    if not check_image_file(downloaded_logo, extension[1:].lower()):
        return None

    shutil.copyfile(downloaded_logo, LOGOS_PATH / file_name)
    return f'/images/packages/{package_name}{extension}'
226 | ||
227 | ||
def prepare_package_data() -> None:
    '''
    Download and prepare package data (software.json) for
    rendering with Hugo

    For every package in DATA_PATH / 'software.json' that declares a DOAP
    file, the file is fetched (copied from STATIC_PATH if hosted at
    xmpp.org, downloaded otherwise) and parsed, its logo is processed and
    one markdown page is created per package category. The collected data
    is written to DATA_PATH / 'software_list_doap.json'.
    '''
    # SOFTWARE_PATH is re-initialized below; keep the two hand-written
    # pages by parking them in DOWNLOAD_PATH and restoring them afterwards.
    shutil.copy(SOFTWARE_PATH / '_index.md',
                DOWNLOAD_PATH / 'software_index.md')
    shutil.copy(SOFTWARE_PATH / 'software-comparison.md',
                DOWNLOAD_PATH / 'software-comparison.md')
    initialize_directory(SOFTWARE_PATH)
    shutil.copy(DOWNLOAD_PATH / 'software_index.md',
                SOFTWARE_PATH / '_index.md')
    shutil.copy(DOWNLOAD_PATH / 'software-comparison.md',
                SOFTWARE_PATH / 'software-comparison.md')

    with open(DATA_PATH / 'software.json', 'rb') as json_file:
        xsf_package_list = json.load(json_file)

    package_infos: dict[str, Any] = {}

    number_of_doap_packages = 0

    for package in xsf_package_list:
        if package['doap'] is None:
            print(f'{Fore.YELLOW}DOAP n/a'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            continue

        # DOAP is available
        number_of_doap_packages += 1
        package_name_slug = slugify(
            package['name'],
            replacements=[['+', 'plus']])

        doap_url = package['doap']
        if doap_url.startswith('/hosted-doap'):
            # DOAP file is hosted at xmpp.org
            print(f'{Fore.LIGHTCYAN_EX}DOAP by xmpp.org'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            # doap_url starts with '/', so it is appended as a string:
            # joining it with the `/` operator would discard STATIC_PATH.
            shutil.copyfile(
                f'{STATIC_PATH}{doap_url}',
                DOWNLOAD_PATH / 'doap_files' / f'{package_name_slug}.doap')
        else:
            print(f'{Fore.LIGHTBLUE_EX}DOAP by vendor'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            # The result is not checked here: a failed download leaves no
            # file behind, which parse_doap_infos reports and turns into
            # None below.
            download_file(
                doap_url,
                Path(f'doap_files/{package_name_slug}.doap'))

        parsed_package_infos = parse_doap_infos(package_name_slug)
        if parsed_package_infos is None:
            continue

        logo_uri = None
        logo = parsed_package_infos['logo']
        if logo is not None and isinstance(logo, str):
            logo_uri = process_logo(
                package_name_slug, logo)

        package_infos[package['name']] = {
            'categories': package['categories'],
            'name_slug': package_name_slug,
            'homepage': parsed_package_infos['homepage'],
            'logo': logo_uri,
            'shortdesc': parsed_package_infos['shortdesc'],
            'platforms': parsed_package_infos['platforms'],
            'programming_lang': parsed_package_infos['programming_lang'],
            'rfcs': parsed_package_infos['rfcs'],
            'xeps': parsed_package_infos['xeps'],
        }

        for category in package['categories']:
            # Pages are created with the plural form of the category
            if category == 'library':
                category_plural = 'libraries'
            else:
                category_plural = f'{category}s'
            create_package_page(
                category_plural, package_name_slug, package['name'])

    print(f'Number of packages:\n'
          f'total: {len(xsf_package_list)} '
          f'(with DOAP: {number_of_doap_packages}), '
          f'\n{42 * "="}')
    with open(DATA_PATH / 'software_list_doap.json',
              'w',
              encoding='utf-8') as package_data_file:
        json.dump(package_infos, package_data_file, indent=4)
323 | ||
324 | ||
def add_doap_data_to_xeplist() -> None:
    '''
    Add implementation data to the XEP list

    Reads DATA_PATH / 'software_list_doap.json' and
    DATA_PATH / 'xeplist.json', gives every entry of the XEP list an
    'implementations' list (one item per package declaring support for
    it) and writes the XEP list back to DATA_PATH / 'xeplist.json'.
    '''
    # Both files are read as UTF-8 explicitly, matching how they are
    # written, instead of depending on the platform's default encoding.
    with open(DATA_PATH / 'software_list_doap.json',
              encoding='utf-8') as software_list:
        software_data = json.load(software_list)
    with open(DATA_PATH / 'xeplist.json',
              encoding='utf-8') as xep_list:
        xep_data = json.load(xep_list)

    for xep in xep_data:
        # Packages store the XEP number as zero-padded 4-digit string
        xep_number = None
        if xep['number'] is not None:
            xep_number = f'{xep["number"]:04d}'
        xep_url = xep.get('url')

        implementations = []
        for name, package_data in software_data.items():
            if not package_data['xeps']:
                continue
            for supported_xep in package_data['xeps']:
                matches_number = (
                    xep_number is not None
                    and supported_xep['number'] == xep_number)
                # A missing reference must never match a missing URL,
                # otherwise every such package would be listed for every
                # entry without URL.
                matches_ref = (
                    supported_xep['ref'] is not None
                    and supported_xep['ref'] == xep_url)
                if matches_number or matches_ref:
                    implementations.append({
                        'package_name': name,
                        'package_name_slug': package_data['name_slug'],
                        'package_categories': package_data['categories'],
                        'implemented_version': supported_xep['version'],
                        'implementation_status': supported_xep['status']
                    })
                    # List each package at most once per XEP
                    break
        xep['implementations'] = implementations

    with open(DATA_PATH / 'xeplist.json',
              'w',
              encoding='utf-8') as xep_list:
        json.dump(xep_data, xep_list, indent=4)
352 | ||
def create_package_page(package_type: str, name_slug: str, name: str) -> None:
    '''
    Write SOFTWARE_PATH / '<name_slug>.md': a front matter block dated
    today plus the shortcode which renders the package details
    '''
    page_content = MD_FRONTMATTER % {
        'title': f'XMPP {package_type.capitalize()}: {name}',
        'date': date.today().strftime('%Y-%m-%d'),
        'type': package_type,
        'name_slug': name_slug,
    }
    page_path = SOFTWARE_PATH / f'{name_slug}.md'
    with open(page_path, 'w', encoding='utf8') as md_file:
        md_file.write(page_content)
371 | ||
372 | ||
def prepare_doap_files() -> None:
    '''
    Copy DOAP files to /static/doap/ and replace the
    xml-stylesheet with our stylesheet (or add it, if there is none)

    Files whose XML header can not be adjusted are emptied, since we
    can't control what would be rendered for them.
    '''
    for entry in os.scandir(DOWNLOAD_PATH / 'doap_files'):
        shutil.copy(DOWNLOAD_PATH / 'doap_files' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    for entry in os.scandir(STATIC_PATH / 'hosted-doap'):
        shutil.copy(STATIC_PATH / 'hosted-doap' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    xml_declaration_pattern = r'<\?xml version.+?\?>'
    stylesheet_pattern = r'<\?xml-stylesheet.+?\?>'
    for entry in os.scandir(STATIC_DOAP_PATH):
        if not entry.name.endswith('.doap'):
            continue

        with open(STATIC_DOAP_PATH / entry.name,
                  'r+',
                  encoding='utf-8') as doap_file:
            content = doap_file.read()

            # Decide by the number of matches, not by comparing the text:
            # a file that already uses exactly our stylesheet is unchanged
            # by the substitution and must not get a second one added.
            result, matches = re.subn(
                stylesheet_pattern,
                XMPP_XSL,
                content,
                count=0,
                flags=re.MULTILINE)

            if not matches:
                # No custom stylesheet found, add ours right after
                # the XML declaration
                result, matches = re.subn(
                    xml_declaration_pattern,
                    f'{XML_DECLARATION}\n{XMPP_XSL}',
                    content,
                    count=0,
                    flags=re.MULTILINE)

            if not matches:
                print('WARNING: Could not alter XML header of', entry.name)
                # Remove content entirely, since we can't
                # control what would be rendered
                result = ''

            if result != content:
                # Rewrite the file from the start
                doap_file.seek(0)
                doap_file.truncate()
                doap_file.write(result)
427 | ||
428 | ||
if __name__ == '__main__':
    # Set up the working directories before anything is fetched
    for directory in (DOWNLOAD_PATH, LOGOS_PATH):
        initialize_directory(directory)
    (DOWNLOAD_PATH / 'doap_files').mkdir(parents=True)

    # Collect the package data, then enrich the XEP list with it
    prepare_package_data()
    add_doap_data_to_xeplist()

    # Publish the DOAP files
    initialize_directory(STATIC_DOAP_PATH)
    prepare_doap_files()