prepare_software_list.py

changeset 0
8e1675826e46
equal deleted inserted replaced
-1:000000000000 0:8e1675826e46
1 '''
2 Download / prepare / process XMPP DOAP files for the software list
3 Requires: Pillow, python-slugify
4 '''
5 from typing import Any
6 from typing import Optional
7 from typing import Union
8
9 import json
10 import os
11 import re
12 import shutil
13 from datetime import date
14 from pathlib import Path
15 from urllib.parse import urlparse
16
17 from colorama import Fore
18 from colorama import Style
19 from defusedxml.ElementTree import parse
20 from defusedxml.ElementTree import ParseError
21 from PIL import Image
22 from PIL import UnidentifiedImageError
23 from PIL.Image import Resampling
24 from slugify import slugify
25
26 from util import download_file
27 from util import initialize_directory
28
# Directories used by this script (relative paths)
SOFTWARE_PATH = Path('content/software')
DATA_PATH = Path('data')
DOWNLOAD_PATH = Path('downloads')
STATIC_PATH = Path('static')
STATIC_DOAP_PATH = STATIC_PATH / 'doap'
LOGOS_PATH = STATIC_PATH / 'images' / 'packages'

# XML namespaces and the namespaced ElementTree queries built from them.
# Every query starts with './/', i.e. it searches all descendants of the
# element it is applied to.
DOAP_NS = 'http://usefulinc.com/ns/doap#'
XMPP_NS = 'https://linkmauve.fr/ns/xmpp-doap#'
SCHEMA_NS = 'https://schema.org/'
RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
DOAP_NAME = f'.//{{{DOAP_NS}}}name'
DOAP_SHORTDESC = f'.//{{{DOAP_NS}}}shortdesc'
DOAP_HOMEPAGE = f'.//{{{DOAP_NS}}}homepage'
DOAP_OS = f'.//{{{DOAP_NS}}}os'
DOAP_PROGRAMMING_LANGUAGE = f'.//{{{DOAP_NS}}}programming-language'
# Note: the logo element lives in the schema.org namespace, not in DOAP's
DOAP_LOGO = f'.//{{{SCHEMA_NS}}}logo'
DOAP_IMPLEMENTS = f'.//{{{DOAP_NS}}}implements'
DOAP_SUPPORTED_XEP = f'.//{{{XMPP_NS}}}SupportedXep'
DOAP_XEP_NUMBER = f'.//{{{XMPP_NS}}}xep'
DOAP_XEP_VERSION = f'.//{{{XMPP_NS}}}version'
DOAP_XEP_STATUS = f'.//{{{XMPP_NS}}}status'

# Patterns for picking the RFC / XEP identifier out of a resource URI;
# parse_doap_infos() strips the 'rfc' / 'xep-' prefix from the match
RFC_REGEX = r'rfc\d{1,4}'
XEP_REGEX = r'xep-\d{1,4}'

# XML header lines written into the published DOAP files
# by prepare_doap_files()
XML_DECLARATION = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'
XMPP_XSL = '<?xml-stylesheet href=\"/doap/xmpp-style.xsl\" type=\"text/xsl\"?>'
57
58 MD_FRONTMATTER = '''---
59 title: "%(title)s"
60 date: %(date)s
61 layout: packages
62 aliases:
63 - "/software/%(type)s/%(name_slug)s"
64 ---
65
66 {{< package-details name_slug="%(name_slug)s" package_type="%(type)s" >}}
67 '''
68
# Software categories in singular form; prepare_package_data() converts
# category names to their plural form ('library' -> 'libraries',
# otherwise an appended 's') before creating package pages
SOFTWARE_CATEGORIES: list[str] = [
    'client',
    'component',
    'library',
    'server',
    'tool',
]
# Platform names
# NOTE(review): not referenced by any function visible in this file -
# presumably kept as a reference list; confirm before removing
PLATFORMS: list[str] = [
    'Android',
    'iOS',
    'Browser',
    'Windows',
    'macOS',
    'Linux',
]
84
85
def parse_doap_infos(doap_file: str
                     ) -> Optional[dict[str, Union[str, list[str], list[dict[str, str]], None]]]:
    '''
    Parse DOAP file and return infos

    doap_file is the file name (without the '.doap' suffix) of a file
    located in DOWNLOAD_PATH / 'doap_files'.

    Returns None if the file is missing or is not well-formed XML.
    Otherwise returns a dict with the keys 'name', 'homepage',
    'shortdesc', 'platforms', 'programming_lang', 'logo', 'rfcs' and
    'xeps'. Single-valued entries are None if the DOAP file does not
    provide them.
    '''
    try:
        doap = parse(
            DOWNLOAD_PATH / f'doap_files/{doap_file}.doap')
    except (FileNotFoundError, ParseError) as err:
        print('Error while trying to parse DOAP file:', doap_file, err)
        return None

    info: dict[str, Union[str, list[str], list[dict[str, str]], None]] = {}

    doap_name = doap.find(DOAP_NAME)
    info['name'] = doap_name.text if doap_name is not None else None

    # The homepage is stored in the rdf:resource attribute, not as text
    doap_homepage = doap.find(DOAP_HOMEPAGE)
    info['homepage'] = (
        doap_homepage.attrib.get(RDF_RESOURCE)
        if doap_homepage is not None else None)

    doap_shortdesc = doap.find(DOAP_SHORTDESC)
    info['shortdesc'] = (
        doap_shortdesc.text if doap_shortdesc is not None else None)

    info['platforms'] = [
        entry.text for entry in doap.findall(DOAP_OS)]
    info['programming_lang'] = [
        entry.text for entry in doap.findall(DOAP_PROGRAMMING_LANGUAGE)]

    doap_logo = doap.find(DOAP_LOGO)
    info['logo'] = (
        doap_logo.attrib.get(RDF_RESOURCE)
        if doap_logo is not None else None)

    rfcs: list[str] = []
    xeps: list[dict[str, str]] = []
    for entry in doap.findall(DOAP_IMPLEMENTS):
        # An 'implements' element may reference an RFC directly ...
        rfc = entry.attrib.get(RDF_RESOURCE)
        if rfc is not None:
            match = re.search(RFC_REGEX, rfc)
            if match:
                # Strip the 'rfc' prefix, keep only the digits
                rfcs.append(match.group()[3:])

        # ... and/or contain a SupportedXep description
        supported_xep = entry.find(DOAP_SUPPORTED_XEP)
        if supported_xep is None:
            continue

        ref = None
        number = None
        xep_element = supported_xep.find(DOAP_XEP_NUMBER)
        # The xep element may be missing; only read from it if present
        # (accessing .text unconditionally would raise AttributeError)
        if xep_element is not None:
            ref = xep_element.text
            number = xep_element.attrib.get(RDF_RESOURCE)
            match = re.search(XEP_REGEX, number or '')
            if match:
                # Strip the 'xep-' prefix, keep only the digits.
                # Without a match, the raw attribute value is kept.
                number = match.group()[4:]

        version_element = supported_xep.find(DOAP_XEP_VERSION)
        version = (
            version_element.text if version_element is not None else None)

        status_element = supported_xep.find(DOAP_XEP_STATUS)
        status = (
            status_element.text if status_element is not None else None)

        xeps.append({
            'ref': ref,
            'number': number,
            'version': version,
            'status': status,
        })

    info['rfcs'] = rfcs
    info['xeps'] = xeps

    return info
165
166
def check_image_file(file_path: Path, extension: str) -> bool:
    '''
    Make sure the logo at file_path is not too big

    SVG files (extension == 'svg') are accepted as they are. Any other
    file bigger than 300000 bytes is scaled to a width of 400 pixels
    (keeping the aspect ratio) and saved back to file_path.

    Returns True if the file can be used, False if its size could not
    be determined or resizing failed.
    '''
    if extension == 'svg':
        # Vector graphics never need to be resized
        return True

    try:
        size_in_bytes = os.path.getsize(file_path)
    except OSError as error:
        print('An error occurred while trying to open logo:', error)
        return False

    if size_in_bytes <= 300000:
        # Small enough, keep the file untouched
        return True

    try:
        with Image.open(file_path) as original:
            width, height = original.size
            target_width = 400
            # Scale the height by the same factor as the width
            target_height = int(target_width * height / width)
            resized = original.resize(
                (target_width, target_height), Resampling.LANCZOS)
            resized.save(file_path)
            print(f' Logo at {file_path} '
                  f'(file size: {size_in_bytes / (1<<10):,.0f} KB) '
                  f'too big, had to be resized')
    except (ValueError, OSError, UnidentifiedImageError) as error:
        print('An error occurred while trying to resize logo:', error)
        return False

    return True
202
203
def process_logo(package_name: str, uri: str) -> Optional[str]:
    '''
    Fetch the logo at uri, validate it and copy it to LOGOS_PATH

    The file is stored as '<package_name><extension>', where the
    extension is taken from the path component of uri.

    Returns the site-relative logo URI, or None if downloading or
    checking the image failed.
    '''
    extension = os.path.splitext(urlparse(uri).path)[1]
    file_name = f'{package_name}{extension}'

    if not download_file(uri, Path(file_name)):
        return None

    downloaded_logo = DOWNLOAD_PATH / file_name
    # check_image_file expects the extension lowercased, without the dot
    if not check_image_file(downloaded_logo, extension[1:].lower()):
        return None

    shutil.copyfile(downloaded_logo, LOGOS_PATH / file_name)
    return f'/images/packages/{package_name}{extension}'
226
227
def prepare_package_data() -> None:
    '''
    Download and prepare package data (software.json) for
    rendering with Hugo

    For every package in data/software.json which references a DOAP
    file, the DOAP file is copied (if hosted under STATIC_PATH) or
    downloaded, parsed, its logo is processed and a package page is
    created. The collected infos are written to
    data/software_list_doap.json.
    '''
    # Keep copies of the two pages in DOWNLOAD_PATH and put them back
    # once SOFTWARE_PATH has been initialized.
    # NOTE(review): presumably initialize_directory() clears the
    # directory - confirm against util.initialize_directory
    shutil.copy(SOFTWARE_PATH / '_index.md',
                DOWNLOAD_PATH / 'software_index.md')
    shutil.copy(SOFTWARE_PATH / 'software-comparison.md',
                DOWNLOAD_PATH / 'software-comparison.md')
    initialize_directory(SOFTWARE_PATH)
    shutil.copy(DOWNLOAD_PATH / 'software_index.md',
                SOFTWARE_PATH / '_index.md')
    shutil.copy(DOWNLOAD_PATH / 'software-comparison.md',
                SOFTWARE_PATH / 'software-comparison.md')

    with open(DATA_PATH / 'software.json', 'rb') as json_file:
        xsf_package_list = json.load(json_file)

    package_infos: dict[str, Any] = {}

    number_of_doap_packages = 0

    for package in xsf_package_list:
        if package['doap'] is None:
            print(f'{Fore.YELLOW}DOAP n/a'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            continue

        # DOAP is available
        number_of_doap_packages += 1
        package_name_slug = slugify(
            package['name'],
            replacements=[['+', 'plus']])

        doap_url = package['doap']
        if doap_url.startswith('/hosted-doap'):
            # DOAP file is hosted at xmpp.org
            print(f'{Fore.LIGHTCYAN_EX}DOAP by xmpp.org'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            shutil.copyfile(
                f'{STATIC_PATH}{doap_url}',
                Path(f'{DOWNLOAD_PATH}/doap_files/{package_name_slug}.doap'))
        else:
            print(f'{Fore.LIGHTBLUE_EX}DOAP by vendor'
                  f'{Style.RESET_ALL} ',
                  package['name'])
            # A failed download is handled below: parse_doap_infos()
            # returns None if the file does not exist
            download_file(
                doap_url,
                Path(f'doap_files/{package_name_slug}.doap'))

        parsed_package_infos = parse_doap_infos(package_name_slug)
        if parsed_package_infos is None:
            continue

        logo_uri = None
        logo = parsed_package_infos['logo']
        if isinstance(logo, str):
            logo_uri = process_logo(
                package_name_slug, logo)

        package_infos[package['name']] = {
            'categories': package['categories'],
            'name_slug': package_name_slug,
            'homepage': parsed_package_infos['homepage'],
            'logo': logo_uri,
            'shortdesc': parsed_package_infos['shortdesc'],
            'platforms': parsed_package_infos['platforms'],
            'programming_lang': parsed_package_infos['programming_lang'],
            'rfcs': parsed_package_infos['rfcs'],
            'xeps': parsed_package_infos['xeps'],
        }

        for category in package['categories']:
            # Package pages use the plural form of the category
            if category == 'library':
                category = 'libraries'
            else:
                category = f'{category}s'
            create_package_page(category, package_name_slug, package['name'])

    print(f'Number of packages:\n'
          f'total: {len(xsf_package_list)} '
          f'(with DOAP: {number_of_doap_packages}), '
          f'\n{42 * "="}')
    with open(DATA_PATH / 'software_list_doap.json',
              'w',
              encoding='utf-8') as package_data_file:
        json.dump(package_infos, package_data_file, indent=4)
323
324
def add_doap_data_to_xeplist() -> None:
    '''
    Add implementation infos from software_list_doap.json to every
    entry of xeplist.json

    Each XEP entry receives an 'implementations' list with one item per
    package that declares support for it. A package matches if its
    declared XEP number equals the zero-padded four-digit XEP number,
    or if its declared reference equals the XEP's URL.
    xeplist.json is rewritten in place.
    '''
    # Read with the same encoding the files are written with, instead
    # of relying on the platform default
    with open(DATA_PATH / 'software_list_doap.json',
              encoding='utf-8') as software_list:
        software_data = json.load(software_list)
    with open(DATA_PATH / 'xeplist.json',
              encoding='utf-8') as xep_list:
        xep_data = json.load(xep_list)

    for xep in xep_data:
        xep['implementations'] = []
        for name, package_data in software_data.items():
            if not package_data['xeps']:
                continue
            for supported_xep in package_data['xeps']:
                number_matches = (
                    xep['number'] is not None
                    and supported_xep['number'] == f'{xep["number"]:04d}')
                # A missing reference must never count as a match
                ref_matches = (
                    supported_xep['ref'] is not None
                    and supported_xep['ref'] == xep['url'])
                if number_matches or ref_matches:
                    xep['implementations'].append({
                        'package_name': name,
                        'package_name_slug': package_data['name_slug'],
                        'package_categories': package_data['categories'],
                        'implemented_version': supported_xep['version'],
                        'implementation_status': supported_xep['status']
                    })
                    # List each package only once per XEP
                    break

    with open(DATA_PATH / 'xeplist.json',
              'w',
              encoding='utf-8') as xep_list:
        json.dump(xep_data, xep_list, indent=4)
352
def create_package_page(package_type: str, name_slug: str, name: str) -> None:
    '''
    Write the page SOFTWARE_PATH / '<name_slug>.md' for a package

    The page consists of MD_FRONTMATTER filled with a title, today's
    date, the package type and the name slug.
    '''
    page_values = {
        'title': f'XMPP {package_type.capitalize()}: {name}',
        'date': date.today().strftime('%Y-%m-%d'),
        'type': package_type,
        'name_slug': name_slug,
    }
    page_path = SOFTWARE_PATH / f'{name_slug}.md'
    with open(page_path, 'w', encoding='utf8') as md_file:
        md_file.write(MD_FRONTMATTER % page_values)
371
372
def _apply_xmpp_stylesheet(content: str) -> Optional[str]:
    '''
    Return content with its xml-stylesheet replaced by our stylesheet,
    or with our stylesheet added after the XML declaration if there is
    no stylesheet. Returns None if content has neither of them.
    '''
    # Decide by the number of substitutions, not by comparing the
    # result with the input: a file which already references our
    # stylesheet is unchanged by the substitution, but must not get a
    # second stylesheet added.
    result, replaced = re.subn(
        r'<\?xml-stylesheet.+?\?>',
        XMPP_XSL,
        content)
    if replaced:
        return result

    # No stylesheet found, add ours right after the XML declaration
    result, replaced = re.subn(
        r'<\?xml version.+?\?>',
        f'{XML_DECLARATION}\n{XMPP_XSL}',
        content)
    if replaced:
        return result

    return None


def prepare_doap_files() -> None:
    '''
    Copy DOAP files to /static/doap/ and replace the
    xml-stylesheet with our stylesheet (or add it, if there is none)

    Files are taken from DOWNLOAD_PATH / 'doap_files' and
    STATIC_PATH / 'hosted-doap'. A '.doap' file whose XML header cannot
    be altered is emptied.
    '''
    for entry in os.scandir(DOWNLOAD_PATH / 'doap_files'):
        shutil.copy(DOWNLOAD_PATH / 'doap_files' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    for entry in os.scandir(STATIC_PATH / 'hosted-doap'):
        shutil.copy(STATIC_PATH / 'hosted-doap' / entry.name,
                    STATIC_DOAP_PATH / entry.name)

    for entry in os.scandir(STATIC_DOAP_PATH):
        if not entry.name.endswith('.doap'):
            continue

        with open(STATIC_DOAP_PATH / entry.name,
                  'r+',
                  encoding='utf-8') as doap_file:
            content = doap_file.read()
            result = _apply_xmpp_stylesheet(content)

            if result is None:
                print('WARNING: Could not alter XML header of', entry.name)
                # Remove content entirely, since we can't
                # control what would be rendered
                doap_file.truncate(0)
                continue

            # Overwrite the file with the altered content
            doap_file.truncate(0)
            doap_file.seek(0)
            doap_file.write(result)
427
428
if __name__ == '__main__':
    # Set up the working directories
    for directory in (DOWNLOAD_PATH, LOGOS_PATH):
        initialize_directory(directory)
    # DOAP files are collected in a subdirectory of DOWNLOAD_PATH
    (DOWNLOAD_PATH / 'doap_files').mkdir(parents=True)

    # Collect the package data first, since the XEP list is
    # extended with the resulting software_list_doap.json
    prepare_package_data()
    add_doap_data_to_xeplist()

    # Publish the collected DOAP files
    initialize_directory(STATIC_DOAP_PATH)
    prepare_doap_files()

mercurial