--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/prepare_software_list.py Mon Mar 13 16:39:07 2023 +0000 @@ -0,0 +1,438 @@ +''' +Download / prepare / process XMPP DOAP files for the software list +Requires: Pillow, python-slugify +''' +from typing import Any +from typing import Optional +from typing import Union + +import json +import os +import re +import shutil +from datetime import date +from pathlib import Path +from urllib.parse import urlparse + +from colorama import Fore +from colorama import Style +from defusedxml.ElementTree import parse +from defusedxml.ElementTree import ParseError +from PIL import Image +from PIL import UnidentifiedImageError +from PIL.Image import Resampling +from slugify import slugify + +from util import download_file +from util import initialize_directory + +SOFTWARE_PATH = Path('content/software') +DATA_PATH = Path('data') +DOWNLOAD_PATH = Path('downloads') +STATIC_PATH = Path('static') +STATIC_DOAP_PATH = STATIC_PATH / 'doap' +LOGOS_PATH = STATIC_PATH / 'images' / 'packages' + +DOAP_NS = 'http://usefulinc.com/ns/doap#' +XMPP_NS = 'https://linkmauve.fr/ns/xmpp-doap#' +SCHEMA_NS = 'https://schema.org/' +RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource' +DOAP_NAME = f'.//{{{DOAP_NS}}}name' +DOAP_SHORTDESC = f'.//{{{DOAP_NS}}}shortdesc' +DOAP_HOMEPAGE = f'.//{{{DOAP_NS}}}homepage' +DOAP_OS = f'.//{{{DOAP_NS}}}os' +DOAP_PROGRAMMING_LANGUAGE = f'.//{{{DOAP_NS}}}programming-language' +DOAP_LOGO = f'.//{{{SCHEMA_NS}}}logo' +DOAP_IMPLEMENTS = f'.//{{{DOAP_NS}}}implements' +DOAP_SUPPORTED_XEP = f'.//{{{XMPP_NS}}}SupportedXep' +DOAP_XEP_NUMBER = f'.//{{{XMPP_NS}}}xep' +DOAP_XEP_VERSION = f'.//{{{XMPP_NS}}}version' +DOAP_XEP_STATUS = f'.//{{{XMPP_NS}}}status' + +RFC_REGEX = r'rfc\d{1,4}' +XEP_REGEX = r'xep-\d{1,4}' + +XML_DECLARATION = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>' +XMPP_XSL = '<?xml-stylesheet href=\"/doap/xmpp-style.xsl\" type=\"text/xsl\"?>' + +MD_FRONTMATTER = '''--- +title: "%(title)s" +date: %(date)s +layout: packages +aliases: + - "/software/%(type)s/%(name_slug)s" +--- + +{{< package-details name_slug="%(name_slug)s" package_type="%(type)s" >}} +''' + +SOFTWARE_CATEGORIES: list[str] = [ + 'client', + 'component', + 'library', + 'server', + 'tool', +] +PLATFORMS: list[str] = [ + 'Android', + 'iOS', + 'Browser', + 'Windows', + 'macOS', + 'Linux', +] + + +def parse_doap_infos(doap_file: str + ) -> Optional[dict[str, Union[str, list[str], list[dict[str, str]], None]]]: + ''' + Parse DOAP file and return infos + ''' + try: + doap = parse( + DOWNLOAD_PATH / f'doap_files/{doap_file}.doap') + except (FileNotFoundError, ParseError) as err: + print('Error while trying to parse DOAP file:', doap_file, err) + return None + + info: dict[str, Union[str, list[str], list[dict[str, str]], None]] = {} + + info['name'] = None + doap_name = doap.find(DOAP_NAME) + if doap_name is not None: + info['name'] = doap_name.text + + info['homepage'] = None + doap_homepage = doap.find(DOAP_HOMEPAGE) + if doap_homepage is not None: + info['homepage'] = doap_homepage.attrib.get(RDF_RESOURCE) + + info['shortdesc'] = None + doap_shortdesc = doap.find(DOAP_SHORTDESC) + if doap_shortdesc is not None: + info['shortdesc'] = doap_shortdesc.text + + info['platforms'] = [] + for entry in doap.findall(DOAP_OS): + info['platforms'].append(entry.text) + + info['programming_lang'] = [] + for entry in doap.findall(DOAP_PROGRAMMING_LANGUAGE): + info['programming_lang'].append(entry.text) + + info['logo'] = None + doap_logo = doap.find(DOAP_LOGO) + if doap_logo is not None: + info['logo'] = doap_logo.attrib.get(RDF_RESOURCE) + + rfcs: list[str] = [] + xeps: list[dict[str, str]] = [] + for entry in doap.findall(DOAP_IMPLEMENTS): + rfc = entry.attrib.get(RDF_RESOURCE) + if rfc is not None: + match = re.search(RFC_REGEX, rfc) + if match: + rfcs.append(match.group()[3:]) + + supported_xep = entry.find(DOAP_SUPPORTED_XEP) + if supported_xep is not None: + number = supported_xep.find(DOAP_XEP_NUMBER) + if number is not None: + number = number.attrib.get(RDF_RESOURCE) + match = re.search(XEP_REGEX, number or '') + if match: + number = match.group()[4:] + + version = supported_xep.find(DOAP_XEP_VERSION) + if version is not None: + version = version.text + + status = supported_xep.find(DOAP_XEP_STATUS) + if status is not None: + status = status.text + + xeps.append({ + 'ref': supported_xep.find(DOAP_XEP_NUMBER).text, + 'number': number, + 'version': version, + 'status': status, + }) + + info['rfcs'] = rfcs + info['xeps'] = xeps + + return info + + +def check_image_file(file_path: Path, extension: str) -> bool: + ''' + Check if file size is greater than 300 KiB and if so, resize image + Returns success + ''' + if extension == 'svg': + # No need to resize SVG files + return True + + try: + file_size = os.path.getsize(file_path) + except OSError as error: + print('An error occurred while trying to open logo:', error) + return False + + if file_size <= 300000: + # Small enough, no need to resize image + return True + + try: + with Image.open(file_path) as img: + width, height = img.size + new_width = 400 + new_height = int(new_width * height / width) + img = img.resize( + (new_width, new_height), Resampling.LANCZOS) + img.save(file_path) + print(f' Logo at {file_path} ' + f'(file size: {file_size / (1<<10):,.0f} KB) ' + f'too big, had to be resized') + except (ValueError, OSError, UnidentifiedImageError) as error: + print('An error occurred while trying to resize logo:', error) + return False + + return True + + +def process_logo(package_name: str, uri: str) -> Optional[str]: + ''' + Download package logo and return logo URI + ''' + image_url = urlparse(uri) + _, extension = os.path.splitext(image_url.path) + file_name = f'{package_name}{extension}' + success = download_file( + uri, + Path(file_name)) + if not success: + return None + + success = check_image_file( + DOWNLOAD_PATH / file_name, extension[1:].lower()) + if not success: + return None + logo_uri = f'/images/packages/{package_name}{extension}' + shutil.copyfile( + DOWNLOAD_PATH / file_name, + Path(LOGOS_PATH / file_name)) + return logo_uri + + +def prepare_package_data() -> None: + ''' + Download and prepare package data (software.json) for + rendering with Hugo + ''' + for category in SOFTWARE_CATEGORIES: + if category == 'library': + category = 'libraries' + else: + category = f'{category}s' + + shutil.copy(SOFTWARE_PATH / '_index.md', + DOWNLOAD_PATH / 'software_index.md') + shutil.copy(SOFTWARE_PATH / 'software-comparison.md', + DOWNLOAD_PATH / 'software-comparison.md') + initialize_directory(SOFTWARE_PATH) + shutil.copy(DOWNLOAD_PATH / 'software_index.md', + SOFTWARE_PATH / '_index.md') + shutil.copy(DOWNLOAD_PATH / 'software-comparison.md', + SOFTWARE_PATH / 'software-comparison.md') + + with open(DATA_PATH / 'software.json', 'rb') as json_file: + xsf_package_list = json.load(json_file) + + package_infos: dict[str, Any] = {} + + number_of_doap_packages = 0 + + for package in xsf_package_list: + if package['doap'] is None: + print(f'{Fore.YELLOW}DOAP n/a' + f'{Style.RESET_ALL} ', + package['name']) + continue + + # DOAP is available + number_of_doap_packages += 1 + package_name_slug = slugify( + package['name'], + replacements=[['+', 'plus']]) + + doap_url = package['doap'] + if doap_url.startswith('/hosted-doap'): + # DOAP file is hosted at xmpp.org + print(f'{Fore.LIGHTCYAN_EX}DOAP by xmpp.org' + f'{Style.RESET_ALL} ', + package['name']) + shutil.copyfile( + f'{STATIC_PATH}{doap_url}', + Path(f'{DOWNLOAD_PATH}/doap_files/{package_name_slug}.doap')) + else: + print(f'{Fore.LIGHTBLUE_EX}DOAP by vendor' + f'{Style.RESET_ALL} ', + package['name']) + download_file( + package['doap'], + Path(f'doap_files/{package_name_slug}.doap')) + + parsed_package_infos = parse_doap_infos(package_name_slug) + if parsed_package_infos is None: + continue + + logo_uri = None + logo = parsed_package_infos['logo'] + if logo is not None and isinstance(logo, str): + logo_uri = process_logo( + package_name_slug, logo) + + package_infos[package['name']] = { + 'categories': package['categories'], + 'name_slug': package_name_slug, + 'homepage': parsed_package_infos['homepage'], + 'logo': logo_uri, + 'shortdesc': parsed_package_infos['shortdesc'], + 'platforms': parsed_package_infos['platforms'], + 'programming_lang': parsed_package_infos['programming_lang'], + 'rfcs': parsed_package_infos['rfcs'], + 'xeps': parsed_package_infos['xeps'], + } + + for category in package['categories']: + if category == 'library': + category = 'libraries' + else: + category = f'{category}s' + create_package_page(category, package_name_slug, package['name']) + + print(f'Number of packages:\n' + f'total: {len(xsf_package_list)} ' + f'(with DOAP: {number_of_doap_packages}), ' + f'\n{42 * "="}') + with open(DATA_PATH / 'software_list_doap.json', + 'w', + encoding='utf-8') as package_data_file: + json.dump(package_infos, package_data_file, indent=4) + + +def add_doap_data_to_xeplist() -> None: + with open(DATA_PATH / 'software_list_doap.json') as software_list: + software_data = json.load(software_list) + with open(DATA_PATH / 'xeplist.json') as xep_list: + xep_data = json.load(xep_list) + + for xep in xep_data: + xep['implementations'] = [] + for name, package_data in software_data.items(): + if not package_data['xeps']: + continue + for supported_xep in package_data['xeps']: + if xep['number'] is not None and (supported_xep['number'] == f'{xep["number"]:04d}') \ + or supported_xep["ref"] == xep["url"]: + xep['implementations'].append({ + 'package_name': name, + 'package_name_slug': package_data['name_slug'], + 'package_categories': package_data['categories'], + 'implemented_version': supported_xep['version'], + 'implementation_status': supported_xep['status'] + }) + break + + with open(DATA_PATH / 'xeplist.json', + 'w', + encoding='utf-8') as xep_list: + json.dump(xep_data, xep_list, indent=4) + +def create_package_page(package_type: str, name_slug: str, name: str) -> None: + ''' + Create an .md page for package, containing a shortcode + for displaying package details + ''' + today = date.today() + date_formatted = today.strftime('%Y-%m-%d') + with open(SOFTWARE_PATH / f'{name_slug}.md', + 'w', + encoding='utf8') as md_file: + md_file.write( + MD_FRONTMATTER % { + 'title': f'XMPP {package_type.capitalize()}: {name}', + 'date': date_formatted, + 'type': package_type, + 'name_slug': name_slug, + } + ) + + +def prepare_doap_files() -> None: + ''' + Copy DOAP files to /static/doap/ and replace the + xml-stylesheet with our stylesheet (or add it, if there is none) + ''' + for entry in os.scandir(DOWNLOAD_PATH / 'doap_files'): + shutil.copy(DOWNLOAD_PATH / 'doap_files' / entry.name, + STATIC_DOAP_PATH / entry.name) + + for entry in os.scandir(STATIC_PATH / 'hosted-doap'): + shutil.copy(STATIC_PATH / 'hosted-doap' / entry.name, + STATIC_DOAP_PATH / entry.name) + + xml_declaration_pattern = r'<\?xml version.+?\?>' + stylesheet_pattern = r'<\?xml-stylesheet.+?\?>' + for entry in os.scandir(STATIC_DOAP_PATH): + if not entry.name.endswith('.doap'): + continue + + with open(STATIC_DOAP_PATH / entry.name, + 'r+', + encoding='utf-8') as doap_file: + content = doap_file.read() + + result = re.sub( + stylesheet_pattern, + XMPP_XSL, + content, + 0, + re.MULTILINE) + if result != content: + # Replaced custom stylesheet with our stylesheet + doap_file.truncate(0) + doap_file.seek(0) + doap_file.write(result) + continue + + # No custom stylesheet found + result = re.sub( + xml_declaration_pattern, + f'{XML_DECLARATION}\n{XMPP_XSL}', + content, + 0, + re.MULTILINE) + if result != content: + # Added our stylesheet + doap_file.truncate(0) + doap_file.seek(0) + doap_file.write(result) + else: + print('WARNING: Could not alter XML header of', entry.name) + # Remove content entirely, since we can't + # control what would be rendered + doap_file.truncate(0) + + +if __name__ == '__main__': + initialize_directory(DOWNLOAD_PATH) + initialize_directory(LOGOS_PATH) + Path(DOWNLOAD_PATH / 'doap_files').mkdir(parents=True) + + prepare_package_data() + add_doap_data_to_xeplist() + + initialize_directory(STATIC_DOAP_PATH) + prepare_doap_files()