Data data data

Mon, 13 Mar 2023 16:39:07 +0000

author
Matthew Wild <mwild1@gmail.com>
date
Mon, 13 Mar 2023 16:39:07 +0000
changeset 0
8e1675826e46
child 1
75449093fdb6

Data data data

Dockerfile file | annotate | diff | comparison | revisions
metadata.yml file | annotate | diff | comparison | revisions
normalize_json.lua file | annotate | diff | comparison | revisions
prepare_software_list.py file | annotate | diff | comparison | revisions
prepare_xep_list.py file | annotate | diff | comparison | revisions
run.sh file | annotate | diff | comparison | revisions
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Dockerfile	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,28 @@
+FROM debian:bullseye-slim
+
+# Update system and install the Lua/Python toolchain. DEBIAN_FRONTEND is
+# exported so it applies to every apt-get in the chain, not just the first.
+RUN export DEBIAN_FRONTEND=noninteractive \
+      && apt-get update && apt-get dist-upgrade -y \
+      && apt-get install -y --no-install-recommends \
+      lua5.2 lua-cjson lua-expat python3 python3-pip git \
+      && apt-get autoremove -y && apt-get clean
+
+RUN git clone https://github.com/xsf/xmpp.org /opt/xmpp.org
+WORKDIR /opt/xmpp.org
+
+RUN python3 -m pip install -r tools/requirements.txt \
+ && python3 -m pip install datasette sqlite-utils
+
+# Scripts from this repository; the two tools/*.py files go into the checkout
+COPY normalize_json.lua .
+COPY metadata.yml .
+COPY prepare_software_list.py tools/prepare_software_list.py
+COPY prepare_xep_list.py tools/prepare_xep_list.py
+
+ADD https://raw.githubusercontent.com/xsf/xmpp.org/master/data/software.json data/software.json
+ADD run.sh /usr/local/bin/run.sh
+
+ENTRYPOINT ["/bin/bash"]
+CMD ["-c", "/usr/local/bin/run.sh"]
+EXPOSE 8001
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metadata.yml	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,118 @@
+---
+
+title: "XMPP ecosystem data"
+description: "Explore the ecosystem of XMPP extensions and software"
+source: "XMPP Standards Foundation"
+source_url: "https://xmpp.org/"
+
+databases:
+  xmpp:
+    tables:
+      xeps:
+        label_column: title
+      software:
+        label_column: name
+      compliance_levels:
+        columns:
+          xep_id: XEP
+      implementations:
+        columns:
+          xep_id: XEP
+          xep_version: XEP version
+          software_id: Software
+    queries:
+      outdated_implementations:
+        title: "Implementations: needing updates"
+        description: "Find implementations not on the latest version of a XEP"
+        sql: |-
+          select
+            software.name,
+            xeps.number as xep_num,
+            xeps.title as xep_name,
+            xep_version as implemented_xep_version,
+            xeps.version as latest_xep_version
+          from
+            implementations
+            join xeps on implementations.xep_id == xeps.id
+            join software on implementations.software_id == software.id
+          where
+            xep_version != xeps.version
+      unusual_compliance:
+        title: "XEPs: Unusual compliance requirements"
+        description: "Find XEPs in undesired states that are required by current compliance suites"
+        sql: |-
+          select
+            xeps.number,
+            xeps.title,
+            xeps.status,
+            compliance_levels.category,
+            compliance_levels.level
+          from
+            xeps
+            join compliance_levels on compliance_levels.xep_id = xeps.id
+          where
+            compliance_levels.level not null
+            and xeps.status not in ('Active','Stable','Final');
+      deferred:
+        title: "XEPs: Deferred with implementations"
+        description: "Show deferred XEPs with implementation counts"
+        sql: |-  # string literals use single quotes; SQLite reads "..." as an identifier first
+          select
+            xeps.number,
+            xeps.title,
+            xeps.last_updated,
+            count(implementations.xep_id) as implementations
+          from
+            xeps
+            join implementations on implementations.xep_id = xeps.id
+          where
+            xeps.status = 'Deferred'
+          group by
+            xeps.id
+          order by
+            implementations desc, last_updated desc;
+      expiring:
+        title: "XEPs: Expiring soon"
+        description: "Show XEPs soon to become deferred"
+        sql: |-  # string literals use single quotes; SQLite reads "..." as an identifier first
+          select
+            xeps.number,
+            xeps.title,
+            xeps.last_updated,
+            count(implementations.xep_id) as implementations
+          from
+            xeps
+            join implementations on implementations.xep_id = xeps.id
+          where
+            xeps.status = 'Experimental'
+          group by
+            xeps.id
+          order by
+            last_updated asc;
+      last_call:
+        title: "XEPs: Proposed XEPs under Last Call"
+        description: "List open Last Calls"
+        sql: |-  # string literals use single quotes; SQLite reads "..." as an identifier first
+          select
+            xeps.number,
+            xeps.title,
+            xeps.last_updated
+          from
+            xeps
+          where
+            xeps.status = 'Proposed'
+          order by
+            last_updated desc;
+      inbox:
+        title: "XEPs: Inbox"
+        description: "All submissions not accepted and assigned a XEP number"
+        sql: |-
+          select
+            xeps.title,
+            xeps.last_updated
+          from
+            xeps
+          where
+            xeps.number is null
+          order by
+            last_updated desc;
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/normalize_json.lua	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,129 @@
+local json = require "cjson";
+
+local data_base_path = "data/";
+
+local function read_json_file(fn) -- decode data/<fn>; raises if unreadable or invalid JSON
+	local f = assert(io.open(data_base_path .. fn));
+	local data = f:read("*a"); f:close(); -- release the handle now instead of at GC
+	return assert(json.decode(data));
+end
+-- Flatten software_list_doap.json into three row sets: software, platforms
+-- and categories.
+local software_list = read_json_file("software_list_doap.json");
+local software_table = {};
+local known_software = {}; -- name_slug -> software id
+
+local software_categories_table = {};
+local software_platforms_table = {};
+
+-- Append one { software_id = ..., [field] = value } row per entry of `values`.
+local function add_rows(rows, software_id, field, values)
+	for _, value in ipairs(values) do
+		table.insert(rows, { software_id = software_id, [field] = value });
+	end
+end
+
+-- pairs() iterates in unspecified order, so which package receives which id
+-- can differ from run to run.
+for software_name, entry in pairs(software_list) do
+	-- ids are zero-based: the number of rows inserted so far
+	local software_id = #software_table;
+	known_software[entry.name_slug] = software_id;
+	table.insert(software_table, {
+		id = software_id;
+		short_name = entry.name_slug;
+		name = software_name;
+	});
+	add_rows(software_platforms_table, software_id, "platform", entry.platforms);
+	add_rows(software_categories_table, software_id, "category", entry.categories);
+end
+
+
+local xep_list = read_json_file("xeplist.json");
+local xep_table, xeps_by_url = {}, {};
+local implementations_table = {
+	--[[{
+		software_id = <software id>
+		xep_id = <XEP id>
+		xep_version = <XEP version declared by the software>
+		implementation_status = <status declared by the software>
+	}]]
+};
+
+-- Numbered XEPs first (by number), then unnumbered ones (by last_updated).
+-- table.sort needs one consistent order; mixing the two keys per pair is not.
+table.sort(xep_list, function (a, b)
+	local a_numbered, b_numbered = a.number ~= json.null, b.number ~= json.null;
+	if a_numbered ~= b_numbered then return a_numbered; end
+	if a_numbered then return a.number < b.number; end
+	return a.last_updated < b.last_updated;
+end);
+
+local xep_counter = -1;
+for _, entry in ipairs(xep_list) do
+	xep_counter = xep_counter + 1;
+	if entry.status == "Proto" then entry.status = "ProtoXEP"; end
+	local xep = {
+		id = xep_counter;
+		number = entry.number;
+		title = entry.title;
+		status = entry.status;
+		last_updated = entry.last_updated;
+		type = entry.type;
+		version = entry.version;
+		approver = entry.approver;
+		accepted = entry.accepted;
+		short_name = entry.short_name;
+		url = entry.url;
+	};
+	if entry.url ~= json.null then
+		xeps_by_url[entry.url] = xep_counter;
+	end
+	for _, implementation in ipairs(entry.implementations) do
+		local software_id = known_software[implementation.package_name_slug];
+		if software_id then
+			table.insert(implementations_table, {
+				software_id = software_id;
+				xep_id = xep_counter;
+				xep_version = implementation.implemented_version;
+				-- xeplist.json stores this as "implementation_status"; keeping
+				-- that name also keeps it distinct from the XEP's own status.
+				implementation_status = implementation.implementation_status;
+			});
+		end
+	end
+	table.insert(xep_table, xep);
+end
+
+local function write_json(fn, data) -- encode `data` as JSON and (over)write file `fn`
+	local out = assert(io.open(fn, "w+"));
+	local encoded = json.encode(data);
+	out:write(encoded);
+	out:close();
+end
+
+local compliance_table = {}; -- rows of { xep_id, category, level, software_type }
+local compliance_suite = read_json_file("compliance_suite.json");
+for category, levels in pairs(compliance_suite) do
+	for level, by_software_type in pairs(levels) do
+		for software_type, spec_urls in pairs(by_software_type) do
+			for _, spec_url in ipairs(spec_urls) do
+				local xep_id = xeps_by_url[spec_url];
+				if xep_id then -- URLs without a known XEP are skipped
+					compliance_table[#compliance_table + 1] = {
+						xep_id = xep_id;
+						category = category;
+						level = level;
+						software_type = software_type;
+					};
+				end
+			end
+		end
+	end
+end
+
+write_json("db_xeps.json", xep_table);
+write_json("db_software.json", software_table);
+write_json("db_software_platforms.json", software_platforms_table);
+write_json("db_software_categories.json", software_categories_table);
+write_json("db_implementations.json", implementations_table);
+write_json("db_compliance.json", compliance_table);
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_software_list.py	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,438 @@
+'''
+Download / prepare / process XMPP DOAP files for the software list
+Requires: Pillow, python-slugify
+'''
+from typing import Any
+from typing import Optional
+from typing import Union
+
+import json
+import os
+import re
+import shutil
+from datetime import date
+from pathlib import Path
+from urllib.parse import urlparse
+
+from colorama import Fore
+from colorama import Style
+from defusedxml.ElementTree import parse
+from defusedxml.ElementTree import ParseError
+from PIL import Image
+from PIL import UnidentifiedImageError
+from PIL.Image import Resampling
+from slugify import slugify
+
+from util import download_file
+from util import initialize_directory
+
+SOFTWARE_PATH = Path('content/software')
+DATA_PATH = Path('data')
+DOWNLOAD_PATH = Path('downloads')
+STATIC_PATH = Path('static')
+STATIC_DOAP_PATH = STATIC_PATH / 'doap'
+LOGOS_PATH = STATIC_PATH / 'images' / 'packages'
+
+DOAP_NS = 'http://usefulinc.com/ns/doap#'
+XMPP_NS = 'https://linkmauve.fr/ns/xmpp-doap#'
+SCHEMA_NS = 'https://schema.org/'
+RDF_RESOURCE = '{http://www.w3.org/1999/02/22-rdf-syntax-ns#}resource'
+DOAP_NAME = f'.//{{{DOAP_NS}}}name'
+DOAP_SHORTDESC = f'.//{{{DOAP_NS}}}shortdesc'
+DOAP_HOMEPAGE = f'.//{{{DOAP_NS}}}homepage'
+DOAP_OS = f'.//{{{DOAP_NS}}}os'
+DOAP_PROGRAMMING_LANGUAGE = f'.//{{{DOAP_NS}}}programming-language'
+DOAP_LOGO = f'.//{{{SCHEMA_NS}}}logo'
+DOAP_IMPLEMENTS = f'.//{{{DOAP_NS}}}implements'
+DOAP_SUPPORTED_XEP = f'.//{{{XMPP_NS}}}SupportedXep'
+DOAP_XEP_NUMBER = f'.//{{{XMPP_NS}}}xep'
+DOAP_XEP_VERSION = f'.//{{{XMPP_NS}}}version'
+DOAP_XEP_STATUS = f'.//{{{XMPP_NS}}}status'
+
+RFC_REGEX = r'rfc\d{1,4}'
+XEP_REGEX = r'xep-\d{1,4}'
+
+XML_DECLARATION = '<?xml version=\"1.0\" encoding=\"UTF-8\"?>'
+XMPP_XSL = '<?xml-stylesheet href=\"/doap/xmpp-style.xsl\" type=\"text/xsl\"?>'
+
+MD_FRONTMATTER = '''---
+title: "%(title)s"
+date: %(date)s
+layout: packages
+aliases:
+    - "/software/%(type)s/%(name_slug)s"
+---
+
+{{< package-details name_slug="%(name_slug)s" package_type="%(type)s" >}}
+'''
+
+SOFTWARE_CATEGORIES: list[str] = [
+    'client',
+    'component',
+    'library',
+    'server',
+    'tool',
+]
+PLATFORMS: list[str] = [
+    'Android',
+    'iOS',
+    'Browser',
+    'Windows',
+    'macOS',
+    'Linux',
+]
+
+
+def parse_doap_infos(doap_file: str
+                     ) -> Optional[dict[str, Union[str, list[str], list[dict[str, str]], None]]]:
+    '''
+    Parse DOAP file and return infos
+    '''
+    try:
+        doap = parse(
+            DOWNLOAD_PATH / f'doap_files/{doap_file}.doap')
+    except (FileNotFoundError, ParseError) as err:
+        print('Error while trying to parse DOAP file:', doap_file, err)
+        return None
+
+    info: dict[str, Union[str, list[str], list[dict[str, str]], None]] = {}
+
+    info['name'] = None
+    doap_name = doap.find(DOAP_NAME)
+    if doap_name is not None:
+        info['name'] = doap_name.text
+
+    info['homepage'] = None
+    doap_homepage = doap.find(DOAP_HOMEPAGE)
+    if doap_homepage is not None:
+        info['homepage'] = doap_homepage.attrib.get(RDF_RESOURCE)
+
+    info['shortdesc'] = None
+    doap_shortdesc = doap.find(DOAP_SHORTDESC)
+    if doap_shortdesc is not None:
+        info['shortdesc'] = doap_shortdesc.text
+
+    info['platforms'] = []
+    for entry in doap.findall(DOAP_OS):
+        info['platforms'].append(entry.text)
+
+    info['programming_lang'] = []
+    for entry in doap.findall(DOAP_PROGRAMMING_LANGUAGE):
+        info['programming_lang'].append(entry.text)
+
+    info['logo'] = None
+    doap_logo = doap.find(DOAP_LOGO)
+    if doap_logo is not None:
+        info['logo'] = doap_logo.attrib.get(RDF_RESOURCE)
+
+    rfcs: list[str] = []
+    xeps: list[dict[str, str]] = []
+    for entry in doap.findall(DOAP_IMPLEMENTS):
+        rfc = entry.attrib.get(RDF_RESOURCE)
+        if rfc is not None:
+            match = re.search(RFC_REGEX, rfc)
+            if match:
+                rfcs.append(match.group()[3:])
+
+        supported_xep = entry.find(DOAP_SUPPORTED_XEP)
+        if supported_xep is not None:
+            number = supported_xep.find(DOAP_XEP_NUMBER)
+            if number is not None:
+                number = number.attrib.get(RDF_RESOURCE)
+                match = re.search(XEP_REGEX, number or '')
+                if match:
+                    number = match.group()[4:]
+
+            version = supported_xep.find(DOAP_XEP_VERSION)
+            if version is not None:
+                version = version.text
+
+            status = supported_xep.find(DOAP_XEP_STATUS)
+            if status is not None:
+                status = status.text
+
+            xeps.append({
+                'ref': supported_xep.find(DOAP_XEP_NUMBER).text,
+                'number': number,
+                'version': version,
+                'status': status,
+            })
+
+    info['rfcs'] = rfcs
+    info['xeps'] = xeps
+
+    return info
+
+
+def check_image_file(file_path: Path, extension: str) -> bool:
+    '''
+    Check if file size is greater than 300 KiB and if so, resize image
+    Returns success
+    '''
+    if extension == 'svg':
+        # No need to resize SVG files
+        return True
+
+    try:
+        file_size = os.path.getsize(file_path)
+    except OSError as error:
+        print('An error occurred while trying to open logo:', error)
+        return False
+
+    if file_size <= 300000:
+        # Small enough, no need to resize image
+        return True
+
+    try:
+        with Image.open(file_path) as img:
+            width, height = img.size
+            new_width = 400
+            new_height = int(new_width * height / width)
+            img = img.resize(
+                (new_width, new_height), Resampling.LANCZOS)
+            img.save(file_path)
+            print(f'                  Logo at {file_path} '
+                  f'(file size: {file_size / (1<<10):,.0f} KB) '
+                  f'too big, had to be resized')
+    except (ValueError, OSError, UnidentifiedImageError) as error:
+        print('An error occurred while trying to resize logo:', error)
+        return False
+
+    return True
+
+
+def process_logo(package_name: str, uri: str) -> Optional[str]:
+    '''
+    Download package logo and return logo URI
+    '''
+    image_url = urlparse(uri)
+    _, extension = os.path.splitext(image_url.path)
+    file_name = f'{package_name}{extension}'
+    success = download_file(
+        uri,
+        Path(file_name))
+    if not success:
+        return None
+
+    success = check_image_file(
+        DOWNLOAD_PATH / file_name, extension[1:].lower())
+    if not success:
+        return None
+    logo_uri = f'/images/packages/{package_name}{extension}'
+    shutil.copyfile(
+        DOWNLOAD_PATH / file_name,
+        Path(LOGOS_PATH / file_name))
+    return logo_uri
+
+
+def prepare_package_data() -> None:
+    '''
+    Download and prepare package data (software.json) for
+    rendering with Hugo
+    '''
+    for category in SOFTWARE_CATEGORIES:
+        if category == 'library':
+            category = 'libraries'
+        else:
+            category = f'{category}s'
+
+    shutil.copy(SOFTWARE_PATH / '_index.md',
+                DOWNLOAD_PATH / 'software_index.md')
+    shutil.copy(SOFTWARE_PATH / 'software-comparison.md',
+                DOWNLOAD_PATH / 'software-comparison.md')
+    initialize_directory(SOFTWARE_PATH)
+    shutil.copy(DOWNLOAD_PATH / 'software_index.md',
+                SOFTWARE_PATH / '_index.md')
+    shutil.copy(DOWNLOAD_PATH / 'software-comparison.md',
+                SOFTWARE_PATH / 'software-comparison.md')
+
+    with open(DATA_PATH / 'software.json', 'rb') as json_file:
+        xsf_package_list = json.load(json_file)
+
+    package_infos: dict[str, Any] = {}
+
+    number_of_doap_packages = 0
+
+    for package in xsf_package_list:
+        if package['doap'] is None:
+            print(f'{Fore.YELLOW}DOAP n/a'
+                  f'{Style.RESET_ALL}         ',
+                  package['name'])
+            continue
+
+        # DOAP is available
+        number_of_doap_packages += 1
+        package_name_slug = slugify(
+            package['name'],
+            replacements=[['+', 'plus']])
+
+        doap_url = package['doap']
+        if doap_url.startswith('/hosted-doap'):
+            # DOAP file is hosted at xmpp.org
+            print(f'{Fore.LIGHTCYAN_EX}DOAP by xmpp.org'
+                  f'{Style.RESET_ALL} ',
+                  package['name'])
+            shutil.copyfile(
+                f'{STATIC_PATH}{doap_url}',
+                Path(f'{DOWNLOAD_PATH}/doap_files/{package_name_slug}.doap'))
+        else:
+            print(f'{Fore.LIGHTBLUE_EX}DOAP by vendor'
+                  f'{Style.RESET_ALL}   ',
+                  package['name'])
+            download_file(
+                package['doap'],
+                Path(f'doap_files/{package_name_slug}.doap'))
+
+        parsed_package_infos = parse_doap_infos(package_name_slug)
+        if parsed_package_infos is None:
+            continue
+
+        logo_uri = None
+        logo = parsed_package_infos['logo']
+        if logo is not None and isinstance(logo, str):
+            logo_uri = process_logo(
+                package_name_slug, logo)
+
+        package_infos[package['name']] = {
+            'categories': package['categories'],
+            'name_slug': package_name_slug,
+            'homepage': parsed_package_infos['homepage'],
+            'logo': logo_uri,
+            'shortdesc': parsed_package_infos['shortdesc'],
+            'platforms': parsed_package_infos['platforms'],
+            'programming_lang': parsed_package_infos['programming_lang'],
+            'rfcs': parsed_package_infos['rfcs'],
+            'xeps': parsed_package_infos['xeps'],
+        }
+
+        for category in package['categories']:
+            if category == 'library':
+                category = 'libraries'
+            else:
+                category = f'{category}s'
+            create_package_page(category, package_name_slug, package['name'])
+
+    print(f'Number of packages:\n'
+          f'total: {len(xsf_package_list)} '
+          f'(with DOAP: {number_of_doap_packages}), '
+          f'\n{42 * "="}')
+    with open(DATA_PATH / 'software_list_doap.json',
+              'w',
+              encoding='utf-8') as package_data_file:
+        json.dump(package_infos, package_data_file, indent=4)
+
+
+def add_doap_data_to_xeplist() -> None:
+    with open(DATA_PATH / 'software_list_doap.json') as software_list:
+        software_data = json.load(software_list)
+    with open(DATA_PATH / 'xeplist.json') as xep_list:
+        xep_data = json.load(xep_list)
+
+    for xep in xep_data:
+        xep['implementations'] = []
+        for name, package_data in software_data.items():
+            if not package_data['xeps']:
+                continue
+            for supported_xep in package_data['xeps']:
+                if xep['number'] is not None and (supported_xep['number'] == f'{xep["number"]:04d}') \
+                   or supported_xep["ref"] == xep["url"]:
+                    xep['implementations'].append({
+                        'package_name': name,
+                        'package_name_slug': package_data['name_slug'],
+                        'package_categories': package_data['categories'],
+                        'implemented_version': supported_xep['version'],
+                        'implementation_status': supported_xep['status']
+                    })
+                    break
+
+    with open(DATA_PATH / 'xeplist.json',
+              'w',
+              encoding='utf-8') as xep_list:
+        json.dump(xep_data, xep_list, indent=4)
+
+def create_package_page(package_type: str, name_slug: str, name: str) -> None:
+    '''
+    Create an .md page for package, containing a shortcode
+    for displaying package details
+    '''
+    today = date.today()
+    date_formatted = today.strftime('%Y-%m-%d')
+    with open(SOFTWARE_PATH / f'{name_slug}.md',
+              'w',
+              encoding='utf8') as md_file:
+        md_file.write(
+            MD_FRONTMATTER % {
+                'title': f'XMPP {package_type.capitalize()}: {name}',
+                'date': date_formatted,
+                'type': package_type,
+                'name_slug': name_slug,
+            }
+        )
+
+
+def prepare_doap_files() -> None:
+    '''
+    Copy DOAP files to /static/doap/ and replace the
+    xml-stylesheet with our stylesheet (or add it, if there is none)
+    '''
+    for entry in os.scandir(DOWNLOAD_PATH / 'doap_files'):
+        shutil.copy(DOWNLOAD_PATH / 'doap_files' / entry.name,
+                    STATIC_DOAP_PATH / entry.name)
+
+    for entry in os.scandir(STATIC_PATH / 'hosted-doap'):
+        shutil.copy(STATIC_PATH / 'hosted-doap' / entry.name,
+                    STATIC_DOAP_PATH / entry.name)
+
+    xml_declaration_pattern = r'<\?xml version.+?\?>'
+    stylesheet_pattern = r'<\?xml-stylesheet.+?\?>'
+    for entry in os.scandir(STATIC_DOAP_PATH):
+        if not entry.name.endswith('.doap'):
+            continue
+
+        with open(STATIC_DOAP_PATH / entry.name,
+                  'r+',
+                  encoding='utf-8') as doap_file:
+            content = doap_file.read()
+
+            result = re.sub(
+                stylesheet_pattern,
+                XMPP_XSL,
+                content,
+                0,
+                re.MULTILINE)
+            if result != content:
+                # Replaced custom stylesheet with our stylesheet
+                doap_file.truncate(0)
+                doap_file.seek(0)
+                doap_file.write(result)
+                continue
+
+            # No custom stylesheet found
+            result = re.sub(
+                xml_declaration_pattern,
+                f'{XML_DECLARATION}\n{XMPP_XSL}',
+                content,
+                0,
+                re.MULTILINE)
+            if result != content:
+                # Added our stylesheet
+                doap_file.truncate(0)
+                doap_file.seek(0)
+                doap_file.write(result)
+            else:
+                print('WARNING: Could not alter XML header of', entry.name)
+                # Remove content entirely, since we can't
+                # control what would be rendered
+                doap_file.truncate(0)
+
+
+if __name__ == '__main__':
+    initialize_directory(DOWNLOAD_PATH)
+    initialize_directory(LOGOS_PATH)
+    Path(DOWNLOAD_PATH / 'doap_files').mkdir(parents=True)
+    # Build software_list_doap.json, then merge it into xeplist.json
+    prepare_package_data()
+    add_doap_data_to_xeplist()
+    # Publish the collected DOAP files with our stylesheet
+    initialize_directory(STATIC_DOAP_PATH)
+    prepare_doap_files()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/prepare_xep_list.py	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,75 @@
+'''
+This file is used to download the XEP list and convert it to JSON
+'''
+from typing import Any
+
+import json
+import os
+import sys
+
+import requests
+from defusedxml.ElementTree import fromstring
+from defusedxml.ElementTree import ParseError
+
+XEP_LIST_URL = 'https://xmpp.org/extensions/xeplist.xml'
+
+
+def build_xep_list() -> None:
+    '''
+    Download and parse xeplist.xml and build xeplist.json
+    '''
+    try:
+        xeplist_request = requests.get(XEP_LIST_URL)
+    except requests.exceptions.RequestException as err:
+        sys.exit(f'Error while requesting xeplist.xml ({err}')
+
+    if not 200 >= xeplist_request.status_code < 400:
+        sys.exit(f'Error while downloading xeplist.xml '
+                 f'({xeplist_request.status_code}')
+
+    try:
+        root = fromstring(xeplist_request.content)
+    except ParseError:
+        sys.exit('Error while parsing xeplist.xml')
+
+    def fix_status(status: str) -> str:
+        if status == 'Draft':
+            return 'Stable'
+        return status
+
+    xeps: list[dict[str, Any]] = []
+    for xep in root.findall("xep"):
+        xep_data = {
+            'title': xep.find('title').text,
+            'status': fix_status(xep.find('status').text),
+            'number': int(xep.find('number').text) if xep.find('number').text != 'xxxx' else None,
+            'last_updated': xep.find('last-revision').find('date').text,
+            'version': xep.find('last-revision').find('version').text,
+            'type': xep.find('type').text,
+            'approver': xep.find('approver').text,
+        }
+        if xep.get("accepted") == "true":
+            xep_data.update({
+                'accepted': True,
+                'short_name': xep.find('shortname').text if xep.find('shortname') else None,
+                'url': f'https://xmpp.org/extensions/xep-{xep_data["number"]:04d}.html',
+            })
+        else:
+            xep_data.update({
+                'accepted': False,
+                'short_name': xep.find("proto-name").text,
+                'url': f'https://xmpp.org/extensions/inbox/{xep.find("proto-name").text}.html',
+            })
+        xeps.append(xep_data)
+
+    base_path = os.path.dirname(os.path.abspath(sys.argv[0]))
+
+    with open(f'{base_path}/../data/xeplist.json',
+              'w',
+              encoding='utf-8') as json_file:
+        json.dump(xeps, json_file, indent=4)
+    print('XEP List prepared successfully')
+
+
+if __name__ == '__main__':
+    build_xep_list()
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run.sh	Mon Mar 13 16:39:07 2023 +0000
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+set -xeo pipefail
+
+python3 tools/prepare_xep_list.py
+python3 tools/prepare_software_list.py
+python3 tools/prepare_compliance.py
+lua normalize_json.lua
+
+# Start from an empty database so the inserts below also succeed on a re-run
+rm -f xmpp.db
+sqlite-utils insert xmpp.db xeps db_xeps.json --pk=id
+sqlite-utils insert xmpp.db software db_software.json --pk=id
+sqlite-utils insert xmpp.db software_platforms db_software_platforms.json --pk=software_id --pk=platform
+sqlite-utils insert xmpp.db software_categories db_software_categories.json --pk=software_id --pk=category
+sqlite-utils insert xmpp.db implementations db_implementations.json --pk=xep_id --pk=software_id
+sqlite-utils insert xmpp.db compliance_levels db_compliance.json
+sqlite-utils add-foreign-key xmpp.db implementations xep_id xeps id
+sqlite-utils add-foreign-key xmpp.db implementations software_id software id
+sqlite-utils add-foreign-key xmpp.db compliance_levels xep_id xeps id
+sqlite-utils add-foreign-key xmpp.db software_platforms software_id software id
+sqlite-utils add-foreign-key xmpp.db software_categories software_id software id
+
+# Precompute row counts for performance
+datasette inspect xmpp.db --inspect-file=counts.json
+exec datasette serve -h 0.0.0.0 --metadata metadata.yml -i xmpp.db --inspect-file=counts.json

mercurial