test: add entry validation and broken-link detection tests

Add three tests against the real README: verify all entries have non-empty names, valid http(s) URLs, and no broken markdown link syntax (e.g. '[name(url)' missing the closing ']('). Co-Authored-By: Claude <noreply@anthropic.com>
2026-04-11 02:11:42 +08:00 · 2026-04-03 15:55:53 +08:00 · 2026-04-03 15:55:53 +08:00 · 520e285e8e
commit 520e285e8e
parent 54864ab37a
1 changed files with 70 additions and 0 deletions
--- a/website/tests/test_readme_parser.py
+++ b/website/tests/test_readme_parser.py
@ -6,6 +6,7 @@ import textwrap
 import pytest
 from readme_parser import (
    _find_inline,
    _parse_section_entries,
    parse_readme,
    render_inline_html,
@ -486,3 +487,72 @@ class TestParseRealReadme:
        misc_group = next((g for g in self.groups if g["name"] == "Miscellaneous"), None)
        assert misc_group is not None
        assert any(c["name"] == "Miscellaneous" for c in misc_group["categories"])
    def test_all_entries_have_nonempty_names(self):
        bad = []
        for cat in self.cats:
            for entry in cat["entries"]:
                if not entry["name"].strip():
                    bad.append(f"{cat['name']}: empty entry name (url={entry['url']})")
        assert bad == [], "Entries with empty names:\n" + "\n".join(bad)
    def test_all_entries_have_valid_urls(self):
        bad = []
        for cat in self.cats:
            for entry in cat["entries"]:
                if not entry["url"].startswith(("https://", "http://")):
                    bad.append(f"{cat['name']}: [{entry['name']}] has invalid url: {entry['url']!r}")
                for see in entry["also_see"]:
                    if not see["url"].startswith(("https://", "http://")):
                        bad.append(f"{cat['name']}: [{see['name']}] (also_see) has invalid url: {see['url']!r}")
        assert bad == [], "Entries with invalid URLs:\n" + "\n".join(bad)
    def test_no_malformed_entry_lines(self):
        """Detect list items that look like entries but have broken link syntax.

        Walks the markdown-it AST for list items whose inline text starts
        with '[' but contain no link node. This catches broken markdown
        like '- [name(url)' where the closing '](' is missing.

        Only the category section is scanned: the top-level nodes after the
        first thematic break ('---') and before the first following h1
        titled 'Resources' or 'Contributing', or up to the end of the
        document when no such heading follows. The test fails outright when
        the README has no thematic break, because otherwise nothing would be
        scanned and the check would pass vacuously.
        """
        md = MarkdownIt("commonmark")
        root = SyntaxTreeNode(md.parse(self.readme_text))
        nodes = root.children

        # Start of the category section: the first '---'.
        hr_idx = next((i for i, node in enumerate(nodes) if node.type == "hr"), None)
        assert hr_idx is not None, "README has no '---' separator; category section not found"

        # End of the category section: the first Resources/Contributing h1
        # that comes *after* the separator. Headings before it are ignored so
        # the scanned range can never be empty by accident.
        end_idx = len(nodes)
        for i, node in enumerate(nodes[hr_idx + 1 :], start=hr_idx + 1):
            if node.type != "heading" or node.tag != "h1":
                continue
            text = render_inline_text(node.children[0].children) if node.children else ""
            if text in ("Resources", "Contributing"):
                end_idx = i
                break

        bad = []
        for node in nodes[hr_idx + 1 : end_idx]:
            if node.type == "bullet_list":
                self._check_list_for_broken_links(node, bad)
        assert bad == [], "List items with broken link syntax:\n" + "\n".join(bad)
    def _check_list_for_broken_links(self, bullet_list, bad):
        """Append a report to *bad* for each list item with broken link syntax.

        An item is reported when its inline text begins with '[' and none of
        the inline node's direct children is a link node. Nested bullet lists
        are visited recursively, after their parent item has been checked.
        Items for which no inline node is found are skipped entirely,
        including any lists nested under them.
        """
        items = (node for node in bullet_list.children if node.type == "list_item")
        for item in items:
            inline = _find_inline(item)
            if inline is None:
                continue

            link_free = all(part.type != "link" for part in inline.children)
            text = render_inline_text(inline.children)
            if link_free and text.startswith("["):
                # item.map holds 0-based source lines; report a 1-based number.
                line = "?" if not item.map else item.map[0] + 1
                bad.append(f"  line {line}: {text}")

            # Descend into any lists nested under this item.
            for nested in (sub for sub in item.children if sub.type == "bullet_list"):
                self._check_list_for_broken_links(nested, bad)