diff --git a/.gitignore b/.gitignore
index dd9781c..ca26a6e 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
# python
.venv/
+__pycache__/
*.py[co]
# website
@@ -11,6 +12,10 @@ website/data/
# claude code
.claude/skills/
-.superpowers/
.gstack/
+.playwright-cli/
+.superpowers/
skills-lock.json
+
+# codex
+.agents/
diff --git a/CLAUDE.md b/CLAUDE.md
index 81d1341..7210cc4 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -2,31 +2,36 @@
## Repository Overview
-This is the awesome-python repository - a curated list of Python frameworks, libraries, software and resources. The repository serves as a comprehensive directory about Python ecosystem.
+An opinionated list of Python frameworks, libraries, tools, and resources. Published at [awesome-python.com](https://awesome-python.com/).
## PR Review Guidelines
-**For all PR review tasks, refer to [CONTRIBUTING.md](CONTRIBUTING.md)** which contains:
+**Refer to [CONTRIBUTING.md](CONTRIBUTING.md)** for acceptance criteria, quality requirements, rejection rules, and entry format.
-- Acceptance criteria (Industry Standard, Rising Star, Hidden Gem)
-- Quality requirements
-- Automatic rejection criteria
-- Entry format reference
-- PR description template
+## Structure
-## Architecture & Structure
+- **README.md**: Source of truth. Hierarchical categories with alphabetically ordered entries.
+- **CONTRIBUTING.md**: Submission guidelines and review criteria.
+- **website/**: Static site generator that builds awesome-python.com from README.md.
+ - `build.py`: Parses README.md and renders HTML via Jinja2 templates.
+ - `fetch_github_stars.py`: Fetches star counts into `website/data/`.
+ - `readme_parser.py`: Markdown-to-structured-data parser.
+ - `templates/`, `static/`: Jinja2 templates and CSS/JS assets.
+ - `tests/`: Pytest tests for the build pipeline.
+- **Makefile**: `make install`, `make build`, `make preview`, `make test`, `make fetch_github_stars`.
+- **pyproject.toml**: Uses `uv` for dependency management. Python >=3.13.
-The repository follows a single-file architecture:
+## Entry Format
-- **README.md**: All content in hierarchical structure (categories, subcategories, entries)
-- **CONTRIBUTING.md**: Submission guidelines and review criteria
-- **sort.py**: Script to enforce alphabetical ordering
+```markdown
+- [project-name](https://github.com/owner/repo) - Description ending with period.
+```
-Entry format: `* [project-name](url) - Concise description ending with period.`
+Use the PyPI package name as the display name. If the project is not on PyPI, use the GitHub repo name. Use GitHub URLs when available.
-## Key Considerations
+## Key Rules
-- This is a curated list, not a code project
-- Quality over quantity - only "awesome" projects
-- Alphabetical ordering within categories is mandatory
-- README.md is the source of truth for all content
+- Alphabetical ordering within categories is mandatory.
+- Quality over quantity. Only "awesome" projects.
+- One project per PR.
+- README.md is the single source of truth for content.
diff --git a/Makefile b/Makefile
index 21a4c5d..8a0905f 100644
--- a/Makefile
+++ b/Makefile
@@ -14,7 +14,6 @@ build:
uv run python website/build.py
preview: build
- @echo "Check the website on http://localhost:8000"
uv run watchmedo shell-command \
--patterns='*.md;*.html;*.css;*.js;*.py' \
--recursive \
diff --git a/README.md b/README.md
index a782517..276f9f8 100644
--- a/README.md
+++ b/README.md
@@ -1,8 +1,10 @@
# Awesome Python
-An opinionated list of awesome Python frameworks, libraries, tools, software and resources.
+An opinionated list of Python frameworks, libraries, tools, and resources.
-> The **#10 most-starred repo on GitHub**. Put your product where Python developers discover tools. [Become a sponsor](SPONSORSHIP.md).
+# **Sponsors**
+
+> The **#10 most-starred repo on GitHub**. Put your product in front of Python developers. [Become a sponsor](SPONSORSHIP.md).
# Categories
@@ -15,7 +17,7 @@ An opinionated list of awesome Python frameworks, libraries, tools, software and
- [Computer Vision](#computer-vision)
- [Recommender Systems](#recommender-systems)
-**Web**
+**Web Development**
- [Web Frameworks](#web-frameworks)
- [Web APIs](#web-apis)
@@ -125,17 +127,23 @@ An opinionated list of awesome Python frameworks, libraries, tools, software and
_Libraries for building AI applications, LLM integrations, and autonomous agents._
-- Frameworks
+- Agent Skills
+ - [django-ai-plugins](https://github.com/vintasoftware/django-ai-plugins) - Django backend agent skills for Django, DRF, Celery, and Django-specific code review.
+ - [sentry-skills](https://github.com/getsentry/skills) - Python-focused engineering skills for code review, debugging, and backend workflows.
+ - [trailofbits-skills](https://github.com/trailofbits/skills) - Python-friendly security skills for auditing, testing, and safer backend development.
+- Orchestration
- [autogen](https://github.com/microsoft/autogen) - A programming framework for building agentic AI applications.
- [crewai](https://github.com/crewAIInc/crewAI) - A framework for orchestrating role-playing autonomous AI agents for collaborative task solving.
- [dspy](https://github.com/stanfordnlp/dspy) - A framework for programming, not prompting, language models.
- - [instructor](https://github.com/567-labs/instructor) - A library for extracting structured data from LLMs, powered by Pydantic.
- [langchain](https://github.com/langchain-ai/langchain) - Building applications with LLMs through composability.
- - [llama_index](https://github.com/run-llama/llama_index) - A data framework for your LLM application.
- [pydantic-ai](https://github.com/pydantic/pydantic-ai) - A Python agent framework for building generative AI applications with structured schemas.
-- Pretrained Models and Inference
- - [diffusers](https://github.com/huggingface/diffusers) - A library that provides pretrained diffusion models for generating and editing images, audio, and video.
- - [transformers](https://github.com/huggingface/transformers) - A framework that lets you easily use pretrained transformer models for NLP, vision, and audio tasks.
+- Data Layer
+ - [instructor](https://github.com/567-labs/instructor) - A library for extracting structured data from LLMs, powered by Pydantic.
+ - [llama-index](https://github.com/run-llama/llama_index) - A data framework for your LLM application.
+ - [mem0](https://github.com/mem0ai/mem0) - An intelligent memory layer for AI agents enabling personalized interactions.
+- Pre-trained Models and Inference
+ - [diffusers](https://github.com/huggingface/diffusers) - A library that provides pre-trained diffusion models for generating and editing images, audio, and video.
+ - [transformers](https://github.com/huggingface/transformers) - A framework that lets you easily use pre-trained transformer models for NLP, vision, and audio tasks.
- [vllm](https://github.com/vllm-project/vllm) - A high-throughput and memory-efficient inference and serving engine for LLMs.
## Deep Learning
@@ -193,7 +201,7 @@ _Libraries for building recommender systems._
- [implicit](https://github.com/benfred/implicit) - A fast Python implementation of collaborative filtering for implicit datasets.
- [scikit-surprise](https://github.com/NicolasHug/Surprise) - A scikit for building and analyzing recommender systems.
-**Web**
+**Web Development**
## Web Frameworks
@@ -543,7 +551,6 @@ _Python implementation of data structures, algorithms and design patterns. Also
- [sortedcontainers](https://github.com/grantjenks/python-sortedcontainers) - Fast and pure-Python implementation of sorted collections.
- [thealgorithms](https://github.com/TheAlgorithms/Python) - All Algorithms implemented in Python.
- Design Patterns
- - [python-cqrs](https://github.com/pypatterns/python-cqrs) - Event-Driven Architecture Framework with CQRS/CQS, Transaction Outbox, Saga orchestration.
- [python-patterns](https://github.com/faif/python-patterns) - A collection of design patterns in Python.
- [transitions](https://github.com/pytransitions/transitions) - A lightweight, object-oriented finite state machine implementation.
@@ -573,14 +580,15 @@ _Tools of static analysis, linters and code quality checkers. Also see [awesome-
- Code Formatters
- [black](https://github.com/psf/black) - The uncompromising Python code formatter.
- [isort](https://github.com/PyCQA/isort) - A Python utility / library to sort imports.
-- Static Type Checkers, also see [awesome-python-typing](https://github.com/typeddjango/awesome-python-typing)
+ - [ruff](https://github.com/astral-sh/ruff) - An extremely fast Python linter and code formatter.
+- Refactoring
+ - [rope](https://github.com/python-rope/rope) - Rope is a Python refactoring library.
+- Type Checkers - [awesome-python-typing](https://github.com/typeddjango/awesome-python-typing)
- [mypy](https://github.com/python/mypy) - Check variable types during compile time.
- [pyre-check](https://github.com/facebook/pyre-check) - Performant type checking.
- [ty](https://github.com/astral-sh/ty) - An extremely fast Python type checker and language server.
- [typeshed](https://github.com/python/typeshed) - Collection of library stubs for Python, with static types.
-- Refactoring
- - [rope](https://github.com/python-rope/rope) - Rope is a python refactoring library.
-- Static Type Annotations Generators
+- Type Annotations Generators
- [monkeytype](https://github.com/Instagram/MonkeyType) - A system for Python that generates static type annotations by collecting runtime types.
- [pytype](https://github.com/google/pytype) - Pytype checks and infers types for Python code - without requiring type annotations.
@@ -588,7 +596,7 @@ _Tools of static analysis, linters and code quality checkers. Also see [awesome-
_Libraries for testing codebases and generating test data._
-- Testing Frameworks
+- Frameworks
- [hypothesis](https://github.com/HypothesisWorks/hypothesis) - Hypothesis is an advanced Quickcheck style property based testing library.
- [pytest](https://github.com/pytest-dev/pytest) - A mature full-featured Python testing tool.
- [robotframework](https://github.com/robotframework/robotframework) - A generic test automation framework.
@@ -599,7 +607,7 @@ _Libraries for testing codebases and generating test data._
- [tox](https://github.com/tox-dev/tox) - Auto builds and tests distributions in multiple Python versions
- GUI / Web Testing
- [locust](https://github.com/locustio/locust) - Scalable user load testing tool written in Python.
- - [playwright](https://github.com/microsoft/playwright-python) - Python version of the Playwright testing and automation library.
+ - [playwright-python](https://github.com/microsoft/playwright-python) - Python version of the Playwright testing and automation library.
- [pyautogui](https://github.com/asweigart/pyautogui) - PyAutoGUI is a cross-platform GUI automation Python module for human beings.
- [schemathesis](https://github.com/schemathesis/schemathesis) - A tool for automatic property-based testing of web applications built with Open API / Swagger specifications.
- [selenium](https://github.com/SeleniumHQ/selenium) - Python bindings for [Selenium](https://selenium.dev/) [WebDriver](https://selenium.dev/documentation/webdriver/).
@@ -737,11 +745,11 @@ _Tools and libraries for Virtual Networking and SDN (Software Defined Networking
**CLI & GUI**
-## Command-line Interface Development
+## CLI Development
_Libraries for building command-line applications._
-- Command-line Application Development
+- CLI Development
- [argparse](https://docs.python.org/3/library/argparse.html) - (Python standard library) Command-line option and argument parsing.
- [cement](https://github.com/datafolklabs/cement) - CLI Application Framework for Python.
- [click](https://github.com/pallets/click/) - A package for creating beautiful command line interfaces in a composable way.
@@ -756,7 +764,7 @@ _Libraries for building command-line applications._
- [textual](https://github.com/Textualize/textual) - A framework for building interactive user interfaces that run in the terminal and the browser.
- [tqdm](https://github.com/tqdm/tqdm) - Fast, extensible progress bar for loops and CLI.
-## Command-line Tools
+## CLI Tools
_Useful CLI-based tools for productivity._
@@ -834,11 +842,11 @@ _Libraries for parsing and manipulating plain texts._
_Libraries for working with HTML and XML._
- [beautifulsoup](https://www.crummy.com/software/BeautifulSoup/bs4/doc/) - Providing Pythonic idioms for iterating, searching, and modifying HTML or XML.
-- [cssutils](https://github.com/jaraco/cssutils) - A CSS library for Python.
- [justhtml](https://github.com/EmilStenstrom/justhtml/) - A pure Python HTML5 parser that just works.
- [lxml](https://github.com/lxml/lxml) - A very fast, easy-to-use and versatile library for handling HTML and XML.
- [markupsafe](https://github.com/pallets/markupsafe) - Implements a XML/HTML/XHTML Markup safe string for Python.
- [pyquery](https://github.com/gawel/pyquery) - A jQuery-like library for parsing HTML.
+- [tinycss2](https://github.com/Kozea/tinycss2) - A low-level CSS parser and generator written in Python.
- [xmltodict](https://github.com/martinblech/xmltodict) - Working with XML feel like you are working with JSON.
## File Format Processing
@@ -850,14 +858,14 @@ _Libraries for parsing and manipulating specific text formats._
- [kreuzberg](https://github.com/kreuzberg-dev/kreuzberg) - High-performance document extraction library with a Rust core, supporting 62+ formats including PDF, Office, images with OCR, HTML, email, and archives.
- [pyelftools](https://github.com/eliben/pyelftools) - Parsing and analyzing ELF files and DWARF debugging information.
- [tablib](https://github.com/jazzband/tablib) - A module for Tabular Datasets in XLS, CSV, JSON, YAML.
-- Office
+- MS Office
- [docxtpl](https://github.com/elapouya/python-docx-template) - Editing a docx document by jinja2 template
- [openpyxl](https://openpyxl.readthedocs.io/en/stable/) - A library for reading and writing Excel 2010 xlsx/xlsm/xltx/xltm files.
- [pyexcel](https://github.com/pyexcel/pyexcel) - Providing one API for reading, manipulating and writing csv, ods, xls, xlsx and xlsm files.
- [python-docx](https://github.com/python-openxml/python-docx) - Reads, queries and modifies Microsoft Word 2007/2008 docx files.
- [python-pptx](https://github.com/scanny/python-pptx) - Python library for creating and updating PowerPoint (.pptx) files.
- [xlsxwriter](https://github.com/jmcnamara/XlsxWriter) - A Python module for creating Excel .xlsx files.
- - [xlwings](https://github.com/ZoomerAnalytics/xlwings) - A BSD-licensed library that makes it easy to call Python from Excel and vice versa.
+ - [xlwings](https://github.com/xlwings/xlwings) - A BSD-licensed library that makes it easy to call Python from Excel and vice versa.
- PDF
- [pdf_oxide](https://github.com/yfedoseev/pdf_oxide) - A fast PDF library for text extraction, image extraction, and markdown conversion, powered by Rust.
- [pdfminer.six](https://github.com/pdfminer/pdfminer.six) - Pdfminer.six is a community maintained fork of the original PDFMiner.
@@ -891,14 +899,14 @@ _Libraries for file manipulation._
_Libraries for manipulating images._
-- [pillow](https://github.com/python-pillow/Pillow) - Pillow is the friendly [PIL](http://www.pythonware.com/products/pil/) fork.
+- [pillow](https://github.com/python-pillow/Pillow) - Pillow is the friendly [PIL](https://www.pythonware.com/products/pil/) fork.
- [pymatting](https://github.com/pymatting/pymatting) - A library for alpha matting.
- [python-barcode](https://github.com/WhyNotHugo/python-barcode) - Create barcodes in Python with no extra dependencies.
- [python-qrcode](https://github.com/lincolnloop/python-qrcode) - A pure Python QR Code generator.
- [pyvips](https://github.com/libvips/pyvips) - A fast image processing library with low memory needs.
- [scikit-image](https://github.com/scikit-image/scikit-image) - A Python library for (scientific) image processing.
- [thumbor](https://github.com/thumbor/thumbor) - A smart imaging service. It enables on-demand crop, re-sizing and flipping of images.
-- [wand](https://github.com/emcconville/wand) - Python bindings for [MagickWand](http://www.imagemagick.org/script/magick-wand.php), C API for ImageMagick.
+- [wand](https://github.com/emcconville/wand) - Python bindings for [MagickWand](https://www.imagemagick.org/script/magick-wand.php), C API for ImageMagick.
## Audio & Video Processing
@@ -1090,7 +1098,6 @@ Where to discover learning resources or new Python libraries.
- [Django Chat](https://djangochat.com/)
- [PyPodcats](https://pypodcats.live)
- [Python Bytes](https://pythonbytes.fm)
-- [Python Test](https://podcast.pythontest.com/)
- [Talk Python To Me](https://talkpython.fm/)
- [The Real Python Podcast](https://realpython.com/podcasts/rpp/)
@@ -1100,4 +1107,4 @@ Your contributions are always welcome! Please take a look at the [contribution g
---
-If you have any question about this opinionated list, do not hesitate to contact [@VintaChen](https://twitter.com/VintaChen) on Twitter.
+If you have any questions about this opinionated list, do not hesitate to contact [@vinta](https://x.com/vinta) on X (Twitter).
diff --git a/SPONSORSHIP.md b/SPONSORSHIP.md
index 3f9e8a6..4bb3b89 100644
--- a/SPONSORSHIP.md
+++ b/SPONSORSHIP.md
@@ -2,11 +2,11 @@
**The #10 most-starred repository on all of GitHub.**
-awesome-python is where Python developers go to discover tools. When someone searches Google for "best Python libraries," they land here. When ChatGPT recommends Python tools, it references this list. When developers evaluate frameworks, this is the list they check.
+awesome-python is where Python developers go to discover tools. It ranks on the first page of Google for "best Python libraries," is referenced by ChatGPT and other LLMs when recommending Python tools, and is the list developers check when evaluating frameworks.
Your sponsorship puts your product in front of developers at the exact moment they're choosing what to use.
-## By the Numbers
+## Audience
| Metric | Value |
| ------------ | ---------------------------------------------------------------------------------------------------- |
@@ -15,22 +15,35 @@ Your sponsorship puts your product in front of developers at the exact moment th
| Watchers |  |
| Contributors |  |
-Top referrers: GitHub, Google Search, YouTube, Reddit, ChatGPT — developers actively searching for and evaluating Python tools.
+**Who visits:** Professional Python developers evaluating libraries and tools for production use. Not beginners browsing tutorials. People making adoption decisions.
+
+**Top referrers:** Google Search, GitHub, ChatGPT/LLMs, YouTube, Reddit, Hacker News.
## Sponsorship Tiers
-### Logo Sponsor — $500/month (2 slots)
+### Logo Sponsor - $500/month
-Your logo and a one-line description at the top of the README, seen by every visitor.
+Your logo and a one-line description pinned to the top of the README, above all project entries. Every visitor to the repo or awesome-python.com sees it first.
-### Link Sponsor — $150/month (5 slots)
+**What you get:**
+- Logo + one-line description in the README header
+- Logo on awesome-python.com sponsor section
+- Permanent placement for the duration of your sponsorship
-A text link with your product name at the top of the README, right below logo sponsors.
+### Link Sponsor - $150/month
+
+A text link with your product name at the top of the README, directly below logo sponsors.
+
+**What you get:**
+- Text link in the README header
+- Link on awesome-python.com sponsor section
## Past Sponsors
-- [Warp](https://www.warp.dev/) - https://github.com/vinta/awesome-python/pull/2766
+- [Warp](https://www.warp.dev/) - The terminal for modern developers.
## Get Started
-Email [vinta.chen@gmail.com](mailto:vinta.chen@gmail.com?subject=awesome-python%20Sponsorship) with your company name and preferred tier. Most sponsors are set up within 24 hours.
+Email [vinta.chen@gmail.com](mailto:vinta.chen@gmail.com?subject=awesome-python%20Sponsorship) with your company name and preferred tier.
+
+Setup takes less than 24 hours. Month-to-month billing, cancel anytime.
diff --git a/website/build.py b/website/build.py
index 5ab9c9e..cf75928 100644
--- a/website/build.py
+++ b/website/build.py
@@ -4,30 +4,12 @@
import json
import re
import shutil
+from datetime import datetime, timezone
from pathlib import Path
from typing import TypedDict
from jinja2 import Environment, FileSystemLoader
-from readme_parser import parse_readme, slugify
-
-
-def group_categories(
- parsed_groups: list[dict],
- resources: list[dict],
-) -> list[dict]:
- """Combine parsed groups with resources for template rendering."""
- groups = list(parsed_groups)
-
- if resources:
- groups.append(
- {
- "name": "Resources",
- "slug": slugify("Resources"),
- "categories": list(resources),
- }
- )
-
- return groups
+from readme_parser import parse_readme
class StarData(TypedDict):
@@ -120,6 +102,11 @@ def extract_entries(
existing["categories"].append(cat["name"])
if group_name not in existing["groups"]:
existing["groups"].append(group_name)
+ subcat = entry["subcategory"]
+ if subcat:
+ scoped = f"{cat['name']} > {subcat}"
+ if not any(s["value"] == scoped for s in existing["subcategories"]):
+ existing["subcategories"].append({"name": subcat, "value": scoped})
else:
merged = {
"name": entry["name"],
@@ -127,6 +114,7 @@ def extract_entries(
"description": entry["description"],
"categories": [cat["name"]],
"groups": [group_name],
+ "subcategories": [{"name": entry["subcategory"], "value": f"{cat['name']} > {entry['subcategory']}"}] if entry["subcategory"] else [],
"stars": None,
"owner": None,
"last_commit_at": None,
@@ -138,6 +126,13 @@ def extract_entries(
return entries
+def format_stars_short(stars: int) -> str:
+ """Format star count as compact string like '230k'."""
+ if stars >= 1000:
+ return f"{stars // 1000}k"
+ return str(stars)
+
+
def build(repo_root: str) -> None:
"""Main build: parse README, render single-page HTML via Jinja2 templates."""
repo = Path(repo_root)
@@ -151,14 +146,17 @@ def build(repo_root: str) -> None:
subtitle = stripped
break
- parsed_groups, resources = parse_readme(readme_text)
+ parsed_groups = parse_readme(readme_text)
categories = [cat for g in parsed_groups for cat in g["categories"]]
total_entries = sum(c["entry_count"] for c in categories)
- groups = group_categories(parsed_groups, resources)
- entries = extract_entries(categories, groups)
+ entries = extract_entries(categories, parsed_groups)
stars_data = load_stars(website / "data" / "github_stars.json")
+
+ repo_self = stars_data.get("vinta/awesome-python", {})
+ repo_stars = format_stars_short(repo_self["stars"]) if "stars" in repo_self else None
+
for entry in entries:
repo_key = extract_github_repo(entry["url"])
if not repo_key and entry.get("source_type") == "Built-in":
@@ -185,12 +183,12 @@ def build(repo_root: str) -> None:
(site_dir / "index.html").write_text(
tpl_index.render(
categories=categories,
- resources=resources,
- groups=groups,
subtitle=subtitle,
entries=entries,
total_entries=total_entries,
total_categories=len(categories),
+ repo_stars=repo_stars,
+ build_date=datetime.now(timezone.utc).strftime("%B %d, %Y"),
),
encoding="utf-8",
)
@@ -202,7 +200,7 @@ def build(repo_root: str) -> None:
(site_dir / "llms.txt").write_text(readme_text, encoding="utf-8")
- print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
+ print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories")
print(f"Total entries: {total_entries}")
print(f"Output: {site_dir}")
diff --git a/website/fetch_github_stars.py b/website/fetch_github_stars.py
index d3b024e..ccff1b6 100644
--- a/website/fetch_github_stars.py
+++ b/website/fetch_github_stars.py
@@ -103,6 +103,7 @@ def main() -> None:
readme_text = README_PATH.read_text(encoding="utf-8")
current_repos = extract_github_repos(readme_text)
+ current_repos.add("vinta/awesome-python")
print(f"Found {len(current_repos)} GitHub repos in README.md")
cache = load_stars(CACHE_FILE)
diff --git a/website/readme_parser.py b/website/readme_parser.py
index c0ecfc6..4f36ed7 100644
--- a/website/readme_parser.py
+++ b/website/readme_parser.py
@@ -20,6 +20,7 @@ class ParsedEntry(TypedDict):
url: str
description: str # inline HTML, properly escaped
also_see: list[AlsoSee]
+ subcategory: str # sub-category label, empty if none
class ParsedSection(TypedDict):
@@ -28,8 +29,6 @@ class ParsedSection(TypedDict):
description: str # plain text, links resolved to text
entries: list[ParsedEntry]
entry_count: int
- preview: str
- content_html: str # rendered HTML, properly escaped
class ParsedGroup(TypedDict):
@@ -131,6 +130,7 @@ def _extract_description(nodes: list[SyntaxTreeNode]) -> str:
# --- Entry extraction --------------------------------------------------------
_DESC_SEP_RE = re.compile(r"^\s*[-\u2013\u2014]\s*")
+_SUBCAT_TRAILING_RE = re.compile(r"[\s,\-\u2013\u2014]+(also\s+see\s*)?$", re.IGNORECASE)
def _find_child(node: SyntaxTreeNode, child_type: str) -> SyntaxTreeNode | None:
@@ -178,7 +178,11 @@ def _extract_description_html(inline: SyntaxTreeNode, first_link: SyntaxTreeNode
return _DESC_SEP_RE.sub("", html)
-def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
+def _parse_list_entries(
+ bullet_list: SyntaxTreeNode,
+ *,
+ subcategory: str = "",
+) -> list[ParsedEntry]:
"""Extract entries from a bullet_list AST node.
Handles three patterns:
@@ -199,10 +203,16 @@ def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
first_link = _find_first_link(inline)
if first_link is None or not _is_leading_link(inline, first_link):
- # Subcategory label (plain text or text-before-link) — recurse into nested list
+ # Subcategory label: take text before the first link, strip trailing separators
+ pre_link = []
+ for child in inline.children:
+ if child.type == "link":
+ break
+ pre_link.append(child)
+ label = _SUBCAT_TRAILING_RE.sub("", render_inline_text(pre_link)) if pre_link else render_inline_text(inline.children)
nested = _find_child(list_item, "bullet_list")
if nested:
- entries.extend(_parse_list_entries(nested))
+ entries.extend(_parse_list_entries(nested, subcategory=label))
continue
# Entry with a link
@@ -231,6 +241,7 @@ def _parse_list_entries(bullet_list: SyntaxTreeNode) -> list[ParsedEntry]:
url=url,
description=desc_html,
also_see=also_see,
+ subcategory=subcategory,
))
return entries
@@ -245,69 +256,6 @@ def _parse_section_entries(content_nodes: list[SyntaxTreeNode]) -> list[ParsedEn
return entries
-# --- Content HTML rendering --------------------------------------------------
-
-
-def _render_bullet_list_html(
- bullet_list: SyntaxTreeNode,
- *,
- is_sub: bool = False,
-) -> str:
- """Render a bullet_list node to HTML with entry/entry-sub/subcat classes."""
- out: list[str] = []
-
- for list_item in bullet_list.children:
- if list_item.type != "list_item":
- continue
-
- inline = _find_inline(list_item)
- if inline is None:
- continue
-
- first_link = _find_first_link(inline)
-
- if first_link is None or not _is_leading_link(inline, first_link):
- # Subcategory label (plain text or text-before-link)
- label = str(escape(render_inline_text(inline.children)))
- out.append(f'
{label}
')
- nested = _find_child(list_item, "bullet_list")
- if nested:
- out.append(_render_bullet_list_html(nested, is_sub=False))
- continue
-
- # Entry with a link
- name = str(escape(render_inline_text(first_link.children)))
- url = str(escape(first_link.attrGet("href") or ""))
-
- if is_sub:
- out.append(f'