mirror of
https://github.com/vinta/awesome-python.git
synced 2026-04-11 02:11:42 +08:00
refactor: parse thematic groups from README bold markers instead of hardcoding them
The website builder previously relied on a hardcoded SECTION_GROUPS list in build.py to organize categories into thematic groups. This was fragile: any rename or addition to README.md required a matching code change.

Replace this with a parser-driven approach:

- readme_parser.py now detects bold-only paragraphs (**Group Name**) as group boundary markers and groups H2 categories beneath them into ParsedGroup structs.
- build.py drops SECTION_GROUPS entirely; group_categories() now just passes parsed groups through and appends the Resources group.
- sort.py is removed as it relied on the old flat section model.
- Tests updated throughout to reflect the new (groups, resources) return shape and to cover the new grouping logic.

Co-Authored-By: Claude <noreply@anthropic.com>
This commit is contained in:
parent
fd9b2665ed
commit
4322026817
83
sort.py
83
sort.py
@ -1,83 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# coding: utf-8
|
|
||||||
|
|
||||||
"""
|
|
||||||
The approach taken is explained below. I decided to do it simply.
|
|
||||||
Initially I was considering parsing the data into some sort of
|
|
||||||
structure and then generating an appropriate README. I am still
|
|
||||||
considering doing it - but for now this should work. The only issue
|
|
||||||
I see is that it only sorts the entries at the lowest level, and that
|
|
||||||
the order of the top-level contents do not match the order of the actual
|
|
||||||
entries.
|
|
||||||
|
|
||||||
This could be extended by having nested blocks, sorting them recursively
|
|
||||||
and flattening the end structure into a list of lines. Revision 2 maybe ^.^.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def sort_blocks():
|
|
||||||
# First, we load the current README into memory
|
|
||||||
with open('README.md', 'r') as read_me_file:
|
|
||||||
read_me = read_me_file.read()
|
|
||||||
|
|
||||||
# Separating the 'table of contents' from the contents (blocks)
|
|
||||||
table_of_contents = ''.join(read_me.split('- - -')[0])
|
|
||||||
blocks = ''.join(read_me.split('- - -')[1]).split('\n# ')
|
|
||||||
for i in range(len(blocks)):
|
|
||||||
if i == 0:
|
|
||||||
blocks[i] = blocks[i] + '\n'
|
|
||||||
else:
|
|
||||||
blocks[i] = '# ' + blocks[i] + '\n'
|
|
||||||
|
|
||||||
# Sorting the libraries
|
|
||||||
inner_blocks = sorted(blocks[0].split('##'))
|
|
||||||
for i in range(1, len(inner_blocks)):
|
|
||||||
if inner_blocks[i][0] != '#':
|
|
||||||
inner_blocks[i] = '##' + inner_blocks[i]
|
|
||||||
inner_blocks = ''.join(inner_blocks)
|
|
||||||
|
|
||||||
# Replacing the non-sorted libraries by the sorted ones and gathering all at the final_README file
|
|
||||||
blocks[0] = inner_blocks
|
|
||||||
final_README = table_of_contents + '- - -' + ''.join(blocks)
|
|
||||||
|
|
||||||
with open('README.md', 'w+') as sorted_file:
|
|
||||||
sorted_file.write(final_README)
|
|
||||||
|
|
||||||
def main():
|
|
||||||
# First, we load the current README into memory as an array of lines
|
|
||||||
with open('README.md', 'r') as read_me_file:
|
|
||||||
read_me = read_me_file.readlines()
|
|
||||||
|
|
||||||
# Then we cluster the lines together as blocks
|
|
||||||
# Each block represents a collection of lines that should be sorted
|
|
||||||
# This was done by assuming only links ([...](...)) are meant to be sorted
|
|
||||||
# Clustering is done by indentation
|
|
||||||
blocks = []
|
|
||||||
last_indent = None
|
|
||||||
for line in read_me:
|
|
||||||
s_line = line.lstrip()
|
|
||||||
indent = len(line) - len(s_line)
|
|
||||||
|
|
||||||
if any([s_line.startswith(s) for s in ['* [', '- [']]):
|
|
||||||
if indent == last_indent:
|
|
||||||
blocks[-1].append(line)
|
|
||||||
else:
|
|
||||||
blocks.append([line])
|
|
||||||
last_indent = indent
|
|
||||||
else:
|
|
||||||
blocks.append([line])
|
|
||||||
last_indent = None
|
|
||||||
|
|
||||||
with open('README.md', 'w+') as sorted_file:
|
|
||||||
# Then all of the blocks are sorted individually
|
|
||||||
blocks = [
|
|
||||||
''.join(sorted(block, key=str.lower)) for block in blocks
|
|
||||||
]
|
|
||||||
# And the result is written back to README.md
|
|
||||||
sorted_file.write(''.join(blocks))
|
|
||||||
|
|
||||||
# Then we call the sorting method
|
|
||||||
sort_blocks()
|
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
|
||||||
main()
|
|
||||||
181
website/build.py
181
website/build.py
@ -10,179 +10,20 @@ from typing import TypedDict
|
|||||||
from jinja2 import Environment, FileSystemLoader
|
from jinja2 import Environment, FileSystemLoader
|
||||||
from readme_parser import parse_readme, slugify
|
from readme_parser import parse_readme, slugify
|
||||||
|
|
||||||
# Thematic grouping of categories. Each category name must match exactly
|
|
||||||
# as it appears in README.md (the ## heading text).
|
|
||||||
SECTION_GROUPS: list[tuple[str, list[str]]] = [
|
|
||||||
(
|
|
||||||
"Web & API",
|
|
||||||
[
|
|
||||||
"Admin Panels",
|
|
||||||
"CMS",
|
|
||||||
"Email",
|
|
||||||
"Static Site Generator",
|
|
||||||
"URL Manipulation",
|
|
||||||
"Web Frameworks",
|
|
||||||
"RESTful API",
|
|
||||||
"GraphQL",
|
|
||||||
"WebSocket",
|
|
||||||
"ASGI Servers",
|
|
||||||
"WSGI Servers",
|
|
||||||
"HTTP Clients",
|
|
||||||
"Template Engine",
|
|
||||||
"Web Asset Management",
|
|
||||||
"Web Content Extracting",
|
|
||||||
"Web Crawling",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"AI & ML",
|
|
||||||
[
|
|
||||||
"AI and Agents",
|
|
||||||
"Machine Learning",
|
|
||||||
"Deep Learning",
|
|
||||||
"Computer Vision",
|
|
||||||
"Natural Language Processing",
|
|
||||||
"Recommender Systems",
|
|
||||||
"Robotics",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Data & Science",
|
|
||||||
[
|
|
||||||
"Data Analysis",
|
|
||||||
"Data Validation",
|
|
||||||
"Data Visualization",
|
|
||||||
"Geolocation",
|
|
||||||
"Science",
|
|
||||||
"Quantum Computing",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"DevOps & Infrastructure",
|
|
||||||
[
|
|
||||||
"DevOps Tools",
|
|
||||||
"Distributed Computing",
|
|
||||||
"Task Queues",
|
|
||||||
"Job Scheduler",
|
|
||||||
"Serverless Frameworks",
|
|
||||||
"Logging",
|
|
||||||
"Processes",
|
|
||||||
"Shell",
|
|
||||||
"Network Virtualization",
|
|
||||||
"RPC Servers",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Database & Storage",
|
|
||||||
[
|
|
||||||
"Database",
|
|
||||||
"Database Drivers",
|
|
||||||
"ORM",
|
|
||||||
"Caching",
|
|
||||||
"Search",
|
|
||||||
"Serialization",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Development Tools",
|
|
||||||
[
|
|
||||||
"Testing",
|
|
||||||
"Debugging Tools",
|
|
||||||
"Code Analysis",
|
|
||||||
"Build Tools",
|
|
||||||
"Algorithms and Design Patterns",
|
|
||||||
"Refactoring",
|
|
||||||
"Documentation",
|
|
||||||
"Editor Plugins and IDEs",
|
|
||||||
"Interactive Interpreter",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"CLI & GUI",
|
|
||||||
[
|
|
||||||
"Command-line Interface Development",
|
|
||||||
"Command-line Tools",
|
|
||||||
"GUI Development",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Content & Media",
|
|
||||||
[
|
|
||||||
"Audio",
|
|
||||||
"Video",
|
|
||||||
"Game Development",
|
|
||||||
"Image Processing",
|
|
||||||
"Internationalization",
|
|
||||||
"HTML Manipulation",
|
|
||||||
"Text Processing",
|
|
||||||
"Specific Formats Processing",
|
|
||||||
"File Manipulation",
|
|
||||||
"Downloader",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"System & Runtime",
|
|
||||||
[
|
|
||||||
"Asynchronous Programming",
|
|
||||||
"Environment Management",
|
|
||||||
"Package Management",
|
|
||||||
"Package Repositories",
|
|
||||||
"Date and Time",
|
|
||||||
"Distribution",
|
|
||||||
"Hardware",
|
|
||||||
"Implementations",
|
|
||||||
"Microsoft Windows",
|
|
||||||
"Built-in Classes Enhancement",
|
|
||||||
"Functional Programming",
|
|
||||||
"Configuration Files",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
(
|
|
||||||
"Security & Auth",
|
|
||||||
[
|
|
||||||
"Authentication",
|
|
||||||
"Cryptography",
|
|
||||||
"Penetration Testing",
|
|
||||||
"Permissions",
|
|
||||||
],
|
|
||||||
),
|
|
||||||
("Resources", []), # Filled dynamically from parsed resources
|
|
||||||
]
|
|
||||||
|
|
||||||
|
|
||||||
def group_categories(
|
def group_categories(
|
||||||
categories: list[dict],
|
parsed_groups: list[dict],
|
||||||
resources: list[dict],
|
resources: list[dict],
|
||||||
) -> list[dict]:
|
) -> list[dict]:
|
||||||
"""Organize categories and resources into thematic section groups."""
|
"""Combine parsed groups with resources for template rendering."""
|
||||||
cat_by_name = {c["name"]: c for c in categories}
|
groups = list(parsed_groups)
|
||||||
groups = []
|
|
||||||
grouped_names: set[str] = set()
|
|
||||||
|
|
||||||
for group_name, cat_names in SECTION_GROUPS:
|
if resources:
|
||||||
grouped_names.update(cat_names)
|
|
||||||
if group_name == "Resources":
|
|
||||||
group_cats = list(resources)
|
|
||||||
else:
|
|
||||||
group_cats = [cat_by_name[n] for n in cat_names if n in cat_by_name]
|
|
||||||
|
|
||||||
if group_cats:
|
|
||||||
groups.append(
|
|
||||||
{
|
|
||||||
"name": group_name,
|
|
||||||
"slug": slugify(group_name),
|
|
||||||
"categories": group_cats,
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# Any categories not in a group go into "Other"
|
|
||||||
ungrouped = [c for c in categories if c["name"] not in grouped_names]
|
|
||||||
if ungrouped:
|
|
||||||
groups.append(
|
groups.append(
|
||||||
{
|
{
|
||||||
"name": "Other",
|
"name": "Resources",
|
||||||
"slug": "other",
|
"slug": slugify("Resources"),
|
||||||
"categories": ungrouped,
|
"categories": list(resources),
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
@ -295,11 +136,11 @@ def build(repo_root: str) -> None:
|
|||||||
subtitle = stripped
|
subtitle = stripped
|
||||||
break
|
break
|
||||||
|
|
||||||
categories, resources = parse_readme(readme_text)
|
parsed_groups, resources = parse_readme(readme_text)
|
||||||
# All fields pre-computed: entry_count, content_html, preview, description
|
|
||||||
|
|
||||||
|
categories = [cat for g in parsed_groups for cat in g["categories"]]
|
||||||
total_entries = sum(c["entry_count"] for c in categories)
|
total_entries = sum(c["entry_count"] for c in categories)
|
||||||
groups = group_categories(categories, resources)
|
groups = group_categories(parsed_groups, resources)
|
||||||
entries = extract_entries(categories, groups)
|
entries = extract_entries(categories, groups)
|
||||||
|
|
||||||
stars_data = load_stars(website / "data" / "github_stars.json")
|
stars_data = load_stars(website / "data" / "github_stars.json")
|
||||||
@ -344,7 +185,7 @@ def build(repo_root: str) -> None:
|
|||||||
|
|
||||||
shutil.copy(repo / "README.md", site_dir / "llms.txt")
|
shutil.copy(repo / "README.md", site_dir / "llms.txt")
|
||||||
|
|
||||||
print(f"Built single page with {len(categories)} categories + {len(resources)} resources")
|
print(f"Built single page with {len(parsed_groups)} groups, {len(categories)} categories + {len(resources)} resources")
|
||||||
print(f"Total entries: {total_entries}")
|
print(f"Total entries: {total_entries}")
|
||||||
print(f"Output: {site_dir}")
|
print(f"Output: {site_dir}")
|
||||||
|
|
||||||
|
|||||||
@ -32,6 +32,12 @@ class ParsedSection(TypedDict):
|
|||||||
content_html: str # rendered HTML, properly escaped
|
content_html: str # rendered HTML, properly escaped
|
||||||
|
|
||||||
|
|
||||||
|
class ParsedGroup(TypedDict):
|
||||||
|
name: str
|
||||||
|
slug: str
|
||||||
|
categories: list[ParsedSection]
|
||||||
|
|
||||||
|
|
||||||
# --- Slugify ----------------------------------------------------------------
|
# --- Slugify ----------------------------------------------------------------
|
||||||
|
|
||||||
_SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s-]")
|
_SLUG_NON_ALNUM_RE = re.compile(r"[^a-z0-9\s-]")
|
||||||
@ -305,6 +311,25 @@ def _render_section_html(content_nodes: list[SyntaxTreeNode]) -> str:
|
|||||||
# --- Section splitting -------------------------------------------------------
|
# --- Section splitting -------------------------------------------------------
|
||||||
|
|
||||||
|
|
||||||
|
def _build_section(name: str, body: list[SyntaxTreeNode]) -> ParsedSection:
|
||||||
|
"""Build a ParsedSection from a heading name and its body nodes."""
|
||||||
|
desc = _extract_description(body)
|
||||||
|
content_nodes = body[1:] if desc else body
|
||||||
|
entries = _parse_section_entries(content_nodes)
|
||||||
|
entry_count = len(entries) + sum(len(e["also_see"]) for e in entries)
|
||||||
|
preview = ", ".join(e["name"] for e in entries[:4])
|
||||||
|
content_html = _render_section_html(content_nodes)
|
||||||
|
return ParsedSection(
|
||||||
|
name=name,
|
||||||
|
slug=slugify(name),
|
||||||
|
description=desc,
|
||||||
|
entries=entries,
|
||||||
|
entry_count=entry_count,
|
||||||
|
preview=preview,
|
||||||
|
content_html=content_html,
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def _group_by_h2(
|
def _group_by_h2(
|
||||||
nodes: list[SyntaxTreeNode],
|
nodes: list[SyntaxTreeNode],
|
||||||
) -> list[ParsedSection]:
|
) -> list[ParsedSection]:
|
||||||
@ -317,22 +342,7 @@ def _group_by_h2(
|
|||||||
nonlocal current_name
|
nonlocal current_name
|
||||||
if current_name is None:
|
if current_name is None:
|
||||||
return
|
return
|
||||||
desc = _extract_description(current_body)
|
sections.append(_build_section(current_name, current_body))
|
||||||
content_nodes = current_body[1:] if desc else current_body
|
|
||||||
entries = _parse_section_entries(content_nodes)
|
|
||||||
entry_count = len(entries) + sum(len(e["also_see"]) for e in entries)
|
|
||||||
preview = ", ".join(e["name"] for e in entries[:4])
|
|
||||||
content_html = _render_section_html(content_nodes)
|
|
||||||
|
|
||||||
sections.append(ParsedSection(
|
|
||||||
name=current_name,
|
|
||||||
slug=slugify(current_name),
|
|
||||||
description=desc,
|
|
||||||
entries=entries,
|
|
||||||
entry_count=entry_count,
|
|
||||||
preview=preview,
|
|
||||||
content_html=content_html,
|
|
||||||
))
|
|
||||||
current_name = None
|
current_name = None
|
||||||
|
|
||||||
for node in nodes:
|
for node in nodes:
|
||||||
@ -347,10 +357,86 @@ def _group_by_h2(
|
|||||||
return sections
|
return sections
|
||||||
|
|
||||||
|
|
||||||
def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
|
def _is_bold_marker(node: SyntaxTreeNode) -> str | None:
|
||||||
"""Parse README.md text into categories and resources.
|
"""Detect a bold-only paragraph used as a group marker.
|
||||||
|
|
||||||
Returns (categories, resources) where each is a list of ParsedSection dicts.
|
Pattern: a paragraph whose only content is **Group Name** (possibly
|
||||||
|
surrounded by empty text nodes in the AST).
|
||||||
|
Returns the group name text, or None if not a group marker.
|
||||||
|
"""
|
||||||
|
if node.type != "paragraph":
|
||||||
|
return None
|
||||||
|
for child in node.children:
|
||||||
|
if child.type != "inline":
|
||||||
|
continue
|
||||||
|
# Filter out empty text nodes that markdown-it inserts around strong
|
||||||
|
meaningful = [c for c in child.children if not (c.type == "text" and c.content == "")]
|
||||||
|
if len(meaningful) == 1 and meaningful[0].type == "strong":
|
||||||
|
return render_inline_text(meaningful[0].children)
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_grouped_sections(
|
||||||
|
nodes: list[SyntaxTreeNode],
|
||||||
|
) -> list[ParsedGroup]:
|
||||||
|
"""Parse nodes into groups of categories using bold markers as group boundaries.
|
||||||
|
|
||||||
|
Bold-only paragraphs (**Group Name**) delimit groups. H2 headings under each
|
||||||
|
bold marker become categories within that group. Categories appearing before
|
||||||
|
any bold marker go into an "Other" group.
|
||||||
|
"""
|
||||||
|
groups: list[ParsedGroup] = []
|
||||||
|
current_group_name: str | None = None
|
||||||
|
current_group_cats: list[ParsedSection] = []
|
||||||
|
current_cat_name: str | None = None
|
||||||
|
current_cat_body: list[SyntaxTreeNode] = []
|
||||||
|
|
||||||
|
def flush_cat() -> None:
|
||||||
|
nonlocal current_cat_name
|
||||||
|
if current_cat_name is None:
|
||||||
|
return
|
||||||
|
current_group_cats.append(_build_section(current_cat_name, current_cat_body))
|
||||||
|
current_cat_name = None
|
||||||
|
|
||||||
|
def flush_group() -> None:
|
||||||
|
nonlocal current_group_name, current_group_cats
|
||||||
|
if not current_group_cats:
|
||||||
|
current_group_name = None
|
||||||
|
current_group_cats = []
|
||||||
|
return
|
||||||
|
name = current_group_name or "Other"
|
||||||
|
groups.append(ParsedGroup(
|
||||||
|
name=name,
|
||||||
|
slug=slugify(name),
|
||||||
|
categories=list(current_group_cats),
|
||||||
|
))
|
||||||
|
current_group_name = None
|
||||||
|
current_group_cats = []
|
||||||
|
|
||||||
|
for node in nodes:
|
||||||
|
bold_name = _is_bold_marker(node)
|
||||||
|
if bold_name is not None:
|
||||||
|
flush_cat()
|
||||||
|
flush_group()
|
||||||
|
current_group_name = bold_name
|
||||||
|
current_cat_body = []
|
||||||
|
elif node.type == "heading" and node.tag == "h2":
|
||||||
|
flush_cat()
|
||||||
|
current_cat_name = _heading_text(node)
|
||||||
|
current_cat_body = []
|
||||||
|
elif current_cat_name is not None:
|
||||||
|
current_cat_body.append(node)
|
||||||
|
|
||||||
|
flush_cat()
|
||||||
|
flush_group()
|
||||||
|
return groups
|
||||||
|
|
||||||
|
|
||||||
|
def parse_readme(text: str) -> tuple[list[ParsedGroup], list[ParsedSection]]:
|
||||||
|
"""Parse README.md text into grouped categories and resources.
|
||||||
|
|
||||||
|
Returns (groups, resources) where groups is a list of ParsedGroup dicts
|
||||||
|
containing nested categories, and resources is a flat list of ParsedSection.
|
||||||
"""
|
"""
|
||||||
md = MarkdownIt("commonmark")
|
md = MarkdownIt("commonmark")
|
||||||
tokens = md.parse(text)
|
tokens = md.parse(text)
|
||||||
@ -382,7 +468,7 @@ def parse_readme(text: str) -> tuple[list[ParsedSection], list[ParsedSection]]:
|
|||||||
res_end = contributing_idx or len(children)
|
res_end = contributing_idx or len(children)
|
||||||
res_nodes = children[resources_idx + 1 : res_end]
|
res_nodes = children[resources_idx + 1 : res_end]
|
||||||
|
|
||||||
categories = _group_by_h2(cat_nodes)
|
groups = _parse_grouped_sections(cat_nodes)
|
||||||
resources = _group_by_h2(res_nodes)
|
resources = _group_by_h2(res_nodes)
|
||||||
|
|
||||||
return categories, resources
|
return groups, resources
|
||||||
|
|||||||
@ -48,28 +48,33 @@ class TestSlugify:
|
|||||||
|
|
||||||
|
|
||||||
class TestGroupCategories:
|
class TestGroupCategories:
|
||||||
def test_groups_known_categories(self):
|
def test_appends_resources(self):
|
||||||
cats = [
|
parsed_groups = [
|
||||||
{"name": "Web Frameworks", "slug": "web-frameworks"},
|
{"name": "G1", "slug": "g1", "categories": [{"name": "Cat1"}]},
|
||||||
{"name": "Testing", "slug": "testing"},
|
|
||||||
]
|
]
|
||||||
groups = group_categories(cats, [])
|
|
||||||
group_names = [g["name"] for g in groups]
|
|
||||||
assert "Web & API" in group_names
|
|
||||||
assert "Development Tools" in group_names
|
|
||||||
|
|
||||||
def test_ungrouped_go_to_other(self):
|
|
||||||
cats = [{"name": "Unknown Category", "slug": "unknown-category"}]
|
|
||||||
groups = group_categories(cats, [])
|
|
||||||
group_names = [g["name"] for g in groups]
|
|
||||||
assert "Other" in group_names
|
|
||||||
|
|
||||||
def test_resources_grouped(self):
|
|
||||||
resources = [{"name": "Newsletters", "slug": "newsletters"}]
|
resources = [{"name": "Newsletters", "slug": "newsletters"}]
|
||||||
groups = group_categories([], resources)
|
groups = group_categories(parsed_groups, resources)
|
||||||
group_names = [g["name"] for g in groups]
|
group_names = [g["name"] for g in groups]
|
||||||
|
assert "G1" in group_names
|
||||||
assert "Resources" in group_names
|
assert "Resources" in group_names
|
||||||
|
|
||||||
|
def test_no_resources_no_extra_group(self):
|
||||||
|
parsed_groups = [
|
||||||
|
{"name": "G1", "slug": "g1", "categories": [{"name": "Cat1"}]},
|
||||||
|
]
|
||||||
|
groups = group_categories(parsed_groups, [])
|
||||||
|
assert len(groups) == 1
|
||||||
|
assert groups[0]["name"] == "G1"
|
||||||
|
|
||||||
|
def test_preserves_group_order(self):
|
||||||
|
parsed_groups = [
|
||||||
|
{"name": "Second", "slug": "second", "categories": [{"name": "C2"}]},
|
||||||
|
{"name": "First", "slug": "first", "categories": [{"name": "C1"}]},
|
||||||
|
]
|
||||||
|
groups = group_categories(parsed_groups, [])
|
||||||
|
assert groups[0]["name"] == "Second"
|
||||||
|
assert groups[1]["name"] == "First"
|
||||||
|
|
||||||
|
|
||||||
# ---------------------------------------------------------------------------
|
# ---------------------------------------------------------------------------
|
||||||
# build (integration)
|
# build (integration)
|
||||||
@ -114,6 +119,8 @@ class TestBuild:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Tools**
|
||||||
|
|
||||||
## Widgets
|
## Widgets
|
||||||
|
|
||||||
_Widget libraries._
|
_Widget libraries._
|
||||||
@ -176,10 +183,14 @@ class TestBuild:
|
|||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
|
**Group A**
|
||||||
|
|
||||||
## Alpha
|
## Alpha
|
||||||
|
|
||||||
- [a](https://x.com) - A.
|
- [a](https://x.com) - A.
|
||||||
|
|
||||||
|
**Group B**
|
||||||
|
|
||||||
## Beta
|
## Beta
|
||||||
|
|
||||||
- [b](https://x.com) - B.
|
- [b](https://x.com) - B.
|
||||||
@ -194,6 +205,8 @@ class TestBuild:
|
|||||||
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
|
index_html = (tmp_path / "website" / "output" / "index.html").read_text()
|
||||||
assert "Alpha" in index_html
|
assert "Alpha" in index_html
|
||||||
assert "Beta" in index_html
|
assert "Beta" in index_html
|
||||||
|
assert "Group A" in index_html
|
||||||
|
assert "Group B" in index_html
|
||||||
|
|
||||||
def test_index_contains_preview_text(self, tmp_path):
|
def test_index_contains_preview_text(self, tmp_path):
|
||||||
readme = textwrap.dedent("""\
|
readme = textwrap.dedent("""\
|
||||||
|
|||||||
@ -115,27 +115,74 @@ MINIMAL_README = textwrap.dedent("""\
|
|||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
||||||
|
GROUPED_README = textwrap.dedent("""\
|
||||||
|
# Awesome Python
|
||||||
|
|
||||||
|
Some intro text.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Group One**
|
||||||
|
|
||||||
|
## Alpha
|
||||||
|
|
||||||
|
_Libraries for alpha stuff._
|
||||||
|
|
||||||
|
- [lib-a](https://example.com/a) - Does A.
|
||||||
|
- [lib-b](https://example.com/b) - Does B.
|
||||||
|
|
||||||
|
**Group Two**
|
||||||
|
|
||||||
|
## Beta
|
||||||
|
|
||||||
|
_Tools for beta._
|
||||||
|
|
||||||
|
- [lib-c](https://example.com/c) - Does C.
|
||||||
|
|
||||||
|
## Gamma
|
||||||
|
|
||||||
|
- [lib-d](https://example.com/d) - Does D.
|
||||||
|
|
||||||
|
# Resources
|
||||||
|
|
||||||
|
Where to discover resources.
|
||||||
|
|
||||||
|
## Newsletters
|
||||||
|
|
||||||
|
- [News One](https://example.com/n1)
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Please contribute!
|
||||||
|
""")
|
||||||
|
|
||||||
|
|
||||||
class TestParseReadmeSections:
|
class TestParseReadmeSections:
|
||||||
def test_category_count(self):
|
def test_ungrouped_categories_go_to_other(self):
|
||||||
cats, resources = parse_readme(MINIMAL_README)
|
groups, resources = parse_readme(MINIMAL_README)
|
||||||
assert len(cats) == 2
|
assert len(groups) == 1
|
||||||
|
assert groups[0]["name"] == "Other"
|
||||||
|
assert len(groups[0]["categories"]) == 2
|
||||||
|
|
||||||
def test_resource_count(self):
|
def test_ungrouped_category_names(self):
|
||||||
cats, resources = parse_readme(MINIMAL_README)
|
groups, _ = parse_readme(MINIMAL_README)
|
||||||
assert len(resources) == 2
|
cats = groups[0]["categories"]
|
||||||
|
|
||||||
def test_category_names(self):
|
|
||||||
cats, _ = parse_readme(MINIMAL_README)
|
|
||||||
assert cats[0]["name"] == "Alpha"
|
assert cats[0]["name"] == "Alpha"
|
||||||
assert cats[1]["name"] == "Beta"
|
assert cats[1]["name"] == "Beta"
|
||||||
|
|
||||||
|
def test_resource_count(self):
|
||||||
|
_, resources = parse_readme(MINIMAL_README)
|
||||||
|
assert len(resources) == 2
|
||||||
|
|
||||||
def test_category_slugs(self):
|
def test_category_slugs(self):
|
||||||
cats, _ = parse_readme(MINIMAL_README)
|
groups, _ = parse_readme(MINIMAL_README)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
assert cats[0]["slug"] == "alpha"
|
assert cats[0]["slug"] == "alpha"
|
||||||
assert cats[1]["slug"] == "beta"
|
assert cats[1]["slug"] == "beta"
|
||||||
|
|
||||||
def test_category_description(self):
|
def test_category_description(self):
|
||||||
cats, _ = parse_readme(MINIMAL_README)
|
groups, _ = parse_readme(MINIMAL_README)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
assert cats[0]["description"] == "Libraries for alpha stuff."
|
assert cats[0]["description"] == "Libraries for alpha stuff."
|
||||||
assert cats[1]["description"] == "Tools for beta."
|
assert cats[1]["description"] == "Tools for beta."
|
||||||
|
|
||||||
@ -145,13 +192,16 @@ class TestParseReadmeSections:
|
|||||||
assert resources[1]["name"] == "Podcasts"
|
assert resources[1]["name"] == "Podcasts"
|
||||||
|
|
||||||
def test_contributing_skipped(self):
|
def test_contributing_skipped(self):
|
||||||
cats, resources = parse_readme(MINIMAL_README)
|
groups, resources = parse_readme(MINIMAL_README)
|
||||||
all_names = [c["name"] for c in cats] + [r["name"] for r in resources]
|
all_names = []
|
||||||
|
for g in groups:
|
||||||
|
all_names.extend(c["name"] for c in g["categories"])
|
||||||
|
all_names.extend(r["name"] for r in resources)
|
||||||
assert "Contributing" not in all_names
|
assert "Contributing" not in all_names
|
||||||
|
|
||||||
def test_no_separator(self):
|
def test_no_separator(self):
|
||||||
cats, resources = parse_readme("# Just a heading\n\nSome text.\n")
|
groups, resources = parse_readme("# Just a heading\n\nSome text.\n")
|
||||||
assert cats == []
|
assert groups == []
|
||||||
assert resources == []
|
assert resources == []
|
||||||
|
|
||||||
def test_no_description(self):
|
def test_no_description(self):
|
||||||
@ -174,7 +224,8 @@ class TestParseReadmeSections:
|
|||||||
|
|
||||||
Done.
|
Done.
|
||||||
""")
|
""")
|
||||||
cats, resources = parse_readme(readme)
|
groups, resources = parse_readme(readme)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
assert cats[0]["description"] == ""
|
assert cats[0]["description"] == ""
|
||||||
assert cats[0]["entries"][0]["name"] == "item"
|
assert cats[0]["entries"][0]["name"] == "item"
|
||||||
|
|
||||||
@ -194,10 +245,114 @@ class TestParseReadmeSections:
|
|||||||
|
|
||||||
Done.
|
Done.
|
||||||
""")
|
""")
|
||||||
cats, _ = parse_readme(readme)
|
groups, _ = parse_readme(readme)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
assert cats[0]["description"] == "Algorithms. Also see awesome-algos."
|
assert cats[0]["description"] == "Algorithms. Also see awesome-algos."
|
||||||
|
|
||||||
|
|
||||||
|
class TestParseGroupedReadme:
|
||||||
|
def test_group_count(self):
|
||||||
|
groups, _ = parse_readme(GROUPED_README)
|
||||||
|
assert len(groups) == 2
|
||||||
|
|
||||||
|
def test_group_names(self):
|
||||||
|
groups, _ = parse_readme(GROUPED_README)
|
||||||
|
assert groups[0]["name"] == "Group One"
|
||||||
|
assert groups[1]["name"] == "Group Two"
|
||||||
|
|
||||||
|
def test_group_slugs(self):
|
||||||
|
groups, _ = parse_readme(GROUPED_README)
|
||||||
|
assert groups[0]["slug"] == "group-one"
|
||||||
|
assert groups[1]["slug"] == "group-two"
|
||||||
|
|
||||||
|
def test_group_one_has_one_category(self):
|
||||||
|
groups, _ = parse_readme(GROUPED_README)
|
||||||
|
assert len(groups[0]["categories"]) == 1
|
||||||
|
assert groups[0]["categories"][0]["name"] == "Alpha"
|
||||||
|
|
||||||
|
def test_group_two_has_two_categories(self):
|
||||||
|
groups, _ = parse_readme(GROUPED_README)
|
||||||
|
assert len(groups[1]["categories"]) == 2
|
||||||
|
assert groups[1]["categories"][0]["name"] == "Beta"
|
||||||
|
assert groups[1]["categories"][1]["name"] == "Gamma"
|
||||||
|
|
||||||
|
def test_resources_still_parsed(self):
|
||||||
|
_, resources = parse_readme(GROUPED_README)
|
||||||
|
assert len(resources) == 1
|
||||||
|
assert resources[0]["name"] == "Newsletters"
|
||||||
|
|
||||||
|
def test_empty_group_skipped(self):
|
||||||
|
readme = textwrap.dedent("""\
|
||||||
|
# T
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Empty**
|
||||||
|
|
||||||
|
**HasCats**
|
||||||
|
|
||||||
|
## Cat
|
||||||
|
|
||||||
|
- [x](https://x.com) - X.
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Done.
|
||||||
|
""")
|
||||||
|
groups, _ = parse_readme(readme)
|
||||||
|
assert len(groups) == 1
|
||||||
|
assert groups[0]["name"] == "HasCats"
|
||||||
|
|
||||||
|
def test_bold_with_extra_text_not_group_marker(self):
|
||||||
|
readme = textwrap.dedent("""\
|
||||||
|
# T
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
**Note:** This is not a group marker.
|
||||||
|
|
||||||
|
## Cat
|
||||||
|
|
||||||
|
- [x](https://x.com) - X.
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Done.
|
||||||
|
""")
|
||||||
|
groups, _ = parse_readme(readme)
|
||||||
|
# "Note:" has text after the strong node, so it's not a group marker
|
||||||
|
# Category goes into "Other"
|
||||||
|
assert len(groups) == 1
|
||||||
|
assert groups[0]["name"] == "Other"
|
||||||
|
|
||||||
|
def test_categories_before_any_group_marker(self):
|
||||||
|
readme = textwrap.dedent("""\
|
||||||
|
# T
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Orphan
|
||||||
|
|
||||||
|
- [x](https://x.com) - X.
|
||||||
|
|
||||||
|
**A Group**
|
||||||
|
|
||||||
|
## Grouped
|
||||||
|
|
||||||
|
- [y](https://x.com) - Y.
|
||||||
|
|
||||||
|
# Contributing
|
||||||
|
|
||||||
|
Done.
|
||||||
|
""")
|
||||||
|
groups, _ = parse_readme(readme)
|
||||||
|
assert len(groups) == 2
|
||||||
|
assert groups[0]["name"] == "Other"
|
||||||
|
assert groups[0]["categories"][0]["name"] == "Orphan"
|
||||||
|
assert groups[1]["name"] == "A Group"
|
||||||
|
assert groups[1]["categories"][0]["name"] == "Grouped"
|
||||||
|
|
||||||
|
|
||||||
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
|
def _content_nodes(md_text: str) -> list[SyntaxTreeNode]:
|
||||||
"""Helper: parse markdown and return all block nodes."""
|
"""Helper: parse markdown and return all block nodes."""
|
||||||
md = MarkdownIt("commonmark")
|
md = MarkdownIt("commonmark")
|
||||||
@ -283,7 +438,8 @@ class TestParseSectionEntries:
|
|||||||
|
|
||||||
Done.
|
Done.
|
||||||
""")
|
""")
|
||||||
cats, _ = parse_readme(readme)
|
groups, _ = parse_readme(readme)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
# 2 main entries + 1 also_see = 3
|
# 2 main entries + 1 also_see = 3
|
||||||
assert cats[0]["entry_count"] == 3
|
assert cats[0]["entry_count"] == 3
|
||||||
|
|
||||||
@ -305,7 +461,8 @@ class TestParseSectionEntries:
|
|||||||
|
|
||||||
Done.
|
Done.
|
||||||
""")
|
""")
|
||||||
cats, _ = parse_readme(readme)
|
groups, _ = parse_readme(readme)
|
||||||
|
cats = groups[0]["categories"]
|
||||||
assert cats[0]["preview"] == "alpha, beta, gamma, delta"
|
assert cats[0]["preview"] == "alpha, beta, gamma, delta"
|
||||||
|
|
||||||
def test_description_html_escapes_xss(self):
|
def test_description_html_escapes_xss(self):
|
||||||
@ -366,10 +523,17 @@ class TestParseRealReadme:
|
|||||||
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
|
readme_path = os.path.join(os.path.dirname(__file__), "..", "..", "README.md")
|
||||||
with open(readme_path, encoding="utf-8") as f:
|
with open(readme_path, encoding="utf-8") as f:
|
||||||
self.readme_text = f.read()
|
self.readme_text = f.read()
|
||||||
self.cats, self.resources = parse_readme(self.readme_text)
|
self.groups, self.resources = parse_readme(self.readme_text)
|
||||||
|
self.cats = [c for g in self.groups for c in g["categories"]]
|
||||||
|
|
||||||
def test_at_least_83_categories(self):
|
def test_at_least_11_groups(self):
|
||||||
assert len(self.cats) >= 83
|
assert len(self.groups) >= 11
|
||||||
|
|
||||||
|
def test_first_group_is_ai_ml(self):
|
||||||
|
assert self.groups[0]["name"] == "AI & ML"
|
||||||
|
|
||||||
|
def test_at_least_76_categories(self):
|
||||||
|
assert len(self.cats) >= 76
|
||||||
|
|
||||||
def test_resources_has_newsletters_and_podcasts(self):
|
def test_resources_has_newsletters_and_podcasts(self):
|
||||||
names = [r["name"] for r in self.resources]
|
names = [r["name"] for r in self.resources]
|
||||||
@ -380,21 +544,17 @@ class TestParseRealReadme:
|
|||||||
all_names = [c["name"] for c in self.cats] + [r["name"] for r in self.resources]
|
all_names = [c["name"] for c in self.cats] + [r["name"] for r in self.resources]
|
||||||
assert "Contributing" not in all_names
|
assert "Contributing" not in all_names
|
||||||
|
|
||||||
def test_first_category_is_admin_panels(self):
|
def test_first_category_is_ai_and_agents(self):
|
||||||
assert self.cats[0]["name"] == "Admin Panels"
|
assert self.cats[0]["name"] == "AI and Agents"
|
||||||
assert self.cats[0]["slug"] == "admin-panels"
|
assert self.cats[0]["slug"] == "ai-and-agents"
|
||||||
|
|
||||||
def test_last_category_is_wsgi_servers(self):
|
def test_web_apis_slug(self):
|
||||||
assert self.cats[-1]["name"] == "WSGI Servers"
|
|
||||||
assert self.cats[-1]["slug"] == "wsgi-servers"
|
|
||||||
|
|
||||||
def test_restful_api_slug(self):
|
|
||||||
slugs = [c["slug"] for c in self.cats]
|
slugs = [c["slug"] for c in self.cats]
|
||||||
assert "restful-api" in slugs
|
assert "web-apis" in slugs
|
||||||
|
|
||||||
def test_descriptions_extracted(self):
|
def test_descriptions_extracted(self):
|
||||||
admin = self.cats[0]
|
ai = next(c for c in self.cats if c["name"] == "AI and Agents")
|
||||||
assert admin["description"] == "Libraries for administrative interfaces."
|
assert "AI applications" in ai["description"]
|
||||||
|
|
||||||
def test_entry_counts_nonzero(self):
|
def test_entry_counts_nonzero(self):
|
||||||
for cat in self.cats:
|
for cat in self.cats:
|
||||||
@ -422,3 +582,8 @@ class TestParseRealReadme:
|
|||||||
algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
|
algos = next(c for c in self.cats if c["name"] == "Algorithms and Design Patterns")
|
||||||
assert "awesome-algorithms" in algos["description"]
|
assert "awesome-algorithms" in algos["description"]
|
||||||
assert "https://" not in algos["description"]
|
assert "https://" not in algos["description"]
|
||||||
|
|
||||||
|
def test_miscellaneous_in_own_group(self):
|
||||||
|
misc_group = next((g for g in self.groups if g["name"] == "Miscellaneous"), None)
|
||||||
|
assert misc_group is not None
|
||||||
|
assert any(c["name"] == "Miscellaneous" for c in misc_group["categories"])
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user