fix: add support for inline tags (#100)

tylerdave · web-flow · commit 23d7260110fb · 2026-04-05T21:16:50.000-04:00
diff --git a/docs/Markdown Support.md b/docs/Markdown Support.md
@@ -98,8 +98,8 @@ Math syntax is always parsed. The default theme loads KaTeX from CDN for renderi
 
 | Feature          | Syntax               | Status | Notes                |
 | ---------------- | -------------------- | ------ | -------------------- |
-| Inline tags      | `#tag`               | ❌     | Future (collections) |
-| Nested tags      | `#parent/child`      | ❌     | Future (collections) |
+| Inline tags      | `#tag`               | ✅     | Extracted and rendered as links |
+| Nested tags      | `#parent/child`      | ✅     | Hierarchical — parent tags auto-created |
 | Frontmatter tags | `tags: [tag1, tag2]` | ✅     | Already parsed       |
 
 ### Metadata
@@ -136,7 +136,6 @@ This ensures:
 
 - Block references: `[[page#^block]]`
 - Inline fields: `key:: value` (Dataview compatibility)
-- Inline tags: `#tag`, `#parent/child`
 
 ### Not Planned
 
diff --git a/plans/ideas.md b/plans/ideas.md
@@ -2,16 +2,11 @@
 
 Future ideas and research notes. Not currently planned.
 
-## Pre-v1 Candidates
-
-- **ASCII-only slugs** — strip Unicode/accents from URLs. Breaking change (URLs change), best done before v1 locks them. NFD normalization (no deps) handles European languages; `python-slugify` for full transliteration. See research notes below.
-- **Configurable URL casing** — option to preserve original filename casing instead of slugifying
-
 ## Syntax & Content
 
 - **Block references** (`[[page#^block]]`)
 - **Inline fields** (`key:: value` — Dataview compatibility)
-- **Inline tags** (`#tag`, `#parent/child`)
+
 - **Configurable markdown preset** (commonmark, gfm-like, custom plugins)
 - **Build-time mermaid rendering** — render mermaid diagrams to SVG at build time instead of client-side JS (via mermaid CLI `mmdc`)
 
@@ -43,17 +38,3 @@ Future ideas and research notes. Not currently planned.
 - **Extract icon handling** — move icon resolution into a standalone package
 - **Merge `./site` and `./docs`** — consolidate demo site and docs site
 
----
-
-## ASCII-only Slugs Research
-
-**Problem**: Unicode in URLs causes encoding issues, server compatibility problems, and SEO challenges.
-
-**Approaches**:
-
-1. **Strip accents (NFD normalization)** — no dependencies, works for European languages. "Café" → "Cafe". Doesn't handle Cyrillic/CJK.
-2. **python-slugify** — comprehensive transliteration including Cyrillic, Greek, CJK. Adds ~500KB dependency.
-
-**Implementation location**: `src/rockgarden/urls.py` — `generate_slug()` function.
-
-**Recommendation**: NFD strip approach (no deps) with optional python-slugify for advanced cases. Breaking change — URLs would change, but acceptable pre-v1.
diff --git a/src/rockgarden/obsidian/inline_tags.py b/src/rockgarden/obsidian/inline_tags.py
@@ -0,0 +1,71 @@
+"""Inline tag extraction and rendering.
+
+Extracts Obsidian-style inline tags (#tag, #parent/child) from markdown
+content. Tags are collected for the page's tag list and rendered as
+clickable links. Code blocks, inline code, and markdown links are skipped.
+"""
+
+import re
+
+from rockgarden.urls import get_tag_url, normalize_tag
+
+INLINE_TAG_PATTERN = re.compile(r"(?<!\w)#([a-zA-Z][\w-]*(?:/[\w-]+)*)")
+# Protect code blocks AND markdown links from tag extraction
+PROTECTED_PATTERN = re.compile(
+    r"```[\s\S]*?```|~~~[\s\S]*?~~~|`[^`\n]+`|\[[^\]]*\]\([^)]*\)"
+)
+PLACEHOLDER_PATTERN = re.compile(r"\x00CODE(\d+)\x00")
+
+
+def extract_inline_tags(
+    content: str,
+    clean_urls: bool = True,
+    base_path: str = "",
+    ascii_urls: bool = False,
+) -> tuple[str, list[str]]:
+    """Extract inline tags from content and render them as links.
+
+    Returns:
+        Tuple of (modified_content, list_of_raw_tag_strings).
+        The modified content has inline tags replaced with markdown links.
+    """
+    code_blocks: list[str] = []
+
+    def save_code_block(match: re.Match) -> str:
+        code_blocks.append(match.group(0))
+        return f"\x00CODE{len(code_blocks) - 1}\x00"
+
+    content = PROTECTED_PATTERN.sub(save_code_block, content)
+
+    tags: list[str] = []
+
+    def replace_tag(match: re.Match) -> str:
+        raw_tag = match.group(1)
+        tags.append(raw_tag)
+        slug = normalize_tag(raw_tag, ascii_urls)
+        url = get_tag_url(slug, clean_urls, base_path)
+        return f"[#{raw_tag}]({url})"
+
+    content = INLINE_TAG_PATTERN.sub(replace_tag, content)
+
+    def restore_code_block(match: re.Match) -> str:
+        idx = int(match.group(1))
+        return code_blocks[idx]
+
+    content = PLACEHOLDER_PATTERN.sub(restore_code_block, content)
+    return content, tags
+
+
+def expand_hierarchical_tags(tags: list[str]) -> list[str]:
+    """Expand nested tags into all ancestor segments.
+
+    ["project/active", "python"] → ["project", "project/active", "python"]
+    ["a/b/c"] → ["a", "a/b", "a/b/c"]
+    """
+    expanded: set[str] = set()
+    for tag in tags:
+        expanded.add(tag)
+        parts = tag.split("/")
+        for i in range(1, len(parts)):
+            expanded.add("/".join(parts[:i]))
+    return sorted(expanded)
diff --git a/src/rockgarden/output/builder.py b/src/rockgarden/output/builder.py
@@ -48,6 +48,9 @@
     process_wikilinks,
 )
 from rockgarden.obsidian.comments import strip_comments
+from rockgarden.obsidian.inline_tags import (
+    extract_inline_tags,
+)
 from rockgarden.output.build_info import get_build_info
 from rockgarden.output.feed import build_atom_feed
 from rockgarden.output.llms_txt import build_llms_full_txt, build_llms_txt
@@ -672,6 +675,15 @@ def build_site(
         content = transform_md_links(content, clean_urls)
         if config.build.inline_icons:
             content = process_inline_icons(content)
+        content, inline_tags = extract_inline_tags(
+            content, clean_urls, base_path, config.site.ascii_urls
+        )
+        if inline_tags:
+            existing = page.frontmatter.get("tags", [])
+            if isinstance(existing, str):
+                existing = [existing]
+            merged = list(dict.fromkeys(existing + inline_tags))
+            page.frontmatter["tags"] = merged
         page.html = process_callouts(render_markdown(content))
 
         toc_entries = None
diff --git a/src/rockgarden/output/tags.py b/src/rockgarden/output/tags.py
@@ -5,6 +5,7 @@
 from jinja2 import Environment
 
 from rockgarden.content.models import Page
+from rockgarden.obsidian.inline_tags import expand_hierarchical_tags
 from rockgarden.urls import get_url, normalize_tag
 
 
@@ -19,6 +20,7 @@ def collect_tags(pages: list[Page], ascii_urls: bool = False) -> dict[str, list[
         raw_tags = page.frontmatter.get("tags", [])
         if isinstance(raw_tags, str):
             raw_tags = [raw_tags]
+        raw_tags = expand_hierarchical_tags(raw_tags)
         for tag in raw_tags:
             slug = normalize_tag(tag, ascii_urls)
             if slug:
diff --git a/tests/test_inline_tags.py b/tests/test_inline_tags.py
@@ -0,0 +1,140 @@
+"""Tests for inline tag extraction and rendering."""
+
+from rockgarden.config import Config, SiteConfig
+from rockgarden.obsidian.inline_tags import (
+    expand_hierarchical_tags,
+    extract_inline_tags,
+)
+from rockgarden.output.builder import build_site
+
+
+class TestExtractInlineTags:
+    def test_basic_tag(self):
+        content, tags = extract_inline_tags("Hello #python world")
+        assert tags == ["python"]
+        assert "[#python](" in content
+
+    def test_multiple_tags(self):
+        content, tags = extract_inline_tags("Use #python and #rust")
+        assert tags == ["python", "rust"]
+
+    def test_nested_tag(self):
+        content, tags = extract_inline_tags("Status: #project/active")
+        assert tags == ["project/active"]
+        assert "[#project/active](" in content
+
+    def test_not_in_code_span(self):
+        content, tags = extract_inline_tags("Use `#python` for scripting")
+        assert tags == []
+        assert "`#python`" in content
+
+    def test_not_in_fenced_block(self):
+        content, tags = extract_inline_tags("```\n#python\n```")
+        assert tags == []
+
+    def test_not_a_heading(self):
+        content, tags = extract_inline_tags("# Heading\n\nSome text")
+        assert tags == []
+
+    def test_not_a_number(self):
+        content, tags = extract_inline_tags("Issue #123 is fixed")
+        assert tags == []
+
+    def test_tag_at_start_of_line(self):
+        content, tags = extract_inline_tags("#python is great")
+        assert tags == ["python"]
+
+    def test_tag_with_hyphens(self):
+        content, tags = extract_inline_tags("Use #my-tag here")
+        assert tags == ["my-tag"]
+
+    def test_tag_link_url(self):
+        content, tags = extract_inline_tags("Hello #python", base_path="/docs")
+        assert "[#python](/docs/tags/python/)" in content
+
+    def test_tag_link_no_clean_urls(self):
+        content, tags = extract_inline_tags("Hello #python", clean_urls=False)
+        assert "[#python](/tags/python.html)" in content
+
+    def test_no_tags(self):
+        content, tags = extract_inline_tags("No tags here")
+        assert tags == []
+        assert content == "No tags here"
+
+    def test_tag_after_punctuation(self):
+        content, tags = extract_inline_tags("see: #python")
+        assert tags == ["python"]
+
+    def test_tag_not_inside_word(self):
+        content, tags = extract_inline_tags("foo#bar is not a tag")
+        assert tags == []
+
+    def test_not_in_anchor_link(self):
+        content, tags = extract_inline_tags("[Jump](#introduction)")
+        assert tags == []
+        assert content == "[Jump](#introduction)"
+
+    def test_not_in_markdown_link_text(self):
+        content, tags = extract_inline_tags("[text with #python](http://example.com)")
+        assert tags == []
+
+    def test_tag_outside_link_still_works(self):
+        content, tags = extract_inline_tags("[link](#anchor) and #python")
+        assert tags == ["python"]
+        assert "[link](#anchor)" in content
+
+
+class TestExpandHierarchicalTags:
+    def test_flat_tags_unchanged(self):
+        assert expand_hierarchical_tags(["python", "rust"]) == ["python", "rust"]
+
+    def test_nested_expands(self):
+        result = expand_hierarchical_tags(["project/active"])
+        assert "project/active" in result
+        assert "project" in result
+
+    def test_deep_nesting(self):
+        result = expand_hierarchical_tags(["a/b/c"])
+        assert set(result) == {"a", "a/b", "a/b/c"}
+
+    def test_dedup(self):
+        result = expand_hierarchical_tags(["project/a", "project/b"])
+        assert result.count("project") == 1
+
+    def test_empty(self):
+        assert expand_hierarchical_tags([]) == []
+
+
+class TestInlineTagsIntegration:
+    def test_inline_tags_merged_with_frontmatter(self, tmp_path):
+        source = tmp_path / "content"
+        source.mkdir()
+        (source / "page.md").write_text(
+            "---\ntags: [existing]\n---\n# Hello\n\nSee #python for details.\n"
+        )
+        output = tmp_path / "output"
+        config = Config(site=SiteConfig(source=source, output=output))
+        build_site(config, source, output)
+        # Both tag pages should exist
+        assert (output / "tags" / "existing" / "index.html").exists()
+        assert (output / "tags" / "python" / "index.html").exists()
+
+    def test_hierarchical_tag_pages_generated(self, tmp_path):
+        source = tmp_path / "content"
+        source.mkdir()
+        (source / "page.md").write_text("---\ntags: [project/active]\n---\n# Hello\n")
+        output = tmp_path / "output"
+        config = Config(site=SiteConfig(source=source, output=output))
+        build_site(config, source, output)
+        assert (output / "tags" / "project-active" / "index.html").exists()
+        assert (output / "tags" / "project" / "index.html").exists()
+
+    def test_inline_tag_renders_as_link(self, tmp_path):
+        source = tmp_path / "content"
+        source.mkdir()
+        (source / "page.md").write_text("# Hello\n\nSee #python for details.\n")
+        output = tmp_path / "output"
+        config = Config(site=SiteConfig(source=source, output=output))
+        build_site(config, source, output)
+        html = (output / "page" / "index.html").read_text()
+        assert "/tags/python/" in html