Skip to content

Commit 23d7260

Browse files
authored
fix: add support for inline tags (#100)
1 parent 59172d0 commit 23d7260

6 files changed

Lines changed: 228 additions & 23 deletions

File tree

docs/Markdown Support.md

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -98,8 +98,8 @@ Math syntax is always parsed. The default theme loads KaTeX from CDN for renderi
9898

9999
| Feature | Syntax | Status | Notes |
100100
| ---------------- | -------------------- | ------ | -------------------- |
101-
| Inline tags | `#tag` | | Future (collections) |
102-
| Nested tags | `#parent/child` | | Future (collections) |
101+
| Inline tags | `#tag` | | Extracted and rendered as links |
102+
| Nested tags | `#parent/child` | | Hierarchical — parent tags auto-created |
103103
| Frontmatter tags | `tags: [tag1, tag2]` || Already parsed |
104104

105105
### Metadata
@@ -136,7 +136,6 @@ This ensures:
136136

137137
- Block references: `[[page#^block]]`
138138
- Inline fields: `key:: value` (Dataview compatibility)
139-
- Inline tags: `#tag`, `#parent/child`
140139

141140
### Not Planned
142141

plans/ideas.md

Lines changed: 1 addition & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,16 +2,11 @@
22

33
Future ideas and research notes. Not currently planned.
44

5-
## Pre-v1 Candidates
6-
7-
- **ASCII-only slugs** — strip Unicode/accents from URLs. Breaking change (URLs change), best done before v1 locks them. NFD normalization (no deps) handles European languages; `python-slugify` for full transliteration. See research notes below.
8-
- **Configurable URL casing** — option to preserve original filename casing instead of slugifying
9-
105
## Syntax & Content
116

127
- **Block references** (`[[page#^block]]`)
138
- **Inline fields** (`key:: value` — Dataview compatibility)
14-
- **Inline tags** (`#tag`, `#parent/child`)
9+
1510
- **Configurable markdown preset** (commonmark, gfm-like, custom plugins)
1611
- **Build-time mermaid rendering** — render mermaid diagrams to SVG at build time instead of client-side JS (via mermaid CLI `mmdc`)
1712

@@ -43,17 +38,3 @@ Future ideas and research notes. Not currently planned.
4338
- **Extract icon handling** — move icon resolution into a standalone package
4439
- **Merge `./site` and `./docs`** — consolidate demo site and docs site
4540

46-
---
47-
48-
## ASCII-only Slugs Research
49-
50-
**Problem**: Unicode in URLs causes encoding issues, server compatibility problems, and SEO challenges.
51-
52-
**Approaches**:
53-
54-
1. **Strip accents (NFD normalization)** — no dependencies, works for European languages. "Café" → "Cafe". Doesn't handle Cyrillic/CJK.
55-
2. **python-slugify** — comprehensive transliteration including Cyrillic, Greek, CJK. Adds ~500KB dependency.
56-
57-
**Implementation location**: `src/rockgarden/urls.py`, `generate_slug()` function.
58-
59-
**Recommendation**: NFD strip approach (no deps) with optional python-slugify for advanced cases. Breaking change — URLs would change, but acceptable pre-v1.
Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
"""Inline tag extraction and rendering.
2+
3+
Extracts Obsidian-style inline tags (#tag, #parent/child) from markdown
4+
content. Tags are collected for the page's tag list and rendered as
5+
clickable links. Code blocks are protected from processing.
6+
"""
7+
8+
import re
9+
10+
from rockgarden.urls import get_tag_url, normalize_tag
11+
12+
# Matches an inline tag such as "#tag" or "#parent/child"; group 1 is the tag
# text without the leading "#". The first character must be a letter, so
# "#123" and a heading marker followed by a space do not match. The lookbehind
# rejects a "#" that directly follows a word character ("foo#bar") or an
# ampersand, so hexadecimal character references such as "&#x27;" are not
# mistaken for tags.
INLINE_TAG_PATTERN = re.compile(r"(?<![\w&])#([a-zA-Z][\w-]*(?:/[\w-]+)*)")
# Protect code blocks AND markdown links from tag extraction
PROTECTED_PATTERN = re.compile(
    r"```[\s\S]*?```|~~~[\s\S]*?~~~|`[^`\n]+`|\[[^\]]*\]\([^)]*\)"
)
# Placeholder text that stands in for a protected span; group 1 is the
# decimal index of the saved span.
PLACEHOLDER_PATTERN = re.compile(r"\x00CODE(\d+)\x00")
18+
19+
20+
def extract_inline_tags(
    content: str,
    clean_urls: bool = True,
    base_path: str = "",
    ascii_urls: bool = False,
) -> tuple[str, list[str]]:
    """Collect inline ``#tag`` occurrences and rewrite each as a markdown link.

    Spans matched by ``PROTECTED_PATTERN`` are swapped for placeholders before
    the tag search and put back verbatim afterwards, so they are neither
    scanned for tags nor modified.

    Args:
        content: Markdown text to process.
        clean_urls: Forwarded to ``get_tag_url``.
        base_path: Forwarded to ``get_tag_url``.
        ascii_urls: Forwarded to ``normalize_tag``.

    Returns:
        Tuple of (modified_content, list_of_raw_tag_strings). Tags appear in
        order of occurrence, without the leading "#", and are not
        de-duplicated.
    """
    protected: list[str] = []
    found: list[str] = []

    def _stash(m: re.Match) -> str:
        # The placeholder carries the index the span is about to occupy.
        token = f"\x00CODE{len(protected)}\x00"
        protected.append(m.group(0))
        return token

    def _linkify(m: re.Match) -> str:
        name = m.group(1)
        found.append(name)
        target = get_tag_url(normalize_tag(name, ascii_urls), clean_urls, base_path)
        return f"[#{name}]({target})"

    masked = PROTECTED_PATTERN.sub(_stash, content)
    linked = INLINE_TAG_PATTERN.sub(_linkify, masked)
    restored = PLACEHOLDER_PATTERN.sub(
        lambda m: protected[int(m.group(1))], linked
    )
    return restored, found
57+
58+
59+
def expand_hierarchical_tags(tags: list[str]) -> list[str]:
    """Return each tag together with all of its ancestor paths.

    Every tag is split on "/" and each leading run of segments is added
    alongside the tag itself. Duplicates collapse and the result is sorted.

    ["project/active", "python"] → ["project", "project/active", "python"]
    ["a/b/c"] → ["a", "a/b", "a/b/c"]
    """
    seen: set[str] = set()
    for tag in tags:
        segments = tag.split("/")
        # Depths 1..len(segments): the full-depth prefix is the tag itself.
        seen.update(
            "/".join(segments[:depth]) for depth in range(1, len(segments) + 1)
        )
    return sorted(seen)

src/rockgarden/output/builder.py

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@
4848
process_wikilinks,
4949
)
5050
from rockgarden.obsidian.comments import strip_comments
51+
from rockgarden.obsidian.inline_tags import (
52+
extract_inline_tags,
53+
)
5154
from rockgarden.output.build_info import get_build_info
5255
from rockgarden.output.feed import build_atom_feed
5356
from rockgarden.output.llms_txt import build_llms_full_txt, build_llms_txt
@@ -672,6 +675,15 @@ def build_site(
672675
content = transform_md_links(content, clean_urls)
673676
if config.build.inline_icons:
674677
content = process_inline_icons(content)
678+
content, inline_tags = extract_inline_tags(
679+
content, clean_urls, base_path, config.site.ascii_urls
680+
)
681+
if inline_tags:
682+
existing = page.frontmatter.get("tags", [])
683+
if isinstance(existing, str):
684+
existing = [existing]
685+
merged = list(dict.fromkeys(existing + inline_tags))
686+
page.frontmatter["tags"] = merged
675687
page.html = process_callouts(render_markdown(content))
676688

677689
toc_entries = None

src/rockgarden/output/tags.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
from jinja2 import Environment
66

77
from rockgarden.content.models import Page
8+
from rockgarden.obsidian.inline_tags import expand_hierarchical_tags
89
from rockgarden.urls import get_url, normalize_tag
910

1011

@@ -19,6 +20,7 @@ def collect_tags(pages: list[Page], ascii_urls: bool = False) -> dict[str, list[
1920
raw_tags = page.frontmatter.get("tags", [])
2021
if isinstance(raw_tags, str):
2122
raw_tags = [raw_tags]
23+
raw_tags = expand_hierarchical_tags(raw_tags)
2224
for tag in raw_tags:
2325
slug = normalize_tag(tag, ascii_urls)
2426
if slug:

tests/test_inline_tags.py

Lines changed: 140 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,140 @@
1+
"""Tests for inline tag extraction and rendering."""
2+
3+
from rockgarden.config import Config, SiteConfig
4+
from rockgarden.obsidian.inline_tags import (
5+
expand_hierarchical_tags,
6+
extract_inline_tags,
7+
)
8+
from rockgarden.output.builder import build_site
9+
10+
11+
class TestExtractInlineTags:
    """Unit tests for ``extract_inline_tags``: tag detection and link output."""

    def test_basic_tag(self):
        rendered, found = extract_inline_tags("Hello #python world")
        assert found == ["python"]
        assert "[#python](" in rendered

    def test_multiple_tags(self):
        _, found = extract_inline_tags("Use #python and #rust")
        assert found == ["python", "rust"]

    def test_nested_tag(self):
        rendered, found = extract_inline_tags("Status: #project/active")
        assert found == ["project/active"]
        assert "[#project/active](" in rendered

    def test_not_in_code_span(self):
        rendered, found = extract_inline_tags("Use `#python` for scripting")
        assert found == []
        assert "`#python`" in rendered

    def test_not_in_fenced_block(self):
        _, found = extract_inline_tags("```\n#python\n```")
        assert found == []

    def test_not_a_heading(self):
        _, found = extract_inline_tags("# Heading\n\nSome text")
        assert found == []

    def test_not_a_number(self):
        _, found = extract_inline_tags("Issue #123 is fixed")
        assert found == []

    def test_tag_at_start_of_line(self):
        _, found = extract_inline_tags("#python is great")
        assert found == ["python"]

    def test_tag_with_hyphens(self):
        _, found = extract_inline_tags("Use #my-tag here")
        assert found == ["my-tag"]

    def test_tag_link_url(self):
        rendered, _ = extract_inline_tags("Hello #python", base_path="/docs")
        assert "[#python](/docs/tags/python/)" in rendered

    def test_tag_link_no_clean_urls(self):
        rendered, _ = extract_inline_tags("Hello #python", clean_urls=False)
        assert "[#python](/tags/python.html)" in rendered

    def test_no_tags(self):
        text = "No tags here"
        rendered, found = extract_inline_tags(text)
        assert found == []
        assert rendered == text

    def test_tag_after_punctuation(self):
        _, found = extract_inline_tags("see: #python")
        assert found == ["python"]

    def test_tag_not_inside_word(self):
        _, found = extract_inline_tags("foo#bar is not a tag")
        assert found == []

    def test_not_in_anchor_link(self):
        text = "[Jump](#introduction)"
        rendered, found = extract_inline_tags(text)
        assert found == []
        assert rendered == text

    def test_not_in_markdown_link_text(self):
        _, found = extract_inline_tags("[text with #python](http://example.com)")
        assert found == []

    def test_tag_outside_link_still_works(self):
        rendered, found = extract_inline_tags("[link](#anchor) and #python")
        assert found == ["python"]
        assert "[link](#anchor)" in rendered
85+
86+
87+
class TestExpandHierarchicalTags:
    """Unit tests for ``expand_hierarchical_tags``: ancestor expansion and dedup."""

    def test_flat_tags_unchanged(self):
        flat = ["python", "rust"]
        assert expand_hierarchical_tags(flat) == ["python", "rust"]

    def test_nested_expands(self):
        expanded = expand_hierarchical_tags(["project/active"])
        assert "project/active" in expanded
        assert "project" in expanded

    def test_deep_nesting(self):
        expanded = expand_hierarchical_tags(["a/b/c"])
        assert set(expanded) == {"a", "a/b", "a/b/c"}

    def test_dedup(self):
        # Two children of the same parent must yield the parent only once.
        expanded = expand_hierarchical_tags(["project/a", "project/b"])
        assert expanded.count("project") == 1

    def test_empty(self):
        assert expand_hierarchical_tags([]) == []
106+
107+
108+
class TestInlineTagsIntegration:
    """End-to-end checks that run ``build_site`` on a one-page site in ``tmp_path``."""

    @staticmethod
    def _build_single_page(tmp_path, markdown):
        """Write ``markdown`` to content/page.md, build the site, return the output dir."""
        source = tmp_path / "content"
        source.mkdir()
        (source / "page.md").write_text(markdown)
        output = tmp_path / "output"
        config = Config(site=SiteConfig(source=source, output=output))
        build_site(config, source, output)
        return output

    def test_inline_tags_merged_with_frontmatter(self, tmp_path):
        out_dir = self._build_single_page(
            tmp_path,
            "---\ntags: [existing]\n---\n# Hello\n\nSee #python for details.\n",
        )
        # Both tag pages should exist
        tags_dir = out_dir / "tags"
        assert (tags_dir / "existing" / "index.html").exists()
        assert (tags_dir / "python" / "index.html").exists()

    def test_hierarchical_tag_pages_generated(self, tmp_path):
        out_dir = self._build_single_page(
            tmp_path, "---\ntags: [project/active]\n---\n# Hello\n"
        )
        tags_dir = out_dir / "tags"
        assert (tags_dir / "project-active" / "index.html").exists()
        assert (tags_dir / "project" / "index.html").exists()

    def test_inline_tag_renders_as_link(self, tmp_path):
        out_dir = self._build_single_page(
            tmp_path, "# Hello\n\nSee #python for details.\n"
        )
        page_html = (out_dir / "page" / "index.html").read_text()
        assert "/tags/python/" in page_html

0 commit comments

Comments
 (0)