Skip to content

Commit 7b30162

Browse files
authored
fix: add optional llms.txt generation (#74)
add llms.txt generation
1 parent ef487e1 commit 7b30162

6 files changed

Lines changed: 337 additions & 0 deletions

File tree

docs/Configuration.md

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ Atom feed generation. Requires `site.base_url` to be set.
115115
| `modified_date_fallback` | `bool` | `false` | Use file modification time when no frontmatter date field matches |
116116
| `timezone` | `str` | `"UTC"` | IANA timezone for date display (e.g. `"US/Eastern"`) |
117117

118+
## `[llms_txt]`
119+
120+
Generates an `llms.txt` file following the [llmstxt.org](https://llmstxt.org/) specification, making site content discoverable by LLMs. Requires `site.base_url` to be set.
121+
122+
| Field | Type | Default | Description |
123+
| ------------- | ------ | ------- | ------------------------------------------------- |
124+
| `enabled` | `bool` | `false` | Enable llms.txt generation |
125+
| `description` | `str` | `""` | Site summary shown as a blockquote below the title |
126+
127+
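Pages are grouped by named collection first, then by top-level directory, then root-level pages under a `Pages` heading. Any `nav.links` entries that have a URL are appended last under a `Links` heading.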
128+
118129
## `[hooks]`
119130

120131
Shell commands executed at build lifecycle stages. Commands run sequentially; any non-zero exit aborts the build.

docs/rockgarden.toml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,11 @@ title = "Rockgarden"
33
clean_urls = true
44
output = "../_site"
55
ignore_patterns = []
6+
base_url = "https://rockgarden.build"
7+
8+
[llms_txt]
9+
enabled = true
10+
description = "Rockgarden is an opinionated static site generator that turns a folder of Markdown files into a fast, searchable, interlinked website. It supports CommonMark, GitHub Flavored Markdown, and Obsidian syntax out of the box."
611

712
[dates]
813
timezone = "US/Eastern"

src/rockgarden/config.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,13 @@ class FeedConfig(BaseModel):
138138
collections: list[str] = Field(default_factory=list)
139139

140140

141+
class LlmsTxtConfig(BaseModel):
    """llms.txt generation configuration.

    Populated from the ``[llms_txt]`` table of the site configuration.
    """

    # Master switch; llms.txt generation is off unless this is set to true.
    enabled: bool = False
    # Site summary rendered as a blockquote below the title; an empty string
    # means no blockquote is written.
    description: str = ""
146+
147+
141148
class HooksConfig(BaseModel):
142149
"""Build hook commands executed at lifecycle stages."""
143150

@@ -169,6 +176,7 @@ class Config(BaseModel):
169176
search: SearchConfig = Field(default_factory=SearchConfig)
170177
dates: DatesConfig = Field(default_factory=DatesConfig)
171178
feed: FeedConfig = Field(default_factory=FeedConfig)
179+
llms_txt: LlmsTxtConfig = Field(default_factory=LlmsTxtConfig)
172180
hooks: HooksConfig = Field(default_factory=HooksConfig)
173181
collections: list[CollectionConfig] = Field(default_factory=list)
174182

src/rockgarden/output/builder.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@
4848
)
4949
from rockgarden.output.build_info import get_build_info
5050
from rockgarden.output.feed import build_atom_feed
51+
from rockgarden.output.llms_txt import build_llms_txt
5152
from rockgarden.output.search import build_search_index, strip_html
5253
from rockgarden.output.sitemap import build_sitemap
5354
from rockgarden.output.tags import build_tag_pages, collect_tags
@@ -750,6 +751,21 @@ def build_site(
750751
feed_file.parent.mkdir(parents=True, exist_ok=True)
751752
feed_file.write_text(feed_xml)
752753

754+
# Generate llms.txt if base_url is configured and llms_txt enabled
755+
if config.site.base_url and config.llms_txt.enabled:
756+
llms_txt_content = build_llms_txt(
757+
pages,
758+
rendered_folder_indexes,
759+
collections,
760+
config.site.base_url,
761+
config.site.title,
762+
config.llms_txt.description,
763+
clean_urls,
764+
base_path,
765+
config.nav.links,
766+
)
767+
(output / "llms.txt").write_text(llms_txt_content)
768+
753769
# Generate 404 page
754770
not_found_template = env.get_template("404.html")
755771
not_found_layout = resolve_layout({}, config.theme.default_layout)

src/rockgarden/output/llms_txt.py

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
"""llms.txt generation following the llmstxt.org specification."""
2+
3+
from __future__ import annotations
4+
5+
from rockgarden.config import NavLinkConfig
6+
from rockgarden.content.collection import Collection
7+
from rockgarden.content.models import Page
8+
from rockgarden.nav.folder_index import FolderIndex
9+
from rockgarden.urls import get_url
10+
11+
12+
def _flatten_nav_links(links: list[NavLinkConfig]) -> list[NavLinkConfig]:
    """Return every nav link that has a URL, in depth-first pre-order.

    Entries without a URL act purely as grouping nodes: they are left out of
    the result, but their children are still visited.
    """
    flattened: list[NavLinkConfig] = []
    # Explicit stack; items are pushed reversed so they pop in document order.
    pending = list(reversed(links))
    while pending:
        node = pending.pop()
        if node.url:
            flattened.append(node)
        pending.extend(reversed(node.children))
    return flattened
20+
21+
22+
def build_llms_txt(
    pages: list[Page],
    folder_indexes: list[FolderIndex],
    collections: dict[str, Collection],
    base_url: str,
    site_title: str,
    description: str = "",
    clean_urls: bool = True,
    base_path: str = "",
    nav_links: list[NavLinkConfig] | None = None,
) -> str:
    """Generate the contents of an llms.txt file.

    The output starts with an H1 title and an optional blockquote summary,
    followed by "## " sections in this order: named collections, remaining
    pages grouped by top-level directory (sorted by directory name),
    root-level pages under "Pages", and finally nav links under "Links".
    Within a page section, entries are sorted case-insensitively by title.

    Args:
        pages: All content pages.
        folder_indexes: All folder index pages.
        collections: Named collections mapping.
        base_url: Site base URL (e.g., "https://example.com"). A trailing
            slash is tolerated and ignored.
        site_title: Site title for the H1 heading.
        description: Site description for the blockquote. May span several
            lines; every line is quoted.
        clean_urls: Whether clean URLs are enabled.
        base_path: URL path prefix.
        nav_links: Custom navigation links from config. Their URLs are
            written verbatim (not prefixed with ``base_url``).

    Returns:
        llms.txt content string, terminated by a single newline.
    """
    # Page paths are appended straight onto the base URL, so drop a trailing
    # slash to avoid "https://example.com//about/".
    site_root = base_url.rstrip("/")

    lines: list[str] = [f"# {site_title}"]

    if description:
        lines.append("")
        # Quote every line; otherwise only the first line of a multi-line
        # description would be inside the blockquote.
        lines.extend(
            f"> {part}" if part.strip() else ">"
            for part in description.splitlines()
        )

    def add_section(heading: str, items: list[Page | FolderIndex]) -> None:
        """Append a "## heading" section listing items sorted by title."""
        lines.append("")
        lines.append(f"## {heading}")
        lines.append("")
        for item in sorted(items, key=lambda p: p.title.lower()):
            url = site_root + get_url(item.slug, clean_urls, base_path)
            lines.append(f"- [{item.title}]({url})")

    # Collections come first; remember their pages so they are not listed
    # again under a directory heading.
    collected_slugs: set[str] = set()
    for col in collections.values():
        entries = [e for e in col.entries if isinstance(e, Page)]
        if not entries:
            continue
        add_section(col.name, entries)
        collected_slugs.update(entry.slug for entry in entries)

    all_items: list[Page | FolderIndex] = [
        p for p in pages if p.slug not in collected_slugs
    ]
    # Skip the folder index of any directory that backs a non-empty collection.
    # NOTE(review): only the index directly inside the collection source is
    # skipped; nested folder indexes (e.g. "blog/2024/index") still appear
    # under a directory heading - confirm whether that is intended.
    collected_dirs = {col.config.source for col in collections.values() if col.entries}
    all_items.extend(
        fi for fi in folder_indexes if fi.slug.rsplit("/", 1)[0] not in collected_dirs
    )

    dir_groups: dict[str, list[Page | FolderIndex]] = {}
    root_items: list[Page | FolderIndex] = []
    for item in all_items:
        top_dir, separator, _rest = item.slug.partition("/")
        if separator:
            dir_groups.setdefault(top_dir, []).append(item)
        else:
            root_items.append(item)

    for dir_name in sorted(dir_groups):
        add_section(dir_name, dir_groups[dir_name])

    if root_items:
        add_section("Pages", root_items)

    if nav_links:
        flat_links = _flatten_nav_links(nav_links)
        if flat_links:
            lines.append("")
            lines.append("## Links")
            lines.append("")
            lines.extend(f"- [{link.label}]({link.url})" for link in flat_links)

    # Trailing empty element makes the joined text end with a newline.
    lines.append("")
    return "\n".join(lines)

tests/test_llms_txt.py

Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
"""Tests for llms.txt generation."""
2+
3+
from pathlib import Path
4+
5+
from rockgarden.config import CollectionConfig, NavLinkConfig
6+
from rockgarden.content.collection import Collection
7+
from rockgarden.content.models import Page
8+
from rockgarden.nav.folder_index import FolderIndex
9+
from rockgarden.output.llms_txt import build_llms_txt
10+
11+
12+
class TestBuildLlmsTxt:
    """Behavioural tests for ``build_llms_txt``."""

    BASE_URL = "https://example.com"
    SITE_TITLE = "My Site"

    def _make_page(self, slug, title=None, source_prefix=""):
        """Create a minimal Page; the title goes into frontmatter when given."""
        relative = f"{slug}.md"
        if source_prefix:
            relative = f"{source_prefix}/{relative}"
        frontmatter = {"title": title} if title else {}
        return Page(source_path=Path(relative), slug=slug, frontmatter=frontmatter)

    def _blog_collection(self, *entries):
        """Return a collections mapping holding one "Blog" collection."""
        config = CollectionConfig(name="Blog", source="blog")
        blog = Collection(name="Blog", config=config, entries=list(entries))
        return {"blog": blog}

    def _render(self, pages=None, folders=None, collections=None, **options):
        """Call build_llms_txt with the shared base URL and site title."""
        return build_llms_txt(
            pages or [],
            folders or [],
            collections or {},
            self.BASE_URL,
            self.SITE_TITLE,
            **options,
        )

    def test_title_heading(self):
        assert self._render().startswith("# My Site\n")

    def test_description_blockquote(self):
        output = self._render(description="A cool site.")
        assert "> A cool site." in output

    def test_no_description(self):
        assert ">" not in self._render()

    def test_root_pages_under_pages_heading(self):
        output = self._render(pages=[self._make_page("about", "About")])
        assert "## Pages" in output
        assert "- [About](https://example.com/about/)" in output

    def test_pages_grouped_by_directory(self):
        output = self._render(
            pages=[
                self._make_page("docs/intro", "Introduction"),
                self._make_page("docs/setup", "Setup"),
            ]
        )
        assert "## docs" in output
        assert "- [Introduction](https://example.com/docs/intro/)" in output
        assert "- [Setup](https://example.com/docs/setup/)" in output
        assert "## Pages" not in output

    def test_collection_pages_grouped_separately(self):
        post = self._make_page("blog/post1", "First Post", source_prefix="blog")
        output = self._render(pages=[post], collections=self._blog_collection(post))
        assert "## Blog" in output
        assert "- [First Post](https://example.com/blog/post1/)" in output
        # Should not also appear under directory grouping
        assert output.split("\n").count("## Blog") == 1
        assert "## blog" not in output

    def test_folder_indexes_included(self):
        docs_index = FolderIndex(slug="docs/index", title="Docs", children=[])
        output = self._render(folders=[docs_index])
        assert "## docs" in output
        assert "- [Docs](https://example.com/docs/)" in output

    def test_clean_urls_false(self):
        output = self._render(
            pages=[self._make_page("about", "About")], clean_urls=False
        )
        assert "https://example.com/about.html" in output

    def test_base_path(self):
        output = self._render(
            pages=[self._make_page("about", "About")], base_path="/docs"
        )
        assert "https://example.com/docs/about/" in output

    def test_alphabetical_ordering(self):
        output = self._render(
            pages=[
                self._make_page("zebra", "Zebra"),
                self._make_page("apple", "Apple"),
            ]
        )
        assert output.index("Apple") < output.index("Zebra")

    def test_collections_before_directories(self):
        blog_post = self._make_page("blog/post", "Post", source_prefix="blog")
        guide = self._make_page("docs/guide", "Guide")
        output = self._render(
            pages=[blog_post, guide],
            collections=self._blog_collection(blog_post),
        )
        assert output.index("## Blog") < output.index("## docs")

    def test_empty_site(self):
        assert self._render() == "# My Site\n"

    def test_nav_links_section(self):
        nav = [
            NavLinkConfig(label="GitHub", url="https://github.com/example"),
            NavLinkConfig(label="Tags", url="/tags/"),
        ]
        output = self._render(nav_links=nav)
        assert "## Links" in output
        assert "- [GitHub](https://github.com/example)" in output
        assert "- [Tags](/tags/)" in output

    def test_nav_links_after_pages(self):
        nav = [NavLinkConfig(label="GitHub", url="https://github.com/example")]
        output = self._render(
            pages=[self._make_page("about", "About")], nav_links=nav
        )
        assert output.index("## Pages") < output.index("## Links")

    def test_nav_links_flattens_nested(self):
        child = NavLinkConfig(label="Child", url="https://example.com/child")
        nav = [NavLinkConfig(label="Parent", url="", children=[child])]
        output = self._render(nav_links=nav)
        assert "- [Child](https://example.com/child)" in output
        assert "Parent" not in output

    def test_no_nav_links_no_section(self):
        assert "## Links" not in self._render(nav_links=[])

    def test_collection_folder_index_not_duplicated(self):
        post = self._make_page("blog/post1", "First Post", source_prefix="blog")
        blog_index = FolderIndex(slug="blog/index", title="Blog", children=[])
        output = self._render(
            pages=[post],
            folders=[blog_index],
            collections=self._blog_collection(post),
        )
        assert "## Blog" in output
        assert "## blog" not in output

    def test_trailing_newline(self):
        output = self._render(pages=[self._make_page("about", "About")])
        assert output.endswith("\n")

0 commit comments

Comments
 (0)