From af0bdce9abfc21d0a63af70a647c5046129cc207 Mon Sep 17 00:00:00 2001
From: Nikhil Pareek
Date: Tue, 12 May 2026 18:53:56 +0530
Subject: [PATCH] feat(seo): cover every docs page in /llms.txt and /llms-full.txt

Both endpoints walked only tabNavigation, so any .mdx page on disk that
isn't curated into navigation.ts was silently missing from LLM-facing
surfaces. With ~83 pages on disk but not in the nav (mostly cookbooks,
recent additions), the gap was non-trivial.

After this change both endpoints still emit the navigation-driven
sections first to preserve the human-grouped structure, then sweep
src/pages/docs/**/*.mdx and emit anything not yet linked under an
"Additional Pages" section (llms.txt) or as appended entries
(llms-full.txt). Frontmatter `title:` is used when present; the slug is
title-cased as a fallback.

New cookbook entries, release notes, FAQ updates, or any newly added doc
page now show up in /llms.txt and /llms-full.txt on the next deploy
without a navigation.ts edit.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
Review notes (everything between the "---" line and the first "diff --git"
is skipped by git am, so none of this lands in the commit message):

* Restored the type arguments on `new Set<string>()`, `seen: Set<string>`
  and `readTitle(): Promise<string | null>`; a bare `Set` or `Promise` in
  type position does not compile.
* readTitle() now captures the opening quote and back-references it, so a
  title containing an apostrophe or a double quote is taken from the
  `title:` line instead of silently falling back to the title-cased slug.
* Both fixes edit existing "+" lines in place, so the hunk headers and the
  diffstat below are unchanged; only the post-image blob ids on the
  "index" lines are stale.
* walkDocsPages(), readTitle() and titlecase() are added to both files;
  moving them into one shared module would be a sensible follow-up.
* The sweep accepts .md as well as .mdx, although the message above only
  mentions .mdx.
* NOTE(review): From: and Co-Authored-By: carry no e-mail address here,
  presumably lost in transit; restore them before running git am.

 src/pages/llms-full.txt.ts | 68 +++++++++++++++++++++++++++-
 src/pages/llms.txt.ts      | 90 ++++++++++++++++++++++++++++++++++----
 2 files changed, 147 insertions(+), 11 deletions(-)

diff --git a/src/pages/llms-full.txt.ts b/src/pages/llms-full.txt.ts
index a8b323ff..23f2d387 100644
--- a/src/pages/llms-full.txt.ts
+++ b/src/pages/llms-full.txt.ts
@@ -19,7 +19,8 @@ export const GET: APIRoute = async () => {
   lines.push('> Complete documentation content for Future AGI — an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications.');
   lines.push('');
 
-  // Collect all hrefs from navigation
+  // Primary source: curated navigation, in the order humans grouped them.
+  const seen = new Set<string>();
   const hrefs: { title: string; href: string }[] = [];
   for (const tab of tabNavigation) {
     for (const group of tab.groups) {
@@ -27,10 +28,10 @@ export const GET: APIRoute = async () => {
     }
   }
 
-  // For each page, read the MDX and extract content
   for (const { title, href } of hrefs) {
     const content = await readPageContent(href);
     if (!content) continue;
+    seen.add(href);
 
     lines.push(`---`);
     lines.push('');
@@ -41,11 +42,74 @@ export const GET: APIRoute = async () => {
     lines.push('');
   }
 
+  // Sweep src/pages/docs for any .mdx pages not already emitted. Cookbook
+  // entries, release notes, FAQs, and pages added without a navigation
+  // update get their full content included automatically.
+  const onDisk = await walkDocsPages(DOCS_DIR);
+  const additional = onDisk
+    .filter(({ href }) => !seen.has(href))
+    .sort((a, b) => a.href.localeCompare(b.href));
+
+  for (const { href, title } of additional) {
+    const content = await readPageContent(href);
+    if (!content) continue;
+    lines.push(`---`);
+    lines.push('');
+    lines.push(`## ${title}`);
+    lines.push(`URL: ${SITE}${href}`);
+    lines.push('');
+    lines.push(content);
+    lines.push('');
+  }
+
   return new Response(lines.join('\n'), {
     headers: { 'Content-Type': 'text/plain; charset=utf-8' },
   });
 };
 
+async function walkDocsPages(
+  dir: string,
+  prefix = '/docs',
+): Promise<{ href: string; title: string }[]> {
+  const out: { href: string; title: string }[] = [];
+  let entries;
+  try {
+    entries = await fs.readdir(dir, { withFileTypes: true });
+  } catch {
+    return out;
+  }
+  for (const entry of entries) {
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      out.push(...(await walkDocsPages(full, `${prefix}/${entry.name}`)));
+      continue;
+    }
+    if (!entry.name.endsWith('.mdx') && !entry.name.endsWith('.md')) continue;
+    if (entry.name.startsWith('_')) continue;
+    const stem = entry.name.replace(/\.(mdx|md)$/, '');
+    const href = stem === 'index' ? prefix : `${prefix}/${stem}`;
+    const title = await readTitle(full).catch(() => null);
+    out.push({
+      href,
+      title: title || titlecase(stem === 'index' ? path.basename(prefix) : stem),
+    });
+  }
+  return out;
+}
+
+async function readTitle(file: string): Promise<string | null> {
+  const raw = await fs.readFile(file, 'utf-8');
+  const match = raw.match(/^title:\s*(['"]?)(.+?)\1\s*$/m);
+  return match ? match[2].trim() : null;
+}
+
+function titlecase(slug: string): string {
+  return slug
+    .split('-')
+    .map((p) => p.charAt(0).toUpperCase() + p.slice(1))
+    .join(' ');
+}
+
 function collectHrefs(items: NavItem[], out: { title: string; href: string }[]) {
   for (const item of items) {
     if (item.href) {
diff --git a/src/pages/llms.txt.ts b/src/pages/llms.txt.ts
index 80bedc50..428eddb4 100644
--- a/src/pages/llms.txt.ts
+++ b/src/pages/llms.txt.ts
@@ -1,23 +1,33 @@
 import type { APIRoute } from 'astro';
+import fs from 'node:fs/promises';
+import path from 'node:path';
 import { tabNavigation } from '../lib/navigation';
 import type { NavItem } from '../lib/navigation';
 
 const SITE = 'https://docs.futureagi.com';
+const DOCS_DIR = path.join(process.cwd(), 'src/pages/docs');
 
 /**
  * Generate /llms.txt — a concise, LLM-friendly overview of the documentation.
  * Follows the llms.txt specification: https://llmstxt.org
+ *
+ * Primary source is the curated tabNavigation so the human-grouped section
+ * headers stay intact. After walking the nav, the script scans
+ * src/pages/docs for any .mdx pages not already linked and lists them in an
+ * "Additional Pages" section so cookbook entries, release notes, FAQs, and
+ * any new pages added without a nav update still show up in /llms.txt.
  */
-export const GET: APIRoute = () => {
+export const GET: APIRoute = async () => {
   const lines: string[] = [];
+  const seen = new Set<string>();
 
-  // Title & summary
   lines.push('# Future AGI Documentation');
   lines.push('');
-  lines.push('> Future AGI is an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications. This documentation covers the Python SDK, platform features, integrations, and API reference.');
+  lines.push(
+    '> Future AGI is an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications. This documentation covers the Python SDK, platform features, integrations, and API reference.',
+  );
   lines.push('');
 
-  // Key sections with links
   lines.push('## Docs');
   lines.push('');
 
@@ -25,15 +35,33 @@ export const GET: APIRoute = () => {
     for (const group of tab.groups) {
       lines.push(`### ${group.group}`);
       lines.push('');
-      collectLinks(group.items, lines);
+      collectLinks(group.items, lines, seen);
       lines.push('');
     }
   }
 
-  // Optional: pointer to full version
+  // Walk src/pages/docs and emit anything not already linked under
+  // "Additional Pages". Cookbook entries, release notes, FAQs, and pages
+  // added without a nav update surface here automatically.
+  const onDisk = await walkDocsPages(DOCS_DIR);
+  const additional = onDisk
+    .filter(({ href }) => !seen.has(href))
+    .sort((a, b) => a.href.localeCompare(b.href));
+
+  if (additional.length > 0) {
+    lines.push('### Additional Pages');
+    lines.push('');
+    for (const { href, title } of additional) {
+      lines.push(`- [${title}](${SITE}${href})`);
+    }
+    lines.push('');
+  }
+
   lines.push('## Full Documentation');
   lines.push('');
-  lines.push(`For the complete documentation with all page content, see [llms-full.txt](${SITE}/llms-full.txt).`);
+  lines.push(
+    `For the complete documentation with all page content, see [llms-full.txt](${SITE}/llms-full.txt).`,
+  );
   lines.push('');
 
   return new Response(lines.join('\n'), {
@@ -41,13 +69,57 @@ export const GET: APIRoute = () => {
   });
 };
 
-function collectLinks(items: NavItem[], lines: string[]) {
+function collectLinks(items: NavItem[], lines: string[], seen: Set<string>) {
   for (const item of items) {
     if (item.href) {
       lines.push(`- [${item.title}](${SITE}${item.href})`);
+      seen.add(item.href);
     }
     if (item.items) {
-      collectLinks(item.items, lines);
+      collectLinks(item.items, lines, seen);
     }
   }
 }
+
+async function walkDocsPages(
+  dir: string,
+  prefix = '/docs',
+): Promise<{ href: string; title: string }[]> {
+  const out: { href: string; title: string }[] = [];
+  let entries;
+  try {
+    entries = await fs.readdir(dir, { withFileTypes: true });
+  } catch {
+    return out;
+  }
+  for (const entry of entries) {
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      out.push(...(await walkDocsPages(full, `${prefix}/${entry.name}`)));
+      continue;
+    }
+    if (!entry.name.endsWith('.mdx') && !entry.name.endsWith('.md')) continue;
+    if (entry.name.startsWith('_')) continue;
+    const stem = entry.name.replace(/\.(mdx|md)$/, '');
+    const href = stem === 'index' ? prefix : `${prefix}/${stem}`;
+    const title = await readTitle(full).catch(() => null);
+    out.push({
+      href,
+      title: title || titlecase(stem === 'index' ? path.basename(prefix) : stem),
+    });
+  }
+  return out;
+}
+
+async function readTitle(file: string): Promise<string | null> {
+  const raw = await fs.readFile(file, 'utf8');
+  const match = raw.match(/^title:\s*(['"]?)(.+?)\1\s*$/m);
+  return match ? match[2].trim() : null;
+}
+
+function titlecase(slug: string): string {
+  return slug
+    .split('-')
+    .map((p) => p.charAt(0).toUpperCase() + p.slice(1))
+    .join(' ');
+}