From af0bdce9abfc21d0a63af70a647c5046129cc207 Mon Sep 17 00:00:00 2001
From: Nikhil Pareek <nick.coolvyas@gmail.com>
Date: Tue, 12 May 2026 18:53:56 +0530
Subject: [PATCH] feat(seo): cover every docs page in /llms.txt and
 /llms-full.txt

Both endpoints walked only tabNavigation, so any .mdx page on disk
that isn't curated into navigation.ts was silently missing from
LLM-facing surfaces. With ~83 pages on disk but not in the nav
(mostly cookbooks, recent additions), the gap was non-trivial.

After this change both endpoints still emit the navigation-driven
sections first to preserve the human-grouped structure, then sweep
src/pages/docs/**/*.{mdx,md} (skipping `_`-prefixed files) and emit
anything not yet linked under an "Additional Pages" section
(llms.txt) or as appended entries (llms-full.txt). The first
`title:` line in the file, normally the frontmatter title, is used
when present; the slug is title-cased as a fallback.

New cookbook entries, release notes, FAQ updates, or any newly
added doc page now show up in /llms.txt and /llms-full.txt on the
next deploy without a navigation.ts edit.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 src/pages/llms-full.txt.ts | 68 +++++++++++++++++++++++++++-
 src/pages/llms.txt.ts      | 90 ++++++++++++++++++++++++++++++++++----
 2 files changed, 147 insertions(+), 11 deletions(-)
diff --git a/src/pages/llms-full.txt.ts b/src/pages/llms-full.txt.ts
index a8b323ff..23f2d387 100644
--- a/src/pages/llms-full.txt.ts
+++ b/src/pages/llms-full.txt.ts
@@ -19,7 +19,8 @@ export const GET: APIRoute = async () => {
   lines.push('> Complete documentation content for Future AGI — an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications.');
   lines.push('');
 
-  // Collect all hrefs from navigation
+  // Primary source: curated navigation, in the order humans grouped them.
+  const seen = new Set<string>();
   const hrefs: { title: string; href: string }[] = [];
   for (const tab of tabNavigation) {
     for (const group of tab.groups) {
@@ -27,10 +28,10 @@ export const GET: APIRoute = async () => {
     }
   }
 
-  // For each page, read the MDX and extract content
   for (const { title, href } of hrefs) {
     const content = await readPageContent(href);
     if (!content) continue;
+    seen.add(href);
 
     lines.push(`---`);
     lines.push('');
@@ -41,11 +42,74 @@ export const GET: APIRoute = async () => {
     lines.push('');
   }
 
+  // Sweep src/pages/docs for any .mdx/.md pages not already emitted. Cookbook
+  // entries, release notes, FAQs, and pages added without a navigation
+  // update get their full content included automatically.
+  const onDisk = await walkDocsPages(DOCS_DIR);
+  const additional = onDisk
+    .filter(({ href }) => !seen.has(href))
+    .sort((a, b) => a.href.localeCompare(b.href));
+
+  for (const { href, title } of additional) {
+    const content = await readPageContent(href);
+    if (!content) continue;
+    lines.push(`---`);
+    lines.push('');
+    lines.push(`## ${title}`);
+    lines.push(`URL: ${SITE}${href}`);
+    lines.push('');
+    lines.push(content);
+    lines.push('');
+  }
+
   return new Response(lines.join('\n'), {
     headers: { 'Content-Type': 'text/plain; charset=utf-8' },
   });
 };
 
+/**
+ * Recursively list the .mdx/.md pages under `dir` as `{ href, title }` entries.
+ * `prefix` is the URL path that corresponds to `dir`; an `index` file maps to
+ * `prefix` itself. Titles come from readTitle, falling back to the title-cased
+ * file (or folder) name. An unreadable `dir` contributes no entries.
+ */
+async function walkDocsPages(
+  dir: string,
+  prefix = '/docs',
+): Promise<{ href: string; title: string }[]> {
+  const out: { href: string; title: string }[] = [];
+  const entries = await fs.readdir(dir, { withFileTypes: true }).catch(() => null);
+  if (!entries) return out;
+  for (const entry of entries) {
+    // Astro excludes `_`-prefixed files AND directories from routing, so skip both.
+    if (entry.name.startsWith('_')) continue;
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      out.push(...(await walkDocsPages(full, `${prefix}/${entry.name}`)));
+      continue;
+    }
+    if (!/\.mdx?$/.test(entry.name)) continue;
+    const stem = entry.name.replace(/\.(mdx|md)$/, '');
+    const href = stem === 'index' ? prefix : `${prefix}/${stem}`;
+    const title = await readTitle(full).catch(() => null);
+    out.push({ href, title: title || titlecase(stem === 'index' ? path.basename(prefix) : stem) });
+  }
+  return out;
+}
+
+/** First `title:` value in `file`, matching wrapping quotes stripped; null if absent or blank. */
+async function readTitle(file: string): Promise<string | null> {
+  // [ \t] (not \s) keeps an empty `title:` from swallowing the next line; \1 lets the value contain quotes.
+  return /^title:[ \t]*(['"]?)(.*?)\1[ \t\r]*$/m.exec(await fs.readFile(file, 'utf-8'))?.[2].trim() || null;
+}
+
+/** Convert a hyphenated slug to display text, e.g. `getting-started` -> `Getting Started`. */
+function titlecase(slug: string): string {
+  const words: string[] = [];
+  for (const part of slug.split('-')) words.push(part.charAt(0).toUpperCase() + part.slice(1));
+  return words.join(' ');
+}
+
 function collectHrefs(items: NavItem[], out: { title: string; href: string }[]) {
   for (const item of items) {
     if (item.href) {
diff --git a/src/pages/llms.txt.ts b/src/pages/llms.txt.ts
index 80bedc50..428eddb4 100644
--- a/src/pages/llms.txt.ts
+++ b/src/pages/llms.txt.ts
@@ -1,23 +1,33 @@
 import type { APIRoute } from 'astro';
+import fs from 'node:fs/promises';
+import path from 'node:path';
 import { tabNavigation } from '../lib/navigation';
 import type { NavItem } from '../lib/navigation';
 
 const SITE = 'https://docs.futureagi.com';
+const DOCS_DIR = path.join(process.cwd(), 'src/pages/docs');
 
 /**
  * Generate /llms.txt — a concise, LLM-friendly overview of the documentation.
  * Follows the llms.txt specification: https://llmstxt.org
+ *
+ * Primary source is the curated tabNavigation so the human-grouped section
+ * headers stay intact. After walking the nav, the script scans
+ * src/pages/docs for any .mdx/.md pages not already linked and lists them in an
+ * "Additional Pages" section so cookbook entries, release notes, FAQs, and
+ * any new pages added without a nav update still show up in /llms.txt.
  */
-export const GET: APIRoute = () => {
+export const GET: APIRoute = async () => {
   const lines: string[] = [];
+  const seen = new Set<string>();
 
-  // Title & summary
   lines.push('# Future AGI Documentation');
   lines.push('');
-  lines.push('> Future AGI is an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications. This documentation covers the Python SDK, platform features, integrations, and API reference.');
+  lines.push(
+    '> Future AGI is an AI lifecycle platform for building, evaluating, observing, and optimizing AI applications. This documentation covers the Python SDK, platform features, integrations, and API reference.',
+  );
   lines.push('');
 
-  // Key sections with links
   lines.push('## Docs');
   lines.push('');
 
@@ -25,15 +35,33 @@ export const GET: APIRoute = () => {
     for (const group of tab.groups) {
       lines.push(`### ${group.group}`);
       lines.push('');
-      collectLinks(group.items, lines);
+      collectLinks(group.items, lines, seen);
       lines.push('');
     }
   }
 
-  // Optional: pointer to full version
+  // Walk src/pages/docs and emit anything not already linked under
+  // "Additional Pages". Cookbook entries, release notes, FAQs, and pages
+  // added without a nav update surface here automatically.
+  const onDisk = await walkDocsPages(DOCS_DIR);
+  const additional = onDisk
+    .filter(({ href }) => !seen.has(href))
+    .sort((a, b) => a.href.localeCompare(b.href));
+
+  if (additional.length > 0) {
+    lines.push('### Additional Pages');
+    lines.push('');
+    for (const { href, title } of additional) {
+      lines.push(`- [${title}](${SITE}${href})`);
+    }
+    lines.push('');
+  }
+
   lines.push('## Full Documentation');
   lines.push('');
-  lines.push(`For the complete documentation with all page content, see [llms-full.txt](${SITE}/llms-full.txt).`);
+  lines.push(
+    `For the complete documentation with all page content, see [llms-full.txt](${SITE}/llms-full.txt).`,
+  );
   lines.push('');
 
   return new Response(lines.join('\n'), {
@@ -41,13 +69,57 @@ export const GET: APIRoute = () => {
   });
 };
 
-function collectLinks(items: NavItem[], lines: string[]) {
+function collectLinks(items: NavItem[], lines: string[], seen: Set<string>) {
   for (const item of items) {
     if (item.href) {
       lines.push(`- [${item.title}](${SITE}${item.href})`);
+      seen.add(item.href); // recorded verbatim; GET filters the disk sweep with seen.has(href)
     }
     if (item.items) {
-      collectLinks(item.items, lines);
+      collectLinks(item.items, lines, seen); // nested items share the same set
     }
   }
 }
+
+/**
+ * Recursively list the .mdx/.md pages under `dir` as `{ href, title }` entries.
+ * `prefix` is the URL path that corresponds to `dir`; an `index` file maps to
+ * `prefix` itself. Titles come from readTitle, falling back to the title-cased
+ * file (or folder) name. An unreadable `dir` contributes no entries.
+ */
+async function walkDocsPages(
+  dir: string,
+  prefix = '/docs',
+): Promise<{ href: string; title: string }[]> {
+  const out: { href: string; title: string }[] = [];
+  const entries = await fs.readdir(dir, { withFileTypes: true }).catch(() => null);
+  if (!entries) return out;
+  for (const entry of entries) {
+    // Astro excludes `_`-prefixed files AND directories from routing, so linking them would 404.
+    if (entry.name.startsWith('_')) continue;
+    const full = path.join(dir, entry.name);
+    if (entry.isDirectory()) {
+      out.push(...(await walkDocsPages(full, `${prefix}/${entry.name}`)));
+      continue;
+    }
+    if (!/\.mdx?$/.test(entry.name)) continue;
+    const stem = entry.name.replace(/\.(mdx|md)$/, '');
+    const href = stem === 'index' ? prefix : `${prefix}/${stem}`;
+    const title = await readTitle(full).catch(() => null);
+    out.push({ href, title: title || titlecase(stem === 'index' ? path.basename(prefix) : stem) });
+  }
+  return out;
+}
+
+/** First `title:` value in `file`, matching wrapping quotes stripped; null if absent or blank. */
+async function readTitle(file: string): Promise<string | null> {
+  // [ \t] (not \s) keeps an empty `title:` from swallowing the next line; \1 lets the value contain quotes.
+  return /^title:[ \t]*(['"]?)(.*?)\1[ \t\r]*$/m.exec(await fs.readFile(file, 'utf8'))?.[2].trim() || null;
+}
+
+/** Display name for a hyphenated slug: each '-'-separated part gets an uppercase first character, joined by spaces. */
+function titlecase(slug: string): string {
+  const capitalize = (word: string): string => word.charAt(0).toUpperCase() + word.slice(1);
+  const words = slug.split('-').map(capitalize);
+  return words.join(' ');
+}