Add llms.txt export for public docs
--- Build: pass | Tests: pass — 2382 passed (708 files)
This commit is contained in:
parent
777a9a5235
commit
b67d04a0d4
9 changed files with 6542 additions and 4 deletions
5820
bootstrap/cms/clawdie-docs/public/llms-full.txt
Normal file
5820
bootstrap/cms/clawdie-docs/public/llms-full.txt
Normal file
File diff suppressed because it is too large
Load diff
64
bootstrap/cms/clawdie-docs/public/llms.txt
Normal file
64
bootstrap/cms/clawdie-docs/public/llms.txt
Normal file
|
|
@ -0,0 +1,64 @@
|
|||
# Clawdie Documentation
|
||||
|
||||
> Clawdie is a FreeBSD-native AI operator and multi-agent controlplane. This file maps the public documentation for installation, architecture, operations, reference material, and roadmap planning.
|
||||
|
||||
Use this file as the curated map of the public docs. If you need a one-shot context file containing the same English public docs concatenated together, use `/llms-full.txt` on the same site.
|
||||
|
||||
## Overview
|
||||
|
||||
- [Overview](https://docs.clawdie.si/): Operator documentation for Clawdie on FreeBSD.
|
||||
|
||||
## Install
|
||||
|
||||
- [Installation](https://docs.clawdie.si/install/): Operator install paths for Clawdie on FreeBSD.
|
||||
- [Control Plane Install](https://docs.clawdie.si/install/controlplane-install/): Install the Clawdie control plane with the standard orchestrator.
|
||||
- [First Boot](https://docs.clawdie.si/install/first-boot/): Edit one file, flash a USB, boot — get to a working assistant with minimal interaction.
|
||||
- [Fresh Install Checklist](https://docs.clawdie.si/install/fresh-install-checklist/): Verification checklist for new Clawdie-AI installations (bare metal, bhyve VM, or jail-based). Run after firstboot completes. Each check includes the exact command and expected result.
|
||||
- [Install Orchestrator](https://docs.clawdie.si/install/install/): Single-command install flow for Clawdie.
|
||||
- [ISO Install](https://docs.clawdie.si/install/iso/): Bootable USB images for bare metal and offline installs.
|
||||
- [Requirements](https://docs.clawdie.si/install/requirements/): Host, runtime, and toolchain requirements for Clawdie.
|
||||
|
||||
## Architecture
|
||||
|
||||
- [Architecture](https://docs.clawdie.si/architecture/): Core runtime layout, isolation model, and deployment shapes.
|
||||
- [Admin Panel](https://docs.clawdie.si/architecture/admin-panel/): bsddialog-based system management UI for Clawdie on FreeBSD.
|
||||
- [Bastille on FreeBSD 15](https://docs.clawdie.si/architecture/bastille/): Clawdie uses Bastille as the host-side jail manager for its Warden runtime on FreeBSD.
|
||||
- [Control Plane](https://docs.clawdie.si/architecture/controlplane/): Starting with v0.10.0, Clawdie has a built-in multi-agent control plane. The agent named after your install (e.g. "Clawdie" or "Atlas") becomes the **orchestrator of her own computer** — with a Sysadmin, DBA, and Git Admin working under her.
|
||||
- [Deployment Models](https://docs.clawdie.si/architecture/deployment-models/): Clawdie-AI supports two first-class deployment targets. Both run the same core runtime — Bastille jails, PostgreSQL (skills, memory, ops databases), nginx, Telegram channel, rc.d service. The difference is how the host gets set up.
|
||||
- [FreeBSD Jail Implementation Guide](https://docs.clawdie.si/architecture/freebsd-jail-implementation/): This document describes the current Clawdie jail layout on FreeBSD.
|
||||
- [Host Operator Model](https://docs.clawdie.si/architecture/host-operator-model/): Current `main` uses the FreeBSD host as the operator surface.
|
||||
- [Jail Networking Strategy](https://docs.clawdie.si/architecture/jail-networking/): This document defines the current networking model for Clawdie on FreeBSD.
|
||||
- [Warden Runtime](https://docs.clawdie.si/architecture/warden/): `Warden` is the project name for Clawdie's isolated execution and service layer on FreeBSD.
|
||||
|
||||
## Operate
|
||||
|
||||
- [Operations](https://docs.clawdie.si/operate/): Runbooks and operational references for Clawdie.
|
||||
- [Data Service Disaster Recovery](https://docs.clawdie.si/operate/db-disaster-recovery/): This document covers the full recovery lifecycle for the Data Service (PostgreSQL on the host by default, or the db jail when `DB_RUNTIME=jail`): detection, triage, rollback options, and surgical repair.
|
||||
- [Docs and tenant-site publishing](https://docs.clawdie.si/operate/docs-publishing/): Current operator workflow for Astro/Starlight docs deploys and manual tenant-site publishing from the cms jail.
|
||||
- [Git Storage](https://docs.clawdie.si/operate/git-storage/): This document defines the local code-hosting target for Clawdie.
|
||||
- [Monitoring Model](https://docs.clawdie.si/operate/monitoring/): Clawdie monitoring is split into distinct layers so "process is running" is not confused with "system is healthy".
|
||||
- [Operator Commands](https://docs.clawdie.si/operate/operator-commands/): Reference for the Telegram slash commands operators use to inspect and control the running agent.
|
||||
- [Provider Fallback](https://docs.clawdie.si/operate/provider-fallback/): Automatic LLM provider switching when the primary provider hits a usage cap.
|
||||
- [Clawdie Security Model](https://docs.clawdie.si/operate/security/): Clawdie is designed around a simple idea:
|
||||
- [Structured Reports](https://docs.clawdie.si/operate/structured-reports/): The Observed / Interpretation / Operator Notes pattern, the report family, and the free-text routing layer.
|
||||
|
||||
## Reference
|
||||
|
||||
- [Reference](https://docs.clawdie.si/reference/): Deep dives and reference documentation.
|
||||
- [Multi-Agent Deployment (Optional)](https://docs.clawdie.si/reference/multi-agent/): Clawdie supports running multiple independent agents on the same FreeBSD host, but the default path is **one agent per host or per bhyve VM**. Use this guide only when you intentionally want a second agent later.
|
||||
- [Claude Agent SDK Deep Dive](https://docs.clawdie.si/reference/sdk-deep-dive/): Findings from reverse-engineering `@anthropic-ai/claude-agent-sdk` v0.2.29–0.2.34 to understand how `query()` works, why agent teams subagents were being killed, and how to fix it. Supplemented with official SDK reference docs.
|
||||
|
||||
## Optional
|
||||
|
||||
### Localization
|
||||
|
||||
- [Documentation Localization](https://docs.clawdie.si/localization/): Current Crowdin to Astro workflow for localized docs in Clawdie.
|
||||
- [How the Pipeline Works](https://docs.clawdie.si/localization/how-it-works/): Current step-by-step flow from docs source to Crowdin to Astro deploy.
|
||||
- [Setup Instructions](https://docs.clawdie.si/localization/setup/): Set up Crowdin access and use the current cms-jail Astro workflow for localized docs.
|
||||
- [Troubleshooting](https://docs.clawdie.si/localization/troubleshooting/): Common localization pipeline problems in the current cms-jail Astro workflow.
|
||||
|
||||
### Roadmap
|
||||
|
||||
- [Roadmap](https://docs.clawdie.si/roadmap/): Channel and platform roadmap documents.
|
||||
- [Channels — Implementation Plan](https://docs.clawdie.si/roadmap/channels-plan/): **Status:** Plan only — no implementation started **Last updated:** 7.apr.2026 **Strategy:** see channels-roadmap.md
|
||||
- [Channels Roadmap](https://docs.clawdie.si/roadmap/channels-roadmap/): **Status:** Planning — no implementation started **Last updated:** 16.mar.2026
|
||||
|
|
@ -1,4 +1,6 @@
|
|||
import { spawnSync } from 'node:child_process';
|
||||
import path from 'node:path';
|
||||
import { fileURLToPath } from 'node:url';
|
||||
|
||||
function runNodeScript(args) {
|
||||
const result = spawnSync(process.execPath, args, {
|
||||
|
|
@ -10,5 +12,29 @@ function runNodeScript(args) {
|
|||
}
|
||||
}
|
||||
|
||||
/**
 * Run an external command synchronously, streaming its output to this process.
 *
 * The child inherits stdio and the current environment. On any failure this
 * function terminates the current process instead of returning:
 * - if the command could not be spawned at all (e.g. the executable is not on
 *   PATH), the spawn error is printed and the process exits with code 1;
 * - if the command was killed by a signal, the signal is printed and the
 *   process exits with code 1;
 * - otherwise the child's non-zero exit code is propagated.
 *
 * @param {string} command Executable to run.
 * @param {string[]} args Arguments passed to the executable.
 */
function runCommand(command, args) {
  const result = spawnSync(command, args, {
    stdio: 'inherit',
    env: process.env,
  });
  if (result.error) {
    // spawnSync reports spawn failures via `error` and leaves `status` null;
    // without this message the build would stop with no explanation.
    console.error(`Failed to run ${command}: ${result.error.message}`);
    process.exit(1);
  }
  if (result.status !== 0) {
    if (result.signal) {
      console.error(`${command} was terminated by signal ${result.signal}`);
    }
    process.exit(result.status ?? 1);
  }
}
|
||||
|
||||
// Resolve the docs site root (parent of this script's directory) and the
// repository root three levels above it.
const __dirname = path.dirname(fileURLToPath(import.meta.url));
const siteRoot = path.resolve(__dirname, '..');
const repoRoot = path.resolve(siteRoot, '../../..');

// Sync steps run first, in this order.
for (const syncScript of [
  'scripts/sync-host-config.mjs',
  'scripts/sync-public-docs.mjs',
]) {
  runNodeScript([syncScript]);
}

// Generate llms.txt / llms-full.txt from the repository's English public docs
// into the site's public directory.
const llmsGeneratorArgs = [
  'tsx',
  path.join(repoRoot, 'scripts', 'generate-llms-files.ts'),
  '--source-dir',
  path.join(repoRoot, 'docs', 'public'),
  '--output-dir',
  path.join(siteRoot, 'public'),
  '--site-base-url',
  'https://docs.clawdie.si',
];
runCommand('npx', llmsGeneratorArgs);
|
||||
|
|
|
|||
|
|
@ -56,11 +56,23 @@ That deploy flow runs:
|
|||
|
||||
1. `scripts/sync-host-config.mjs`
|
||||
2. `scripts/sync-public-docs.mjs`
|
||||
3. `astro build`
|
||||
4. deploy into the cms jail webroot
|
||||
3. `scripts/generate-llms-files.ts` for `/llms.txt` and `/llms-full.txt`
|
||||
4. `astro build`
|
||||
5. deploy into the cms jail webroot
|
||||
|
||||
`npm run deploy` protects `/usr/local/www/<tenant>/sites/` by default, so a docs deploy does not wipe tenant-site output.
|
||||
|
||||
### Machine-readable docs output
|
||||
|
||||
The docs build also emits two English-only root artifacts for external LLM
|
||||
consumers:
|
||||
|
||||
- `/llms.txt` — a curated map of the public docs
|
||||
- `/llms-full.txt` — one concatenated context file built from the same public
|
||||
docs source
|
||||
|
||||
These are generated from `docs/public/` during the normal docs prebuild path.
|
||||
|
||||
## Strapi editorial flow
|
||||
|
||||
When docs-related content changes in Strapi:
|
||||
|
|
|
|||
|
|
@ -56,11 +56,23 @@ That deploy flow runs:
|
|||
|
||||
1. `scripts/sync-host-config.mjs`
|
||||
2. `scripts/sync-public-docs.mjs`
|
||||
3. `astro build`
|
||||
4. deploy into the cms jail webroot
|
||||
3. `scripts/generate-llms-files.ts` for `/llms.txt` and `/llms-full.txt`
|
||||
4. `astro build`
|
||||
5. deploy into the cms jail webroot
|
||||
|
||||
`npm run deploy` protects `/usr/local/www/<tenant>/sites/` by default, so a docs deploy does not wipe tenant-site output.
|
||||
|
||||
### Machine-readable docs output
|
||||
|
||||
The docs build also emits two English-only root artifacts for external LLM
|
||||
consumers:
|
||||
|
||||
- `/llms.txt` — a curated map of the public docs
|
||||
- `/llms-full.txt` — one concatenated context file built from the same public
|
||||
docs source
|
||||
|
||||
These are generated from `docs/public/` during the normal docs prebuild path.
|
||||
|
||||
## Strapi editorial flow
|
||||
|
||||
When docs-related content changes in Strapi:
|
||||
|
|
|
|||
5
justfile
5
justfile
|
|
@ -49,6 +49,11 @@ build-skills-artifact *args="":
|
|||
refresh-skills-artifact *args="":
|
||||
node scripts/memory/refresh-skills-artifact.mjs {{ args }}
|
||||
|
||||
# Generate llms.txt and llms-full.txt from English docs/public/ into the Astro public dir
|
||||
[group("build")]
|
||||
build-llms:
|
||||
npx tsx scripts/generate-llms-files.ts
|
||||
|
||||
# Pre-commit gate (typecheck + format check)
|
||||
[group("build")]
|
||||
pre-commit: typecheck format-check
|
||||
|
|
|
|||
75
scripts/generate-llms-files.ts
Normal file
75
scripts/generate-llms-files.ts
Normal file
|
|
@ -0,0 +1,75 @@
|
|||
import path from 'path';
|
||||
|
||||
import { writeLlmsArtifacts } from '../src/docs-llms.js';
|
||||
|
||||
/** Fully resolved command-line options for the llms artifact generator. */
interface Args {
  /** Absolute path of the markdown source root. */
  sourceDir: string;
  /** Absolute path of the directory the artifacts are written to. */
  outputDir: string;
  /** Base URL used when building links to documentation pages. */
  siteBaseUrl: string;
  /** Absolute path of the docignore filter file (it may not exist). */
  docignorePath: string;
}

/**
 * Parse the generator's command-line arguments.
 *
 * Recognised options are `--source-dir`, `--output-dir`, `--site-base-url`,
 * `--docignore` and `--help`. An option given without a value keeps its
 * default. `--help` prints usage and exits the process with code 0.
 *
 * When `--docignore` is not given, the docignore path defaults to
 * `.docignore` inside the *final* source directory, so overriding
 * `--source-dir` also picks up that directory's ignore file.
 *
 * @param argv Arguments after the script name (e.g. `process.argv.slice(2)`).
 * @returns The resolved options.
 * @throws Error when an unrecognised argument is encountered.
 */
function parseArgs(argv: string[]): Args {
  let sourceDir = path.join(process.cwd(), 'docs', 'public');
  let outputDir = path.join(
    process.cwd(),
    'bootstrap',
    'cms',
    'clawdie-docs',
    'public',
  );
  let siteBaseUrl = 'https://docs.clawdie.si';
  // Left undefined unless --docignore is passed, so the default can be derived
  // from sourceDir after every option has been processed.
  let docignorePath: string | undefined;

  for (let i = 0; i < argv.length; i += 1) {
    const arg = argv[i];
    const value = argv[i + 1];
    if (arg === '--source-dir') {
      sourceDir = path.resolve(value || sourceDir);
      i += 1;
      continue;
    }
    if (arg === '--output-dir') {
      outputDir = path.resolve(value || outputDir);
      i += 1;
      continue;
    }
    if (arg === '--site-base-url') {
      siteBaseUrl = (value || siteBaseUrl).trim();
      i += 1;
      continue;
    }
    if (arg === '--docignore') {
      if (value) docignorePath = path.resolve(value);
      i += 1;
      continue;
    }
    if (arg === '--help') {
      console.log(
        [
          'Usage: npx tsx scripts/generate-llms-files.ts [options]',
          '',
          'Options:',
          ' --source-dir DIR Source markdown root (default: docs/public)',
          ' --output-dir DIR Output directory for llms artifacts',
          ' --site-base-url URL Site base URL for generated links',
          ' --docignore FILE Optional docignore filter file',
        ].join('\n'),
      );
      process.exit(0);
    }
    throw new Error(`Unknown argument: ${arg}`);
  }

  return {
    sourceDir,
    outputDir,
    siteBaseUrl,
    docignorePath: docignorePath ?? path.join(sourceDir, '.docignore'),
  };
}
|
||||
|
||||
// Entry point: parse the CLI options, write both artifacts, report the result.
const cliOptions = parseArgs(process.argv.slice(2));
const { pageCount, files: writtenFiles } = writeLlmsArtifacts(cliOptions);
console.log(
  `Generated llms artifacts for ${pageCount} pages: ${writtenFiles.join(', ')}`,
);
|
||||
180
src/docs-llms.test.ts
Normal file
180
src/docs-llms.test.ts
Normal file
|
|
@ -0,0 +1,180 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
import { afterEach, describe, expect, it } from 'vitest';
|
||||
|
||||
import {
|
||||
docUrlPath,
|
||||
loadLlmsDocPages,
|
||||
renderLlmsFullTxt,
|
||||
renderLlmsTxt,
|
||||
writeLlmsArtifacts,
|
||||
} from './docs-llms.js';
|
||||
|
||||
const TMP_ROOT = path.join(process.cwd(), 'tmp', 'test-docs-llms');
|
||||
|
||||
/** Recreate `dir` as an empty directory, discarding anything it contained. */
function resetDir(dir: string): void {
  const recursive = true;
  // `force` keeps the removal quiet when the directory does not exist yet.
  fs.rmSync(dir, { force: true, recursive });
  fs.mkdirSync(dir, { recursive });
}
|
||||
|
||||
/**
 * Write `content` to `relativePath` under the test root, creating any missing
 * parent directories first.
 */
function write(relativePath: string, content: string): void {
  const destination = path.join(TMP_ROOT, relativePath);
  const parentDir = path.dirname(destination);
  fs.mkdirSync(parentDir, { recursive: true });
  fs.writeFileSync(destination, content, 'utf8');
}
|
||||
|
||||
// Remove the scratch tree after every test so cases cannot see each other's files.
afterEach(() => {
  const removeOptions = { recursive: true, force: true };
  fs.rmSync(TMP_ROOT, removeOptions);
});
|
||||
|
||||
describe('docUrlPath', () => {
  it('maps root and index pages to directory URLs', () => {
    // [source-relative path, expected site URL path]
    const cases: Array<[string, string]> = [
      ['index.md', '/'],
      ['install/index.md', '/install/'],
      ['install/iso.md', '/install/iso/'],
    ];
    for (const [relPath, expectedUrlPath] of cases) {
      expect(docUrlPath(relPath)).toBe(expectedUrlPath);
    }
  });
});
|
||||
|
||||
describe('loadLlmsDocPages', () => {
  it('loads only English public docs and respects docignore', () => {
    // Build a markdown document with a title/description frontmatter block.
    const withFrontmatter = (
      title: string,
      description: string,
      body: string,
    ): string =>
      `---\ntitle: ${title}\ndescription: ${description}\n---\n\n${body}`;

    const fixtures: Array<[string, string]> = [
      ['.docignore', '*-INTERNAL.md\n*/private/\nDOCUMENTATION-POLICY.md'],
      [
        'index.md',
        withFrontmatter('Overview', 'Main overview', 'This is the docs overview.'),
      ],
      [
        'operate/security.md',
        withFrontmatter(
          'Security',
          'Privilege model',
          '# Security\n\nSecurity details here.',
        ),
      ],
      [
        'roadmap/channels-roadmap.md',
        '# Channels roadmap\n\nRoadmap details here.',
      ],
      // The remaining files must be filtered out: a locale directory, a
      // docignored directory, and a docignored filename pattern.
      ['sl/index.md', '# Slovene\n\nIgnored locale.'],
      ['private/secret.md', '# Secret\n\nIgnored private page.'],
      ['TEMP-INTERNAL.md', '# Temp\n\nIgnored internal page.'],
    ];

    resetDir(TMP_ROOT);
    for (const [relativePath, content] of fixtures) {
      write(relativePath, content);
    }

    const pages = loadLlmsDocPages({ sourceDir: TMP_ROOT });
    const loadedPaths = pages.map((page) => page.relPath);

    expect(loadedPaths).toEqual([
      'index.md',
      'operate/security.md',
      'roadmap/channels-roadmap.md',
    ]);
    // The roadmap page is the third entry and belongs to an optional category.
    expect(pages[2]?.optional).toBe(true);
  });
});
|
||||
|
||||
describe('llms rendering', () => {
  const SITE_BASE_URL = 'https://docs.clawdie.si';

  // Build a markdown document with a title/description frontmatter block.
  const withFrontmatter = (
    title: string,
    description: string,
    body: string,
  ): string =>
    `---\ntitle: ${title}\ndescription: ${description}\n---\n\n${body}`;

  it('renders curated llms.txt with optional section and full context without frontmatter', () => {
    resetDir(TMP_ROOT);
    const fixtures: Array<[string, string]> = [
      [
        'index.md',
        withFrontmatter('Overview', 'Main docs overview', 'This is the overview body.'),
      ],
      [
        'install/install.md',
        withFrontmatter('Install', 'Installation guide', '# Install\n\nInstall body.'),
      ],
      [
        'localization/index.md',
        withFrontmatter(
          'Localization',
          'Localization flow',
          '# Localization\n\nLocalization body.',
        ),
      ],
    ];
    for (const [relativePath, content] of fixtures) {
      write(relativePath, content);
    }

    const pages = loadLlmsDocPages({ sourceDir: TMP_ROOT });

    const llms = renderLlmsTxt({ siteBaseUrl: SITE_BASE_URL, pages });
    const expectedInMap = [
      '# Clawdie Documentation',
      '## Overview',
      '## Install',
      '## Optional',
      '### Localization',
      '[Install](https://docs.clawdie.si/install/install/)',
    ];
    for (const fragment of expectedInMap) {
      expect(llms).toContain(fragment);
    }

    const full = renderLlmsFullTxt({ siteBaseUrl: SITE_BASE_URL, pages });
    expect(full).toContain('# Clawdie Documentation Full Context');
    expect(full).toContain('Source: docs/public/install/install.md');
    // Frontmatter must have been stripped from the concatenated bodies.
    expect(full).not.toContain('---\ntitle:');
    expect(full).toContain('# Install');
  });

  it('writes both artifacts to the output directory', () => {
    resetDir(TMP_ROOT);
    write(
      'index.md',
      withFrontmatter('Overview', 'Main docs overview', 'Overview body.'),
    );

    const outDir = path.join(TMP_ROOT, 'out');
    const result = writeLlmsArtifacts({
      sourceDir: TMP_ROOT,
      outputDir: outDir,
      siteBaseUrl: SITE_BASE_URL,
    });

    expect(result.pageCount).toBe(1);
    for (const artifactName of ['llms.txt', 'llms-full.txt']) {
      expect(fs.existsSync(path.join(outDir, artifactName))).toBe(true);
    }
  });
});
|
||||
344
src/docs-llms.ts
Normal file
344
src/docs-llms.ts
Normal file
|
|
@ -0,0 +1,344 @@
|
|||
import fs from 'fs';
|
||||
import path from 'path';
|
||||
|
||||
/**
 * One documentation page prepared for the llms artifacts.
 *
 * The field notes describe the values as `loadLlmsDocPages` fills them in.
 */
export interface LlmsDocPage {
  /** Path of the source file relative to the source directory, with `/` separators. */
  relPath: string;
  /** Site-relative URL path of the page, as computed by `docUrlPath`. */
  urlPath: string;
  /** Frontmatter `title`, else the leading `# ` heading, else a name derived from the path. */
  title: string;
  /** Frontmatter `description`, else the first prose paragraph, else the title. */
  description: string;
  /** Page content with the frontmatter block removed and surrounding whitespace trimmed. */
  body: string;
  /** Category key derived from the first path segment (see `CATEGORY_ORDER`). */
  category: string;
  /** True when the category is one of `OPTIONAL_CATEGORIES`. */
  optional: boolean;
}
|
||||
|
||||
/**
 * Known category keys, in the order sections are rendered and pages are sorted.
 * A page's category is its first path segment when that segment is listed here.
 */
const CATEGORY_ORDER = [
  'overview',
  'install',
  'architecture',
  'operate',
  'reference',
  'localization',
  'roadmap',
] as const;

/** Heading text used for each category key when rendering `llms.txt`. */
const CATEGORY_LABELS: Record<string, string> = {
  overview: 'Overview',
  install: 'Install',
  architecture: 'Architecture',
  operate: 'Operate',
  reference: 'Reference',
  localization: 'Localization',
  roadmap: 'Roadmap',
};

/** Categories whose pages are listed under the `## Optional` section of `llms.txt`. */
const OPTIONAL_CATEGORIES = new Set(['localization', 'roadmap']);
|
||||
|
||||
/** Result of splitting a markdown file into its frontmatter fields and its body. */
interface ParsedFrontmatter {
  /** Frontmatter `key: value` pairs; keys are lower-cased by `parseFrontmatter`. */
  attributes: Record<string, string>;
  /** Trimmed content following the frontmatter block (the whole file when there is none). */
  body: string;
}
|
||||
|
||||
/**
 * Recursively collect `.md` / `.mdx` files below `rootDir`.
 *
 * Entries whose name starts with a dot are skipped, directories included.
 *
 * @param rootDir Directory to scan.
 * @param relPrefix Relative path of `rootDir` from the walk's starting point;
 *   used to build the returned paths during recursion.
 * @returns Paths relative to the starting directory, sorted with the default
 *   string ordering.
 */
function walkMarkdownFiles(rootDir: string, relPrefix = ''): string[] {
  const markdownName = /\.(md|mdx)$/iu;
  const collected: string[] = [];

  for (const dirent of fs.readdirSync(rootDir, { withFileTypes: true })) {
    const { name } = dirent;
    if (name.startsWith('.')) continue;

    const relative = relPrefix ? path.join(relPrefix, name) : name;
    if (dirent.isDirectory()) {
      const nested = walkMarkdownFiles(path.join(rootDir, name), relative);
      for (const nestedPath of nested) collected.push(nestedPath);
    } else if (dirent.isFile() && markdownName.test(name)) {
      collected.push(relative);
    }
  }

  collected.sort();
  return collected;
}
|
||||
|
||||
/** Convert every backslash in `relPath` to a forward slash. */
function normalizeRelPath(relPath: string): string {
  return relPath.split('\\').join('/');
}
|
||||
|
||||
/**
 * Report whether the first path segment is exactly two lowercase ASCII
 * letters, which this module treats as a locale directory (e.g. `sl/`).
 */
function isLocalePath(relPath: string): boolean {
  const [topSegment = ''] = normalizeRelPath(relPath).split('/');
  return /^[a-z]{2}$/u.test(topSegment);
}
|
||||
|
||||
/**
 * Compile a simple glob pattern into an anchored regular expression.
 *
 * Supported wildcards:
 * - `*` matches any run of characters (including `/`), possibly empty;
 * - `?` matches exactly one character other than `/`.
 *
 * Every other regex metacharacter is matched literally.
 *
 * @param pattern Glob pattern, e.g. `*-INTERNAL.md`.
 * @returns A regular expression that must match the whole path.
 */
function globToRegExp(pattern: string): RegExp {
  const source = pattern
    // Escape regex metacharacters; `*` and `?` are translated below.
    .replace(/[.+^${}()|[\]\\]/gu, '\\$&')
    .replace(/\*/gu, '.*')
    // Without this step `?` would leak into the regex as a quantifier.
    .replace(/\?/gu, '[^/]');
  return new RegExp(`^${source}$`, 'u');
}
|
||||
|
||||
/**
 * Extract the patterns from the text of a docignore file.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are dropped.
 *
 * @param contents Raw file contents.
 * @returns The remaining lines, in file order.
 */
export function parseDocignorePatterns(contents: string): string[] {
  const patterns: string[] = [];
  for (const rawLine of contents.split('\n')) {
    const line = rawLine.trim();
    if (line === '' || line.startsWith('#')) continue;
    patterns.push(line);
  }
  return patterns;
}
|
||||
|
||||
/**
 * Decide whether `relPath` is excluded by any docignore pattern.
 *
 * A pattern ending in `/` names a directory: after dropping an optional
 * leading wildcard segment and the trailing slash, it matches when the path
 * equals that name, starts with `name/`, or contains `/name/`. Any other
 * pattern is compiled with `globToRegExp` and must match the whole path.
 */
function shouldIgnorePath(relPath: string, patterns: string[]): boolean {
  const candidate = normalizeRelPath(relPath);

  for (const pattern of patterns) {
    if (!pattern.endsWith('/')) {
      if (globToRegExp(pattern).test(candidate)) return true;
      continue;
    }

    const dirName = pattern.replace(/^\*\//u, '').replace(/\/$/u, '');
    const insideDir =
      candidate === dirName ||
      candidate.startsWith(`${dirName}/`) ||
      candidate.includes(`/${dirName}/`);
    if (insideDir) return true;
  }

  return false;
}
|
||||
|
||||
/**
 * Split a markdown document into its frontmatter attributes and its body.
 *
 * A frontmatter block is recognised when the first line is `---` and a later
 * line is `---` as well (trailing whitespace on either fence is tolerated).
 * Only flat `key: value` lines are read; keys are lower-cased and a value
 * wrapped in one matching pair of single or double quotes is unwrapped.
 *
 * A leading byte-order mark is dropped and CRLF / CR line endings are
 * normalised to LF before parsing, so files authored on Windows are handled
 * like LF files; the returned body therefore always uses LF.
 *
 * @param content Raw file contents.
 * @returns The parsed attributes and the trimmed body. When no complete
 *   frontmatter block is present, `attributes` is empty and `body` is the
 *   whole (trimmed) document.
 */
export function parseFrontmatter(content: string): ParsedFrontmatter {
  const text = content.replace(/^\uFEFF/u, '').replace(/\r\n?/gu, '\n');
  const lines = text.split('\n');

  const isFence = (line: string): boolean => line.trimEnd() === '---';
  const opensWithFence = isFence(lines[0] ?? '');
  // Search from the second line so an empty block ("---\n---") is found, and
  // work on lines so a closing fence at end-of-file needs no trailing newline.
  const closeIndex = opensWithFence
    ? lines.findIndex((line, index) => index > 0 && isFence(line))
    : -1;
  if (closeIndex < 0) {
    return { attributes: {}, body: text.trim() };
  }

  const attributes: Record<string, string> = {};
  for (const line of lines.slice(1, closeIndex)) {
    const match = /^([A-Za-z0-9_-]+):\s*(.*)$/u.exec(line);
    if (!match) continue;
    const key = match[1] ?? '';
    const rawValue = (match[2] ?? '').trim();
    // Unwrap only a matching pair of quotes; a lone quote is part of the value.
    const quoted = /^(['"])(.*)\1$/u.exec(rawValue);
    attributes[key.toLowerCase()] = quoted ? (quoted[2] ?? '') : rawValue;
  }

  return {
    attributes,
    body: lines.slice(closeIndex + 1).join('\n').trim(),
  };
}
|
||||
|
||||
/**
 * Return the text of a level-one heading when it is the first non-blank line
 * of `body`; otherwise return `fallback`.
 */
function extractTitle(body: string, fallback: string): string {
  const firstContentLine = body
    .split('\n')
    .map((line) => line.trim())
    .find((line) => line.length > 0);
  if (firstContentLine === undefined) return fallback;

  const heading = /^#\s+(.+)$/u.exec(firstContentLine);
  return heading ? heading[1].trim() : fallback;
}
|
||||
|
||||
/**
 * Extract the first prose paragraph of a markdown body as a one-line summary.
 *
 * Headings, list items, table rows, block quotes and fenced code blocks
 * (both ``` and ~~~ fences) are skipped while looking for the paragraph.
 * Once the paragraph has started, a blank line, any of those constructs, or
 * the start of a code fence ends it.
 *
 * @param body Markdown content without frontmatter.
 * @returns The paragraph with whitespace collapsed to single spaces, or an
 *   empty string when the body contains no prose paragraph.
 */
function extractDescription(body: string): string {
  const paragraph: string[] = [];
  // Marker (``` or ~~~) of the code fence currently open, if any.
  let openFence: string | null = null;

  for (const rawLine of body.split('\n')) {
    const line = rawLine.trim();

    if (openFence !== null) {
      // Inside a code block: only the matching marker closes it.
      if (line.startsWith(openFence)) openFence = null;
      continue;
    }

    if (line.startsWith('```') || line.startsWith('~~~')) {
      // A code block right after paragraph text ends the paragraph, so text
      // that follows the code block is not appended to the summary.
      if (paragraph.length > 0) break;
      openFence = line.slice(0, 3);
      continue;
    }

    const isStructural =
      line === '' ||
      line.startsWith('#') ||
      line.startsWith('- ') ||
      line.startsWith('* ') ||
      line.startsWith('|') ||
      /^\d+\.\s/u.test(line) ||
      line.startsWith('>');
    if (isStructural) {
      if (paragraph.length > 0) break;
      continue;
    }

    paragraph.push(line);
  }

  return paragraph.join(' ').replace(/\s+/gu, ' ').trim();
}
|
||||
|
||||
/**
 * Derive a title from a file path for pages that declare none.
 *
 * The file name without its extension is used, with `-` and `_` replaced by
 * spaces. For an `index` file the parent directory name is used instead, and
 * `Overview` when there is no parent directory.
 */
function fallbackTitleFromPath(relPath: string): string {
  const humanize = (segment: string): string => segment.replace(/[-_]/gu, ' ');

  const stem = path.basename(relPath, path.extname(relPath));
  if (stem !== 'index') return humanize(stem);

  const parentName = path.basename(path.dirname(relPath));
  return parentName && parentName !== '.' ? humanize(parentName) : 'Overview';
}
|
||||
|
||||
/**
 * Map a source-relative markdown path to its site URL path.
 *
 * `index.md` / `index.mdx` files map to their directory (`/` for the root
 * index); every other page maps to `/<path without extension>/`.
 *
 * @param relPath Path relative to the docs source directory; either `/` or
 *   `\` separators are accepted.
 * @returns URL path starting and ending with `/`.
 */
export function docUrlPath(relPath: string): string {
  // Backslashes are converted to `/`, the same normalisation `normalizeRelPath`
  // applies, so this function can be used on its own.
  const normalized = relPath.replace(/\\/gu, '/');

  // `.mdx` index pages must resolve to the directory URL as well, not `/…/index/`.
  const indexPage = /^(.*\/)?index\.mdx?$/u.exec(normalized);
  if (indexPage) {
    return `/${indexPage[1] ?? ''}`;
  }

  return `/${normalized.replace(/\.mdx?$/iu, '')}/`;
}
|
||||
|
||||
/**
 * Determine a page's category from its first path segment.
 *
 * The segment is returned when it is one of `CATEGORY_ORDER`; everything else,
 * including files at the root of the source directory, is `overview`.
 */
function docCategory(relPath: string): string {
  const [topLevel = ''] = normalizeRelPath(relPath).split('/');
  const knownCategories: readonly string[] = CATEGORY_ORDER;
  return knownCategories.includes(topLevel) ? topLevel : 'overview';
}
|
||||
|
||||
/**
 * Load every page that belongs in the llms artifacts.
 *
 * Markdown files under `sourceDir` are collected, then locale directories and
 * paths matched by the docignore file are dropped. Each remaining file is
 * parsed into an `LlmsDocPage`.
 *
 * @param opts.sourceDir Root directory of the markdown sources.
 * @param opts.docignorePath Ignore file to apply; defaults to `.docignore`
 *   inside `sourceDir`. A missing file means no patterns.
 * @returns Pages ordered by category (following `CATEGORY_ORDER`), with the
 *   root page first and the rest ordered by URL path.
 */
export function loadLlmsDocPages(opts: {
  sourceDir: string;
  docignorePath?: string;
}): LlmsDocPage[] {
  const sourceDir = path.resolve(opts.sourceDir);
  const docignorePath = opts.docignorePath
    ? path.resolve(opts.docignorePath)
    : path.join(sourceDir, '.docignore');

  let patterns: string[] = [];
  if (fs.existsSync(docignorePath)) {
    patterns = parseDocignorePatterns(fs.readFileSync(docignorePath, 'utf8'));
  }

  const pages: LlmsDocPage[] = [];
  for (const rawRelPath of walkMarkdownFiles(sourceDir)) {
    const relPath = normalizeRelPath(rawRelPath);
    if (isLocalePath(relPath) || shouldIgnorePath(relPath, patterns)) continue;

    const source = fs.readFileSync(path.join(sourceDir, relPath), 'utf8');
    const { attributes, body } = parseFrontmatter(source);
    const title =
      attributes.title || extractTitle(body, fallbackTitleFromPath(relPath));
    const description =
      attributes.description || extractDescription(body) || title;
    const category = docCategory(relPath);

    pages.push({
      relPath,
      urlPath: docUrlPath(relPath),
      title,
      description,
      body,
      category,
      optional: OPTIONAL_CATEGORIES.has(category),
    });
  }

  const categoryRank = (page: LlmsDocPage): number =>
    CATEGORY_ORDER.indexOf(page.category as (typeof CATEGORY_ORDER)[number]);

  return pages.sort((left, right) => {
    const byCategory = categoryRank(left) - categoryRank(right);
    if (byCategory !== 0) return byCategory;

    // Within a category the site root always comes first.
    const leftIsRoot = left.urlPath === '/';
    const rightIsRoot = right.urlPath === '/';
    if (leftIsRoot && !rightIsRoot) return -1;
    if (!leftIsRoot && rightIsRoot) return 1;

    return left.urlPath.localeCompare(right.urlPath);
  });
}
|
||||
|
||||
/**
 * Append a URL path to the site base URL.
 *
 * Trailing slashes are removed from `siteBaseUrl` first, so a `urlPath` that
 * starts with `/` is joined with exactly one slash.
 */
function joinUrl(siteBaseUrl: string, urlPath: string): string {
  let end = siteBaseUrl.length;
  while (end > 0 && siteBaseUrl[end - 1] === '/') {
    end -= 1;
  }
  return siteBaseUrl.slice(0, end) + urlPath;
}
|
||||
|
||||
/**
 * Render one markdown bullet per page in the form
 * `- [title](absolute URL): description`.
 */
function sectionLinks(siteBaseUrl: string, pages: LlmsDocPage[]): string[] {
  const bullets: string[] = [];
  for (const { title, urlPath, description } of pages) {
    const href = joinUrl(siteBaseUrl, urlPath);
    bullets.push(`- [${title}](${href}): ${description}`);
  }
  return bullets;
}
|
||||
|
||||
/**
 * Render the curated `llms.txt` map.
 *
 * The output starts with a fixed title, summary and usage note. Each
 * non-optional category that has pages then gets a `##` section of links, in
 * `CATEGORY_ORDER`. When any page is optional, a `## Optional` section follows
 * with one `###` subsection per optional category that has pages.
 *
 * @param opts.siteBaseUrl Base URL used to build absolute page links.
 * @param opts.pages Pages to list.
 * @returns The file content, ending with a single newline.
 */
export function renderLlmsTxt(opts: {
  siteBaseUrl: string;
  pages: LlmsDocPage[];
}): string {
  const { siteBaseUrl, pages } = opts;
  const out: string[] = [
    '# Clawdie Documentation',
    '',
    '> Clawdie is a FreeBSD-native AI operator and multi-agent controlplane. This file maps the public documentation for installation, architecture, operations, reference material, and roadmap planning.',
    '',
    'Use this file as the curated map of the public docs. If you need a one-shot context file containing the same English public docs concatenated together, use `/llms-full.txt` on the same site.',
    '',
  ];

  // Append "<heading>", blank line, links and a trailing blank line for every
  // listed category that has at least one page in `pool`.
  const appendSections = (
    headingPrefix: string,
    categories: readonly string[],
    pool: LlmsDocPage[],
  ): void => {
    for (const category of categories) {
      const inCategory = pool.filter((page) => page.category === category);
      if (inCategory.length === 0) continue;
      out.push(
        `${headingPrefix} ${CATEGORY_LABELS[category]}`,
        '',
        ...sectionLinks(siteBaseUrl, inCategory),
        '',
      );
    }
  };

  const mainCategories = CATEGORY_ORDER.filter(
    (category) => !OPTIONAL_CATEGORIES.has(category),
  );
  appendSections(
    '##',
    mainCategories,
    pages.filter((page) => !page.optional),
  );

  const optionalPages = pages.filter((page) => page.optional);
  if (optionalPages.length > 0) {
    out.push('## Optional', '');
    const optionalCategories = CATEGORY_ORDER.filter((category) =>
      OPTIONAL_CATEGORIES.has(category),
    );
    appendSections('###', optionalCategories, optionalPages);
  }

  return `${out.join('\n').trim()}\n`;
}
|
||||
|
||||
/**
 * Render `llms-full.txt`: every page body concatenated into one document.
 *
 * After a fixed title and summary, each page contributes a `##` heading with
 * its title, its absolute URL, a `Source:` line under `docs/public/`, and its
 * trimmed body, in the order the pages are given.
 *
 * @param opts.siteBaseUrl Base URL used to build absolute page links.
 * @param opts.pages Pages to include.
 * @returns The file content, ending with a single newline.
 */
export function renderLlmsFullTxt(opts: {
  siteBaseUrl: string;
  pages: LlmsDocPage[];
}): string {
  const { siteBaseUrl, pages } = opts;
  const out: string[] = [
    '# Clawdie Documentation Full Context',
    '',
    '> Concatenated English public documentation for Clawdie. Source of truth: `docs/public/` in the main repository.',
    '',
  ];

  for (const { title, urlPath, relPath, body } of pages) {
    out.push(
      `## ${title}`,
      '',
      `URL: ${joinUrl(siteBaseUrl, urlPath)}`,
      `Source: docs/public/${relPath}`,
      '',
      body.trim(),
      '',
    );
  }

  return `${out.join('\n').trim()}\n`;
}
|
||||
|
||||
/**
 * Generate `llms.txt` and `llms-full.txt` and write them to `outputDir`.
 *
 * The output directory is created when missing. Existing files with the same
 * names are overwritten.
 *
 * @param opts.sourceDir Root directory of the markdown sources.
 * @param opts.outputDir Directory that receives both artifacts.
 * @param opts.siteBaseUrl Base URL used to build absolute page links.
 * @param opts.docignorePath Optional ignore file, passed through to
 *   `loadLlmsDocPages`.
 * @returns The number of pages included and the paths of the written files
 *   (`llms.txt` first, then `llms-full.txt`).
 */
export function writeLlmsArtifacts(opts: {
  sourceDir: string;
  outputDir: string;
  siteBaseUrl: string;
  docignorePath?: string;
}): { pageCount: number; files: string[] } {
  const { sourceDir, outputDir, siteBaseUrl, docignorePath } = opts;
  const pages = loadLlmsDocPages({ sourceDir, docignorePath });
  fs.mkdirSync(outputDir, { recursive: true });

  // Each artifact is rendered immediately before it is written, in this order.
  const artifacts = [
    { fileName: 'llms.txt', render: renderLlmsTxt },
    { fileName: 'llms-full.txt', render: renderLlmsFullTxt },
  ];

  const files: string[] = [];
  for (const { fileName, render } of artifacts) {
    files.push(path.join(outputDir, fileName));
  }
  artifacts.forEach(({ render }, index) => {
    fs.writeFileSync(files[index], render({ siteBaseUrl, pages }), 'utf8');
  });

  return { pageCount: pages.length, files };
}
|
||||
Loading…
Add table
Reference in a new issue