|
| 1 | +/** |
| 2 | + * gbrain orphans — Surface pages with no inbound wikilinks. |
| 3 | + * |
| 4 | + * Deterministic: zero LLM calls. Queries the links table for pages with |
| 5 | + * no entries where to_page_id = pages.id. By default filters out |
| 6 | + * auto-generated pages and pseudo-pages where having no inbound links is expected. |
| 7 | + * |
| 8 | + * Usage: |
| 9 | + * gbrain orphans # list orphans grouped by domain |
| 10 | + * gbrain orphans --json # JSON output for agent consumption |
| 11 | + * gbrain orphans --count # just the number |
| 12 | + * gbrain orphans --include-pseudo # include auto-generated/pseudo pages |
| 13 | + */ |
| 14 | + |
| 15 | +import type { BrainEngine } from '../core/engine.ts'; |
| 16 | +import * as db from '../core/db.ts'; |
| 17 | + |
| 18 | +// --- Types --- |
| 19 | + |
/** One page with no inbound links, in the shape reported to the user. */
export interface OrphanPage {
  /** Page slug as stored in the `pages` table. */
  slug: string;
  /** Page title; the orphan query substitutes the slug when the title is NULL. */
  title: string;
  /** Grouping key produced by `deriveDomain` (frontmatter domain, else first slug segment, else 'root'). */
  domain: string;
}
| 25 | + |
/** Outcome of an orphan scan: the reported pages plus the counts used in the summary line. */
export interface OrphanResult {
  /** Orphan pages that remain after the default exclusion filter (or all orphans when it is disabled). */
  orphans: OrphanPage[];
  /** Number of entries in `orphans`. */
  total_orphans: number;
  /** `total_pages` minus `excluded`. */
  total_linkable: number;
  /** Row count of the `pages` table. */
  total_pages: number;
  /** Number of orphan pages dropped by `shouldExclude`; 0 when pseudo pages are included. */
  excluded: number;
}
| 33 | + |
| 34 | +// --- Filter constants --- |
| 35 | + |
// The lookup tables below are typed read-only so that no code path can
// accidentally mutate the shared filter configuration at runtime.

/** Slug suffixes that mark auto-generated root files (compared with `endsWith`). */
const AUTO_SUFFIX_PATTERNS: readonly string[] = ['/_index', '/log'];

/** Slugs that are pseudo-pages by convention (compared by exact match). */
const PSEUDO_SLUGS: ReadonlySet<string> = new Set([
  '_atlas',
  '_index',
  '_stats',
  '_orphans',
  '_scratch',
  'claude',
]);

/** Slug segment that marks raw sources (may appear anywhere inside the slug). */
const RAW_SEGMENT = '/raw/';

/** Slug prefixes under which having no inbound links is expected. */
const DENY_PREFIXES: readonly string[] = [
  'output/',
  'dashboards/',
  'scripts/',
  'templates/',
  'openclaw/config/',
];

/** First slug segments under which having no inbound links is expected. */
const FIRST_SEGMENT_EXCLUSIONS: ReadonlySet<string> = new Set([
  'scratch',
  'thoughts',
  'catalog',
  'entities',
]);
| 56 | + |
| 57 | +// --- Filter logic --- |
| 58 | + |
/**
 * Decide whether a slug is left out of orphan reporting by default.
 *
 * A slug is excluded when any of the following holds:
 *  - it is exactly one of PSEUDO_SLUGS;
 *  - it ends with one of AUTO_SUFFIX_PATTERNS;
 *  - it contains RAW_SEGMENT;
 *  - it starts with one of DENY_PREFIXES;
 *  - its first '/'-separated segment is in FIRST_SEGMENT_EXCLUSIONS.
 *
 * @param slug Page slug to test.
 * @returns true when a missing inbound link is expected for this slug.
 */
export function shouldExclude(slug: string): boolean {
  const [leadingSegment] = slug.split('/');

  return (
    PSEUDO_SLUGS.has(slug) ||
    AUTO_SUFFIX_PATTERNS.some(tail => slug.endsWith(tail)) ||
    slug.includes(RAW_SEGMENT) ||
    DENY_PREFIXES.some(lead => slug.startsWith(lead)) ||
    FIRST_SEGMENT_EXCLUSIONS.has(leadingSegment)
  );
}
| 86 | + |
/**
 * Pick the domain label used to group a page.
 *
 * @param frontmatterDomain Domain declared in the page frontmatter, if any.
 * @param slug Page slug, used as the fallback source.
 * @returns The trimmed frontmatter domain when it is a non-blank string;
 *          otherwise the first '/'-separated slug segment, or 'root' when
 *          that segment is empty.
 */
export function deriveDomain(frontmatterDomain: string | null | undefined, slug: string): string {
  // The runtime typeof check also covers non-string values that slip past the type system.
  const declared = typeof frontmatterDomain === 'string' ? frontmatterDomain.trim() : '';
  if (declared !== '') {
    return declared;
  }

  const [leadingSegment] = slug.split('/');
  return leadingSegment || 'root';
}
| 96 | + |
| 97 | +// --- Core query --- |
| 98 | + |
/**
 * Fetch every page that no row in `links` points to.
 *
 * No slug-based filtering happens here: the result contains all orphan
 * pages, ordered by slug. `title` falls back to the slug when the stored
 * title is NULL; `domain` is the frontmatter `domain` value as text, or null.
 */
export async function queryOrphanPages(): Promise<{ slug: string; title: string; domain: string | null }[]> {
  type OrphanRow = { slug: string; title: string; domain: string | null };

  const query = db.getConnection();
  const result = await query`
    SELECT
      p.slug,
      COALESCE(p.title, p.slug) AS title,
      p.frontmatter->>'domain' AS domain
    FROM pages p
    WHERE NOT EXISTS (
      SELECT 1 FROM links l WHERE l.to_page_id = p.id
    )
    ORDER BY p.slug
  `;

  return result as OrphanRow[];
}
| 118 | + |
/**
 * Find orphan pages and compute the summary counts.
 *
 * Only pages without inbound links are tested against `shouldExclude`;
 * pages that do have inbound links always count as linkable.
 *
 * @param includePseudo When true, skip the default exclusion filter so
 *        auto-generated and pseudo pages are reported too.
 * @returns The reported orphans together with total/linkable/excluded counts.
 */
export async function findOrphans(includePseudo: boolean = false): Promise<OrphanResult> {
  const allOrphans = await queryOrphanPages();

  // Size of the whole pages table, needed for the summary line.
  const sql = db.getConnection();
  const [{ count: pageCount }] = await sql`SELECT count(*)::int AS count FROM pages`;
  const total = Number(pageCount);

  const filtered = includePseudo
    ? allOrphans
    : allOrphans.filter(row => !shouldExclude(row.slug));

  const orphans: OrphanPage[] = filtered.map(row => ({
    slug: row.slug,
    title: row.title,
    domain: deriveDomain(row.domain, row.slug),
  }));

  // Orphans removed by the filter; always 0 when includePseudo is set.
  const excluded = allOrphans.length - filtered.length;

  return {
    orphans,
    total_orphans: orphans.length,
    // Same value as filtered.length + (total - allOrphans.length).
    total_linkable: total - excluded,
    total_pages: total,
    excluded,
  };
}
| 152 | + |
| 153 | +// --- Output formatters --- |
| 154 | + |
/**
 * Render an orphan scan as human-readable text.
 *
 * The first line is the summary, followed by a blank line. When there are
 * no orphans a fixed notice follows; otherwise one `[domain]` section per
 * domain, each listing its pages sorted by slug, with sections separated by
 * a blank line. Trailing whitespace is stripped from the grouped output.
 *
 * @param result Scan result to format.
 * @returns The formatted multi-line report.
 */
export function formatOrphansText(result: OrphanResult): string {
  const summary =
    `${result.total_orphans} orphans out of ${result.total_linkable} linkable pages ` +
    `(${result.total_pages} total; ${result.excluded} excluded)\n`;

  if (result.orphans.length === 0) {
    return [summary, 'No orphan pages found.'].join('\n');
  }

  // Bucket pages by domain.
  const groups = new Map<string, OrphanPage[]>();
  for (const entry of result.orphans) {
    const bucket = groups.get(entry.domain);
    if (bucket) {
      bucket.push(entry);
    } else {
      groups.set(entry.domain, [entry]);
    }
  }

  // Order sections by domain name using plain code-unit comparison,
  // which is what the default Array.prototype.sort does for strings.
  const sections = [...groups.entries()].sort(([a], [b]) => (a < b ? -1 : a > b ? 1 : 0));

  const out: string[] = [summary];
  for (const [name, members] of sections) {
    members.sort((left, right) => left.slug.localeCompare(right.slug));
    out.push(`[${name}]`);
    members.forEach(member => out.push(` ${member.slug} ${member.title}`));
    out.push('');
  }

  return out.join('\n').trimEnd();
}
| 189 | + |
| 190 | +// --- CLI entry point --- |
| 191 | + |
/**
 * CLI entry point for `gbrain orphans`.
 *
 * Recognised flags: `--help` / `-h` (print usage and stop), `--count`
 * (print only the orphan count), `--json` (print the full result as JSON)
 * and `--include-pseudo` (disable the default exclusion filter). Help takes
 * precedence over everything; `--count` takes precedence over `--json`.
 *
 * @param _engine Unused by this command.
 * @param args Raw command-line arguments following the subcommand.
 */
export async function runOrphans(_engine: BrainEngine, args: string[]) {
  const flags = new Set(args);

  if (flags.has('--help') || flags.has('-h')) {
    console.log(`Usage: gbrain orphans [options]

Find pages with no inbound wikilinks.

Options:
 --json Output as JSON (for agent consumption)
 --count Output just the number of orphans
 --include-pseudo Include auto-generated and pseudo pages in results
 --help, -h Show this help

Output (default): grouped by domain, sorted alphabetically within each group
Summary line: N orphans out of M linkable pages (K total; K-M excluded)
`);
    return;
  }

  const result = await findOrphans(flags.has('--include-pseudo'));

  if (flags.has('--count')) {
    console.log(String(result.total_orphans));
  } else if (flags.has('--json')) {
    console.log(JSON.stringify(result, null, 2));
  } else {
    console.log(formatOrphansText(result));
  }
}
0 commit comments