#!/usr/bin/env node
/**
* Generate a synthetic npm-workspaces TypeScript monorepo for fallow performance testing.
*
* Usage:
* node gen-monorepo.cjs --out <dir> --workspaces N --files-per-workspace M [--git] [--dupes] [--heavy]
*
* Defaults: --workspaces 80 --files-per-workspace 300 (≈24,000 files)
*
* Each workspace gets:
* - A package.json with deps that activate ~10 fallow plugins (react, eslint, vitest, ...)
* - A tsconfig.json
* - M source files with realistic export counts and cross-imports
*
* The repo as a whole gets:
* - A root package.json declaring all workspaces
* - A root tsconfig.base.json
* - Optional: `git init` + initial commit (when --git is passed)
* - Optional: each workspace gets one large duplicated file (when --dupes is passed)
* - Optional: barrel files + cross-workspace imports + heterogeneous plugin deps (when --heavy is passed)
*/
const fs = require('node:fs');
const path = require('node:path');
const { execSync } = require('node:child_process');
// ─── Args ────────────────────────────────────────────────────────────────────
// CLI flags parsed into a plain object: string values, or `true` for bare flags.
const args = parseArgs(process.argv.slice(2));

// Output directory and repo dimensions (defaults ≈ 24,000 files).
const OUT = path.resolve(args.out || './synthetic-monorepo');
const WORKSPACES = parseInt(args.workspaces || '80', 10);
const FILES_PER_WS = parseInt(args['files-per-workspace'] || '300', 10);
const EXTRA_COMMITS = parseInt(args.commits || '0', 10);

// Boolean feature toggles (a bare flag parses to the literal `true`).
const WITH_GIT = args.git === true;
const WITH_DUPES = args.dupes === true;
// `--heavy`: turn on barrel files + cross-workspace imports + heterogeneous plugin deps.
// Designed to stress fallow's analyze + plugins stages the way real-world monorepos do.
const HEAVY = args.heavy === true;
// `--config-files`: per-workspace, write extra fallow-relevant config files (jest, eslint,
// tailwind, project.json, *.config.ts variants). Designed to stress fallow's plugins stage,
// which globs every config pattern against every discovered file and reads matches from disk.
const WITH_CONFIG_FILES = args['config-files'] === true;

// 80-line block copy-pasted into every workspace -> guaranteed clones for the dupes scanner.
const DUPLICATED_BLOCK = (() => {
  const lines = [];
  for (let n = 0; n < 80; n++) {
    lines.push(`const k${n} = ${n} * 7 + ${n};`);
  }
  return lines.join('\n') + '\n';
})();
// 5 different plugin profiles to rotate through workspaces in --heavy mode.
// Each profile activates a distinct set of fallow plugins.
// NOTE: key order matters — `deps`/`devDeps` are serialized verbatim into each
// workspace's package.json by writeWorkspace via JSON.stringify, so reordering
// keys here changes the generated files byte-for-byte.
const PLUGIN_PROFILES = [
  // 0: React + Storybook + Vitest (also the default profile outside --heavy mode)
  {
    deps: { react: '^18.0.0', 'react-dom': '^18.0.0' },
    devDeps: { typescript: '^5.0.0', vitest: '^1.0.0', '@storybook/react': '^7.0.0', tailwindcss: '^3.0.0', eslint: '^8.0.0' },
  },
  // 1: Angular (karma/jasmine test stack + Storybook-for-Angular)
  {
    deps: { '@angular/core': '^17.0.0', '@angular/common': '^17.0.0', '@angular/router': '^17.0.0', rxjs: '^7.0.0' },
    devDeps: { typescript: '^5.0.0', '@angular/cli': '^17.0.0', karma: '^6.0.0', jasmine: '^5.0.0', '@storybook/angular': '^7.0.0' },
  },
  // 2: Vue + Vite (pinia/vue-router runtime deps, cypress for e2e)
  {
    deps: { vue: '^3.0.0', 'vue-router': '^4.0.0', pinia: '^2.0.0' },
    devDeps: { typescript: '^5.0.0', vite: '^5.0.0', vitest: '^1.0.0', '@vitejs/plugin-vue': '^5.0.0', cypress: '^13.0.0' },
  },
  // 3: SvelteKit + Playwright
  {
    deps: { svelte: '^4.0.0', '@sveltejs/kit': '^2.0.0' },
    devDeps: { typescript: '^5.0.0', vite: '^5.0.0', vitest: '^1.0.0', playwright: '^1.0.0', '@playwright/test': '^1.0.0' },
  },
  // 4: Next.js + Jest + GraphQL (graphql codegen + sentry + prettier)
  {
    deps: { next: '^14.0.0', react: '^18.0.0', 'react-dom': '^18.0.0', graphql: '^16.0.0' },
    devDeps: { typescript: '^5.0.0', jest: '^29.0.0', '@graphql-codegen/cli': '^5.0.0', sentry: '^7.0.0', prettier: '^3.0.0' },
  },
];
// Echo the effective configuration to stderr before doing any work.
console.error(`Generating: ${OUT}`);
console.error(` workspaces=${WORKSPACES} files/ws=${FILES_PER_WS} (~${WORKSPACES * FILES_PER_WS} files)`);
console.error(` git=${WITH_GIT} commits=${EXTRA_COMMITS} dupes=${WITH_DUPES}`);

// ─── Generate ────────────────────────────────────────────────────────────────
// Start from a clean slate: wipe any previous output directory, then recreate it.
if (fs.existsSync(OUT)) {
  fs.rmSync(OUT, { recursive: true, force: true });
}
fs.mkdirSync(OUT, { recursive: true });

// Root manifests first, then every workspace, then (optionally) git history.
writeRoot();
for (let w = 0; w < WORKSPACES; w++) {
  writeWorkspace(w);
}
if (WITH_GIT) {
  initGit();
}
console.error('Done.');
// ─── Templates ───────────────────────────────────────────────────────────────
// Write the repo-root manifests: package.json declaring every workspace,
// the shared tsconfig.base.json, a root tsconfig.json extending it
// (silences fallow's broken-chain warning), and a .gitignore.
function writeRoot() {
  // Local helper: pretty-print an object as JSON (2-space indent + trailing newline).
  const writeJson = (file, obj) => {
    fs.writeFileSync(path.join(OUT, file), JSON.stringify(obj, null, 2) + '\n');
  };

  // Root package.json: every packages/ws-* directory is an npm workspace.
  const workspaces = [];
  for (let w = 0; w < WORKSPACES; w++) {
    workspaces.push(`packages/ws-${w}`);
  }
  writeJson('package.json', {
    name: 'synthetic-monorepo',
    private: true,
    workspaces,
  });

  // Shared compiler options that every workspace tsconfig extends.
  writeJson('tsconfig.base.json', {
    compilerOptions: {
      target: 'ES2022',
      module: 'ESNext',
      moduleResolution: 'bundler',
      strict: true,
      esModuleInterop: true,
      skipLibCheck: true,
    },
  });

  // Root tsconfig.json pointing at the base (no files of its own).
  writeJson('tsconfig.json', { extends: './tsconfig.base.json', files: [] });

  fs.writeFileSync(path.join(OUT, '.gitignore'), 'node_modules/\n.fallow/\n');
}
// Write one workspace: package.json, tsconfig.json, FILES_PER_WS source modules,
// an index.ts barrel, and the optional --heavy / --dupes / --config-files extras.
function writeWorkspace(w) {
  const wsDir = path.join(OUT, 'packages', `ws-${w}`);
  const srcDir = path.join(wsDir, 'src');
  fs.mkdirSync(srcDir, { recursive: true });

  // --heavy rotates through plugin profiles so workspaces are heterogeneous;
  // otherwise every workspace gets the fixed React-ish profile 0 (small, quick, predictable).
  const profile = HEAVY ? PLUGIN_PROFILES[w % PLUGIN_PROFILES.length] : PLUGIN_PROFILES[0];
  const manifest = {
    name: `@synthetic/ws-${w}`,
    version: '0.0.0',
    main: 'src/index.ts',
    dependencies: profile.deps,
    devDependencies: { ...profile.devDeps, husky: '^9.0.0', 'lint-staged': '^15.0.0' },
  };
  fs.writeFileSync(path.join(wsDir, 'package.json'), JSON.stringify(manifest, null, 2) + '\n');
  fs.writeFileSync(
    path.join(wsDir, 'tsconfig.json'),
    JSON.stringify({ extends: '../../tsconfig.base.json', include: ['src/**/*'] }, null, 2) + '\n',
  );

  // The workspace's source modules.
  for (let i = 0; i < FILES_PER_WS; i++) {
    fs.writeFileSync(path.join(srcDir, `mod-${i}.ts`), makeModule(w, i));
  }

  // index.ts: a real barrel file. --heavy re-exports ALL modules; otherwise only the first 10.
  const reexportCount = HEAVY ? FILES_PER_WS : Math.min(10, FILES_PER_WS);
  const reexportLines = [];
  for (let i = 0; i < reexportCount; i++) {
    reexportLines.push(`export * from './mod-${i}';`);
  }
  fs.writeFileSync(path.join(srcDir, 'index.ts'), reexportLines.join('\n') + '\n');

  // --heavy: deeply-nested barrels re-exporting the barrel, giving fallow a
  // multi-hop re-export chain to walk.
  if (HEAVY) {
    fs.writeFileSync(path.join(srcDir, 'barrel-1.ts'), `export * from './index';\n`);
    fs.writeFileSync(path.join(srcDir, 'barrel-2.ts'), `export * from './barrel-1';\n`);
    fs.writeFileSync(path.join(srcDir, 'barrel-3.ts'), `export * from './barrel-2';\n`);
  }

  // --dupes: one large copy-pasted file per workspace.
  if (WITH_DUPES) {
    fs.writeFileSync(path.join(srcDir, 'duplicated-block.ts'), DUPLICATED_BLOCK);
  }

  // --config-files: a realistic spread of fallow-relevant config files. Each is
  // tiny (empty object / no-op export), but the *count* matters for the plugins
  // stage: every config has to be globbed and read.
  if (WITH_CONFIG_FILES) {
    const configFiles = [
      ['.eslintrc.json', '{}\n'],
      ['.prettierrc', '{}\n'],
      ['tsconfig.spec.json', '{}\n'],
      ['tsconfig.lib.json', '{}\n'],
      ['jest.config.ts', 'export default {};\n'],
      ['vitest.config.ts', 'export default {};\n'],
      ['tailwind.config.ts', 'export default {};\n'],
      ['webpack.config.ts', 'export default {};\n'],
      ['project.json', '{"name":"ws"}\n'],
      ['.babelrc', '{}\n'],
    ];
    for (const [file, content] of configFiles) {
      fs.writeFileSync(path.join(wsDir, file), content);
    }
  }
}
/**
 * Build the TypeScript source text for module `mod-${i}` of workspace `w`.
 *
 * Every module imports `value0` from the next module in the workspace
 * (wrapping around via the modulo), so a workspace's modules form one long
 * import chain/cycle. In --heavy mode each module additionally imports from
 * the next workspace's barrel and from the local barrel-3 chain, and exports
 * extra types/enums/interfaces/re-exports to widen the analyze surface.
 *
 * NOTE: the returned template literal is written verbatim into the generated
 * .ts file, so its internal whitespace/newlines are intentional.
 *
 * @param {number} w workspace index
 * @param {number} i module index within the workspace
 * @returns {string} generated file contents
 */
function makeModule(w, i) {
  const importTarget = (i + 1) % FILES_PER_WS;
  // --heavy: cross-workspace import pulling from the next workspace's barrel.
  // Forces fallow's resolver and the analyze stage to walk across workspace boundaries.
  const crossWsImport = HEAVY
    ? `import { value0 as crossWs } from '@synthetic/ws-${(w + 1) % WORKSPACES}';\n`
    : '';
  // --heavy: import from the local barrel-3 so fallow has a re-export chain to resolve.
  const barrelImport = HEAVY ? `import { value1 as fromBarrel } from './barrel-3';\n` : '';
  // --heavy: extra exports + a type, an enum, and a re-export to widen the analyze surface.
  const extraExports = HEAVY
    ? `
export type T${i} = { id: number; tag: string; meta: Record<string, unknown> };
export enum E${i} { A = '${w}-${i}-a', B = '${w}-${i}-b', C = '${w}-${i}-c' }
export interface I${i} { fn(x: T${i}): T${i}; }
export { value0 as alias${i} } from './mod-${(i + 2) % FILES_PER_WS}';
export const arr${i} = [1, 2, 3].map((x) => x + ${i});
`
    : '';
  return `${crossWsImport}${barrelImport}import { value0 as upstream } from './mod-${importTarget}';
export const value0 = upstream + ${i}${HEAVY ? ' + (typeof crossWs === "number" ? crossWs : 0) + (typeof fromBarrel === "number" ? fromBarrel : 0)' : ''};
export const value1 = ${i} * 2;
export const value2 = '${w}-${i}';
export function fn0(x: number): number { return x + ${i}; }
export class Cls0 { id = ${i}; method(): string { return 'm-${i}'; } }${extraExports}`;
}
// ─── Helpers ─────────────────────────────────────────────────────────────────
/**
 * Initialize a git repo in OUT: identity config, initial commit of everything,
 * then EXTRA_COMMITS follow-up commits each touching one module so churn data
 * is spread across files.
 */
function initGit() {
  console.error('Initializing git...');
  execSync('git init -q', { cwd: OUT });
  execSync('git config user.email "synthetic@example.com"', { cwd: OUT });
  execSync('git config user.name "Synthetic User"', { cwd: OUT });
  execSync('git add .', { cwd: OUT });
  execSync('git commit -q -m "initial commit"', { cwd: OUT });
  if (EXTRA_COMMITS > 0) {
    console.error(`Adding ${EXTRA_COMMITS} extra commits (touching one file each)...`);
    // Deterministic round-robin over workspaces/files so churn spreads across
    // different files (not "random" — each commit index picks ws c % WORKSPACES,
    // file c % FILES_PER_WS).
    for (let c = 0; c < EXTRA_COMMITS; c++) {
      const w = c % WORKSPACES;
      const i = c % FILES_PER_WS;
      const filePath = path.join(OUT, 'packages', `ws-${w}`, 'src', `mod-${i}.ts`);
      // Append a no-op comment line. Each commit modifies a different file when possible.
      fs.appendFileSync(filePath, `// commit ${c}\n`);
      // Fix: quote the interpolated path — OUT may contain spaces, and an
      // unquoted absolute path would be word-split by the shell.
      execSync(`git add "${filePath}"`, { cwd: OUT });
      execSync(`git commit -q -m "commit ${c}"`, { cwd: OUT });
    }
  }
}
/**
 * Parse CLI flags into a plain object.
 *
 * Supported forms (generalized, backward-compatible):
 *   --key value   -> { key: 'value' }
 *   --key=value   -> { key: 'value' }   (new)
 *   --key         -> { key: true }      (bare flag, or followed by another --flag)
 * Tokens that do not start with `--` and are not a flag's value are skipped.
 *
 * @param {string[]} argv - Arguments, typically `process.argv.slice(2)`.
 * @returns {Record<string, string | true>} flag name -> string value or `true`.
 */
function parseArgs(argv) {
  const out = {};
  for (let i = 0; i < argv.length; i++) {
    const a = argv[i];
    if (!a.startsWith('--')) continue; // stray positional token — ignore
    const body = a.slice(2);
    // `--key=value` form: split on the first `=` only.
    const eq = body.indexOf('=');
    if (eq !== -1) {
      out[body.slice(0, eq)] = body.slice(eq + 1);
      continue;
    }
    const next = argv[i + 1];
    if (next === undefined || next.startsWith('--')) {
      out[body] = true; // bare flag
    } else {
      out[body] = next;
      i++; // consume the value token
    }
  }
  return out;
}
## Problem

The `analyze` stage in `crates/core/src/analyze/` does not appear to use rayon: a grep for `par_iter` or `rayon::` across that directory returns zero hits. The stage runs ~10 independent detectors back-to-back on the main thread. Each detector writes to a different field of the result struct, so the detectors look mutually independent. Several also internally walk `graph.modules.iter()` serially (`find_duplicate_exports` at lines 621/645 of `crates/core/src/analyze/unused_exports.rs` is the clearest example).

By contrast, the parse/extract stage already uses rayon (`crates/extract/src/lib.rs:73` calls `.par_iter()` inside `parse_all_files`) and scales nicely with cores.

On a synthetic 24,320-file monorepo with realistic barrel files and cross-workspace imports, the `analyze` stage takes 6.75 s on a 14-core machine while the process consumes barely more than one core's worth of CPU (user/wall ≈ 0.87). It is the largest single stage in the pipeline.

Save the synthetic-monorepo generator above as `gen-monorepo.cjs` (click to expand — ~260 lines, no external deps).

## Reproduction (synthetic 24k-file monorepo, fallow 2.60.0)
1. Save the generator from the section above as `gen-monorepo.cjs`.
2. Generate a "heavy" monorepo (barrel files, cross-workspace imports, heterogeneous plugin profiles). Takes ~2 sec.
3. Run fallow with timing and CPU info.

The `analyze` row dominates the Pipeline Performance table. The `time -p` summary shows user CPU is roughly equal to wall clock, despite many cores being available.

## Proposed solution
Two complementary opportunities, in order of expected impact and ease:

1. Run the independent detectors in parallel in `find_dead_code_full` (`crates/core/src/analyze/mod.rs:91`). Each detector (`find_unused_files`, `find_unused_exports`, `find_unused_members`, `find_unused_dependencies`, `find_unresolved_imports`, `find_duplicate_exports`, `find_boundary_violations`, `graph.find_cycles`, …) writes to a different field of `AnalysisResults`, so they are already mutually independent. A `rayon::join`/`scope` could fan them out across the Rayon pool the same way `parse_all_files` does.
2. Parallelize the per-module loops inside the heaviest detectors. `find_duplicate_exports` (`crates/core/src/analyze/unused_exports.rs:621`/`:645`) and the file-scope passes inside `find_unused_exports` look like clean `par_iter` candidates. Graph traversals with shared visited-sets (e.g. cycle detection) may need to stay serial for correctness.

A reasonable target is to bring `analyze` from ~6.75 s to ~1–2 s on a 14-core machine on this synthetic repo, matching the multi-core scaling that parse/extract already achieves on the same hardware.

## Alternatives considered

- Pipelining: running `analyze` concurrently with later stages. Hard because downstream stages depend on the full analyze output.
- Caching analyze results: does not help `--no-cache` runs, or any run where the input graph changed.