Parallelize CLI report generation in repo-tools

Converts createBinRunner from spawnSync to async spawn, and processes
all CLI packages concurrently with a shared p-limit limiter bounded
by CPU count. Reduces wall-clock time from ~50s to ~12s.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
Signed-off-by: Fredrik Adelöw <freben@spotify.com>
This commit is contained in:
Fredrik Adelöw
2026-03-28 17:48:33 +01:00
parent 0cb1189130
commit 8e9679b3eb
3 changed files with 126 additions and 100 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/repo-tools': patch
---
Parallelized CLI report generation, making it roughly 4x faster in wall-clock time.
@@ -19,7 +19,9 @@ import {
resolve as resolvePath,
relative as relativePath,
} from 'node:path';
import os from 'node:os';
import fs from 'fs-extra';
import pLimit from 'p-limit';
import { createBinRunner } from '../../util';
import { CliHelpPage, CliModel } from './types';
import { targetPaths } from '@backstage/cli-common';
@@ -92,11 +94,12 @@ function parseHelpPage(helpPageContent: string) {
async function exploreCliHelpPages(
run: (...args: string[]) => Promise<string>,
limit: pLimit.Limit,
): Promise<CliHelpPage[]> {
const helpPages = new Array<CliHelpPage>();
async function exploreHelpPage(...path: string[]) {
const content = await run(...path, '--help');
const content = await limit(() => run(...path, '--help'));
const parsed = parseHelpPage(content);
helpPages.push({ path, ...parsed });
@@ -126,73 +129,79 @@ export async function runCliExtraction({
packageDirs,
isLocalBuild,
}: CliExtractionOptions) {
for (const packageDir of packageDirs) {
console.log(`## Processing ${packageDir}`);
const fullDir = targetPaths.resolveRoot(packageDir);
const pkgJson = await fs.readJson(resolvePath(fullDir, 'package.json'));
// Share a single concurrency limiter across all packages so that we run
// multiple help-page invocations in parallel without exhausting resources.
const limit = pLimit(os.cpus().length);
if (!pkgJson.bin) {
if (pkgJson.backstage?.role === 'cli') {
throw new Error(
`CLI package ${pkgJson.name} is missing a "bin" field in its package.json`,
);
await Promise.all(
packageDirs.map(async packageDir => {
console.log(`## Processing ${packageDir}`);
const fullDir = targetPaths.resolveRoot(packageDir);
const pkgJson = await fs.readJson(resolvePath(fullDir, 'package.json'));
if (!pkgJson.bin) {
if (pkgJson.backstage?.role === 'cli') {
throw new Error(
`CLI package ${pkgJson.name} is missing a "bin" field in its package.json`,
);
}
return;
}
continue;
}
const models = new Array<CliModel>();
if (typeof pkgJson.bin === 'string') {
const run = createBinRunner(fullDir, pkgJson.bin);
const helpPages = await exploreCliHelpPages(run);
models.push({ name: basename(pkgJson.bin), helpPages });
} else {
for (const [name, path] of Object.entries<string>(pkgJson.bin)) {
const run = createBinRunner(fullDir, path);
const helpPages = await exploreCliHelpPages(run);
models.push({ name, helpPages });
}
}
for (const model of models) {
const report = generateCliReport({ packageName: pkgJson.name, model });
const reportPath = resolvePath(
fullDir,
`cli-report.${models.length === 1 ? '' : `${model.name}.`}md`,
);
const existingReport = await fs
.readFile(reportPath, 'utf8')
.catch(error => {
if (error.code === 'ENOENT') {
return undefined;
}
throw error;
});
if (existingReport !== report) {
if (isLocalBuild) {
console.warn(`CLI report changed for ${packageDir}`);
await fs.writeFile(reportPath, report);
} else {
logApiReportInstructions();
if (existingReport) {
console.log('');
console.log(
`The conflicting file is ${relativePath(
targetPaths.rootDir,
reportPath,
)}, expecting the following content:`,
);
console.log('');
console.log(report);
logApiReportInstructions();
}
throw new Error(`CLI report changed for ${packageDir}, `);
const models = new Array<CliModel>();
if (typeof pkgJson.bin === 'string') {
const run = createBinRunner(fullDir, pkgJson.bin);
const helpPages = await exploreCliHelpPages(run, limit);
models.push({ name: basename(pkgJson.bin), helpPages });
} else {
for (const [name, path] of Object.entries<string>(pkgJson.bin)) {
const run = createBinRunner(fullDir, path);
const helpPages = await exploreCliHelpPages(run, limit);
models.push({ name, helpPages });
}
}
}
}
for (const model of models) {
const report = generateCliReport({ packageName: pkgJson.name, model });
const reportPath = resolvePath(
fullDir,
`cli-report.${models.length === 1 ? '' : `${model.name}.`}md`,
);
const existingReport = await fs
.readFile(reportPath, 'utf8')
.catch(error => {
if (error.code === 'ENOENT') {
return undefined;
}
throw error;
});
if (existingReport !== report) {
if (isLocalBuild) {
console.warn(`CLI report changed for ${packageDir}`);
await fs.writeFile(reportPath, report);
} else {
logApiReportInstructions();
if (existingReport) {
console.log('');
console.log(
`The conflicting file is ${relativePath(
targetPaths.rootDir,
reportPath,
)}, expecting the following content:`,
);
console.log('');
console.log(report);
logApiReportInstructions();
}
throw new Error(`CLI report changed for ${packageDir}, `);
}
}
}
}),
);
}
+47 -35
View File
@@ -14,7 +14,7 @@
* limitations under the License.
*/
import { spawnSync } from 'node:child_process';
import { spawn } from 'node:child_process';
import { randomUUID } from 'node:crypto';
import { openSync, closeSync, readFileSync, unlinkSync } from 'node:fs';
import { tmpdir } from 'node:os';
@@ -24,53 +24,65 @@ import { join } from 'node:path';
const ansiPattern = new RegExp(`${String.fromCharCode(0x1b)}\\[[0-9;]*m`, 'g');
/**
* Redirect stdout to a temp file so that Node.js creates a SyncWriteStream
* (synchronous writes) in the child instead of an async pipe stream. This
* prevents data loss when child processes call process.exit() before the
* async stream buffer has been flushed.
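* Redirect stdout to a temp file so that the child writes to a file descriptor
* (which Node.js writes to synchronously) instead of an async pipe. Output is
* therefore captured reliably even when child processes call process.exit()
* before async buffers would have been flushed.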
*
* Uses spawnSync which blocks the event loop, so no concurrency limiter is
* needed — each call naturally runs sequentially.
* Uses async spawn so that multiple invocations can run concurrently when
* combined with a concurrency limiter.
*/
export function createBinRunner(cwd: string, path: string) {
return async (...command: string[]) => {
const args = path ? [path, ...command] : command;
const outPath = join(tmpdir(), `backstage-cli-out-${randomUUID()}.txt`);
const outFd = openSync(outPath, 'w');
return (...command: string[]) => {
return new Promise<string>((resolve, reject) => {
const args = path ? [path, ...command] : command;
const outPath = join(tmpdir(), `backstage-cli-out-${randomUUID()}.txt`);
const outFd = openSync(outPath, 'w');
try {
const result = spawnSync('node', args, {
const child = spawn('node', args, {
cwd,
env: { ...process.env, NO_COLOR: '1' },
stdio: ['ignore', outFd, 'pipe'],
});
// The fd is duplicated by the OS for the child process, so we can
// close our copy immediately after spawn.
closeSync(outFd);
const stdout = readFileSync(outPath, 'utf8').replace(ansiPattern, '');
if (result.error) {
throw new Error(`Process error: ${result.error.message}`);
}
const stderrChunks: Buffer[] = [];
child.stderr?.on('data', chunk => stderrChunks.push(chunk));
const stderr = result.stderr?.toString() ?? '';
child.on('error', err => {
try {
unlinkSync(outPath);
} catch {
/* ignore cleanup errors */
}
reject(new Error(`Process error: ${err.message}`));
});
if (result.signal) {
throw new Error(
`Process was killed with signal ${result.signal}\n${stderr}`,
);
} else if (result.status !== 0) {
throw new Error(`Process exited with code ${result.status}\n${stderr}`);
} else if (stderr.trim()) {
throw new Error(`Command printed error output: ${stderr}`);
}
child.on('close', (code, signal) => {
try {
const stdout = readFileSync(outPath, 'utf8').replace(ansiPattern, '');
const stderr = Buffer.concat(stderrChunks).toString();
return stdout;
} finally {
try {
unlinkSync(outPath);
} catch {
/* ignore cleanup errors */
}
}
if (signal) {
reject(
new Error(`Process was killed with signal ${signal}\n${stderr}`),
);
} else if (code !== 0) {
reject(new Error(`Process exited with code ${code}\n${stderr}`));
} else if (stderr.trim()) {
reject(new Error(`Command printed error output: ${stderr}`));
} else {
resolve(stdout);
}
} finally {
try {
unlinkSync(outPath);
} catch {
/* ignore cleanup errors */
}
}
});
});
};
}