Merge commit from fork

* chore: ensure redirects are validated against the reading config

* address comments

Signed-off-by: Fredrik Adelöw <freben@gmail.com>

---------

Signed-off-by: Fredrik Adelöw <freben@gmail.com>
Co-authored-by: Fredrik Adelöw <freben@gmail.com>
This commit is contained in:
Ben Lambert
2026-01-20 16:06:14 +01:00
committed by benjdlambert
parent c641c147ab
commit 27f9061d24
8 changed files with 511 additions and 69 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/backend-defaults': minor
---
**BREAKING**: The constructor for `FetchUrlReader` is now private. If you have to construct an instance of it, please use `FetchUrlReader.fromConfig` instead.
+15
View File
@@ -0,0 +1,15 @@
---
'@backstage/backend-defaults': minor
---
**BREAKING**: `coreServices.urlReader` now validates that redirect chains are subject to the allow list in `reading.allow` of your app config. If you were relying on redirects that pointed to URLs that were not allowlisted, you will now have to add those to your config as well.
Example:
```diff
backend:
reading:
allow:
- host: example.com
+ - host: storage-api.example.com
```
+6
View File
@@ -0,0 +1,6 @@
---
'@backstage/plugin-scaffolder-backend': patch
'@backstage/plugin-scaffolder-node': patch
---
REwrite]
@@ -12,6 +12,7 @@ import { AzureIntegration } from '@backstage/integration';
import { BitbucketCloudIntegration } from '@backstage/integration';
import { BitbucketIntegration } from '@backstage/integration';
import { BitbucketServerIntegration } from '@backstage/integration';
import { Config } from '@backstage/config';
import { GerritIntegration } from '@backstage/integration';
import { GiteaIntegration } from '@backstage/integration';
import { GithubCredentialsProvider } from '@backstage/integration';
@@ -225,6 +226,8 @@ export class BitbucketUrlReader implements UrlReaderService {
export class FetchUrlReader implements UrlReaderService {
static factory: ReaderFactory;
// (undocumented)
static fromConfig(config: Config): FetchUrlReader;
// (undocumented)
read(url: string): Promise<Buffer>;
// (undocumented)
readTree(): Promise<UrlReaderServiceReadTreeResponse>;
@@ -25,8 +25,6 @@ import { setupServer } from 'msw/node';
import { FetchUrlReader } from './FetchUrlReader';
import { DefaultReadTreeResponseFactory } from './tree';
const fetchUrlReader = new FetchUrlReader();
describe('FetchUrlReader', () => {
const worker = setupServer();
@@ -188,6 +186,16 @@ describe('FetchUrlReader', () => {
describe('read', () => {
it('should return etag from the response', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
const { buffer } = await fetchUrlReader.readUrl(
'https://backstage.io/some-resource',
);
@@ -196,12 +204,32 @@ describe('FetchUrlReader', () => {
});
it('should throw NotFound if server responds with 404', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/not-exists'),
).rejects.toThrow(NotFoundError);
});
it('should throw Error if server responds with 500', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/error'),
).rejects.toThrow(Error);
@@ -210,6 +238,16 @@ describe('FetchUrlReader', () => {
describe('readUrl', () => {
it('should throw NotModified if server responds with 304 from etag', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/some-resource', {
etag: 'foo',
@@ -218,6 +256,16 @@ describe('FetchUrlReader', () => {
});
it('should throw NotModified if server responds with 304 from lastModifiedAfter', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/some-resource', {
lastModifiedAfter: new Date('2020-01-01T00:00:00Z'),
@@ -238,6 +286,16 @@ describe('FetchUrlReader', () => {
),
);
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await fetchUrlReader.readUrl(
'https://backstage.io/requires-authentication',
{
@@ -247,6 +305,16 @@ describe('FetchUrlReader', () => {
});
it('should return etag from the response', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
const response = await fetchUrlReader.readUrl(
'https://backstage.io/some-resource',
);
@@ -255,20 +323,300 @@ describe('FetchUrlReader', () => {
});
it('should throw NotFound if server responds with 404', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/not-exists'),
).rejects.toThrow(NotFoundError);
});
it('should throw Error if server responds with 500', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.readUrl('https://backstage.io/error'),
).rejects.toThrow(Error);
});
it('should block redirects to disallowed hosts to prevent SSRF', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/redirect', (_req, res, ctx) => {
return res(
ctx.status(302),
ctx.set('location', 'https://evil.com/steal-data'),
);
}),
);
await expect(
reader.readUrl('https://backstage.io/redirect'),
).rejects.toThrow(/not allowed/);
});
it('should allow redirects to allowed hosts', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/redirect', (_req, res, ctx) => {
return res(
ctx.status(302),
ctx.set('location', 'https://backstage.io/some-resource'),
);
}),
);
const response = await reader.readUrl('https://backstage.io/redirect');
expect((await response.buffer()).toString()).toBe('content foo');
});
it('should block initial requests to disallowed hosts', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
reader.readUrl('https://evil.com/steal-data'),
).rejects.toThrow(/not allowed/);
});
it('should handle relative redirect locations', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/old-path', (_req, res, ctx) => {
return res(ctx.status(301), ctx.set('location', '/some-resource'));
}),
);
const response = await reader.readUrl('https://backstage.io/old-path');
expect((await response.buffer()).toString()).toBe('content foo');
});
it('should handle relative redirect locations with ../', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/deep/nested/path', (_req, res, ctx) => {
return res(
ctx.status(302),
ctx.set('location', '../../some-resource'),
);
}),
);
const response = await reader.readUrl(
'https://backstage.io/deep/nested/path',
);
expect((await response.buffer()).toString()).toBe('content foo');
});
it('should follow multiple redirect hops', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/hop1', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/hop2'));
}),
rest.get('https://backstage.io/hop2', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/hop3'));
}),
rest.get('https://backstage.io/hop3', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/some-resource'));
}),
);
const response = await reader.readUrl('https://backstage.io/hop1');
expect((await response.buffer()).toString()).toBe('content foo');
});
it('should reject when max redirects exceeded', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
// Create a chain of 6 redirects (exceeds MAX_REDIRECTS of 5)
worker.use(
rest.get('https://backstage.io/loop0', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop1'));
}),
rest.get('https://backstage.io/loop1', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop2'));
}),
rest.get('https://backstage.io/loop2', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop3'));
}),
rest.get('https://backstage.io/loop3', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop4'));
}),
rest.get('https://backstage.io/loop4', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop5'));
}),
rest.get('https://backstage.io/loop5', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/loop6'));
}),
);
await expect(
reader.readUrl('https://backstage.io/loop0'),
).rejects.toThrow(/Too many redirects/);
});
it('should block redirect to disallowed host mid-chain', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/hop1', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/hop2'));
}),
rest.get('https://backstage.io/hop2', (_req, res, ctx) => {
return res(
ctx.status(302),
ctx.set('location', 'https://evil.com/steal'),
);
}),
);
await expect(reader.readUrl('https://backstage.io/hop1')).rejects.toThrow(
/not allowed/,
);
});
it('should handle 307 and 308 redirects', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/temp-redirect', (_req, res, ctx) => {
return res(ctx.status(307), ctx.set('location', '/some-resource'));
}),
);
const response = await reader.readUrl(
'https://backstage.io/temp-redirect',
);
expect((await response.buffer()).toString()).toBe('content foo');
});
it('should validate paths in redirect targets', async () => {
const reader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io', paths: ['/allowed/'] }],
},
},
}),
);
worker.use(
rest.get('https://backstage.io/allowed/start', (_req, res, ctx) => {
return res(ctx.status(302), ctx.set('location', '/forbidden/path'));
}),
);
await expect(
reader.readUrl('https://backstage.io/allowed/start'),
).rejects.toThrow(/not allowed/);
});
});
describe('search', () => {
it('should return a file', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
const data = await fetchUrlReader.search(
`https://backstage.io/some-resource`,
{ etag: 'etag' },
@@ -280,6 +628,16 @@ describe('FetchUrlReader', () => {
});
it('should return an empty list of file if not found', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
const data = await fetchUrlReader.search(
`https://backstage.io/not-exists`,
{ etag: 'etag' },
@@ -289,6 +647,16 @@ describe('FetchUrlReader', () => {
});
it('throws if given URL with wildcard', async () => {
const fetchUrlReader = FetchUrlReader.fromConfig(
new ConfigReader({
backend: {
reading: {
allow: [{ host: 'backstage.io' }],
},
},
}),
);
await expect(
fetchUrlReader.search(`https://backstage.io/some-resource*`, {
etag: 'etag',
@@ -30,6 +30,10 @@ import {
import { ReaderFactory } from './types';
import path from 'path';
import { ReadUrlResponseFactory } from './ReadUrlResponseFactory';
import { Config } from '@backstage/config';
const REDIRECT_STATUS_CODES = [301, 302, 307, 308];
const MAX_REDIRECTS = 5;
const isInRange = (num: number, [start, end]: [number, number]) => {
return num >= start && num <= end;
@@ -66,6 +70,37 @@ const parsePortPredicate = (port: string | undefined) => {
return (url: URL) => !url.port;
};
function predicateFromConfig(config: Config): (url: URL) => boolean {
const allow = config
.getOptionalConfigArray('backend.reading.allow')
?.map(allowConfig => {
const paths = allowConfig.getOptionalStringArray('paths');
const checkPath = paths
? (url: URL) => {
const targetPath = path.posix.normalize(url.pathname);
return paths.some(allowedPath =>
targetPath.startsWith(allowedPath),
);
}
: (_url: URL) => true;
const host = allowConfig.getString('host');
const [hostname, port] = host.split(':');
const checkPort = parsePortPredicate(port);
if (hostname.startsWith('*.')) {
const suffix = hostname.slice(1);
return (url: URL) =>
url.hostname.endsWith(suffix) && checkPath(url) && checkPort(url);
}
return (url: URL) =>
url.hostname === hostname && checkPath(url) && checkPort(url);
});
return allow?.length ? url => allow.some(p => p(url)) : () => false;
}
/**
* A {@link @backstage/backend-plugin-api#UrlReaderService} that does a plain fetch of the URL.
*
@@ -85,38 +120,21 @@ export class FetchUrlReader implements UrlReaderService {
* An optional list of paths which are allowed. If the list is omitted all paths are allowed.
*/
static factory: ReaderFactory = ({ config }) => {
const predicates =
config
.getOptionalConfigArray('backend.reading.allow')
?.map(allowConfig => {
const paths = allowConfig.getOptionalStringArray('paths');
const checkPath = paths
? (url: URL) => {
const targetPath = path.posix.normalize(url.pathname);
return paths.some(allowedPath =>
targetPath.startsWith(allowedPath),
);
}
: (_url: URL) => true;
const host = allowConfig.getString('host');
const [hostname, port] = host.split(':');
const checkPort = parsePortPredicate(port);
if (hostname.startsWith('*.')) {
const suffix = hostname.slice(1);
return (url: URL) =>
url.hostname.endsWith(suffix) && checkPath(url) && checkPort(url);
}
return (url: URL) =>
url.hostname === hostname && checkPath(url) && checkPort(url);
}) ?? [];
const reader = new FetchUrlReader();
const predicate = (url: URL) => predicates.some(p => p(url));
const predicate = predicateFromConfig(config);
const reader = new FetchUrlReader({ predicate });
return [{ reader, predicate }];
};
static fromConfig(config: Config): FetchUrlReader {
return new FetchUrlReader({ predicate: predicateFromConfig(config) });
}
readonly #predicate: (url: URL) => boolean;
private constructor(options: { predicate: (url: URL) => boolean }) {
this.#predicate = options.predicate;
}
async read(url: string): Promise<Buffer> {
const response = await this.readUrl(url);
return response.buffer();
@@ -126,41 +144,69 @@ export class FetchUrlReader implements UrlReaderService {
url: string,
options?: UrlReaderServiceReadUrlOptions,
): Promise<UrlReaderServiceReadUrlResponse> {
let response: Response;
try {
response = await fetch(url, {
headers: {
...(options?.etag && { 'If-None-Match': options.etag }),
...(options?.lastModifiedAfter && {
'If-Modified-Since': options.lastModifiedAfter.toUTCString(),
}),
...(options?.token && { Authorization: `Bearer ${options.token}` }),
},
// TODO(freben): The signal cast is there because pre-3.x versions of
// node-fetch have a very slightly deviating AbortSignal type signature.
// The difference does not affect us in practice however. The cast can
// be removed after we support ESM for CLI dependencies and migrate to
// version 3 of node-fetch.
// https://github.com/backstage/backstage/issues/8242
signal: options?.signal as any,
});
} catch (e) {
throw new Error(`Unable to read ${url}, ${e}`);
let currentUrl = url;
for (
let redirectCount = 0;
redirectCount < MAX_REDIRECTS;
redirectCount += 1
) {
// Validate URL against predicate if configured
const parsedUrl = new URL(currentUrl);
if (!this.#predicate(parsedUrl)) {
throw new Error(
`URL not allowed by backend.reading.allow configuration: ${currentUrl}`,
);
}
let response: Response;
try {
response = await fetch(currentUrl, {
headers: {
...(options?.etag && { 'If-None-Match': options.etag }),
...(options?.lastModifiedAfter && {
'If-Modified-Since': options.lastModifiedAfter.toUTCString(),
}),
...(options?.token && { Authorization: `Bearer ${options.token}` }),
},
// Handle redirects manually to validate targets against the allowlist
redirect: 'manual',
// TODO(freben): The signal cast is there because pre-3.x versions of
// node-fetch have a very slightly deviating AbortSignal type signature.
// The difference does not affect us in practice however. The cast can
// be removed after we support ESM for CLI dependencies and migrate to
// version 3 of node-fetch.
// https://github.com/backstage/backstage/issues/8242
signal: options?.signal as any,
});
} catch (e) {
throw new Error(`Unable to read ${currentUrl}, ${e}`);
}
if (response.ok) {
return ReadUrlResponseFactory.fromResponse(response);
}
if (response.status === 304) {
throw new NotModifiedError();
}
const location = response.headers.get('location');
if (!REDIRECT_STATUS_CODES.includes(response.status) || !location) {
const message = `could not read ${currentUrl}, ${response.status} ${response.statusText}`;
if (response.status === 404) {
throw new NotFoundError(message);
}
throw new Error(message);
}
// Follow the redirect
currentUrl = new URL(location, currentUrl).toString();
}
if (response.status === 304) {
throw new NotModifiedError();
}
if (response.ok) {
return ReadUrlResponseFactory.fromResponse(response);
}
const message = `could not read ${url}, ${response.status} ${response.statusText}`;
if (response.status === 404) {
throw new NotFoundError(message);
}
throw new Error(message);
throw new Error(
`Too many redirects (max ${MAX_REDIRECTS}) when reading ${url}`,
);
}
async readTree(): Promise<UrlReaderServiceReadTreeResponse> {
@@ -16,10 +16,7 @@
import { createTemplateAction } from '@backstage/plugin-scaffolder-node';
import { InputError } from '@backstage/errors';
import {
isChildPath,
resolveSafeChildPath,
} from '@backstage/backend-plugin-api';
import { resolveSafeChildPath } from '@backstage/backend-plugin-api';
import fs from 'fs-extra';
import globby from 'globby';
import { examples } from './delete.examples';
+3 -1
View File
@@ -53,7 +53,9 @@ export async function fetchContents(options: {
if (!fetchUrlIsAbsolute && baseUrl?.startsWith('file://')) {
const basePath = baseUrl.slice('file://'.length);
const srcDir = resolveSafeChildPath(path.dirname(basePath), fetchUrl);
await fs.copy(srcDir, outputPath, { filter: src => isChildPath(srcDir, src) });
await fs.copy(srcDir, outputPath, {
filter: src => isChildPath(srcDir, src),
});
} else {
const readUrl = getReadUrl(fetchUrl, baseUrl, integrations);