chunk up refs in getEntitiesByRefs

Signed-off-by: Fredrik Adelöw <freben@gmail.com>
This commit is contained in:
Fredrik Adelöw
2024-10-16 09:52:03 +02:00
parent 605bdc01cf
commit 31c4fe0c17
4 changed files with 219 additions and 20 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/catalog-client': minor
---
The client now automatically splits up very large `getEntitiesByRefs` calls into several smaller requests behind the scenes when needed. This ensures that each individual request does not exceed common Express.js request body limits or overload the server.
+26 -20
View File
@@ -41,7 +41,7 @@ import {
QueryEntitiesResponse,
ValidateEntityResponse,
} from './types/api';
import { isQueryEntitiesInitialRequest } from './utils';
import { isQueryEntitiesInitialRequest, splitRefsIntoChunks } from './utils';
import { DefaultApiClient, TypedResponse } from './generated';
/**
@@ -151,28 +151,34 @@ export class CatalogClient implements CatalogApi {
request: GetEntitiesByRefsRequest,
options?: CatalogRequestOptions,
): Promise<GetEntitiesByRefsResponse> {
const response = await this.apiClient.getEntitiesByRefs(
{
body: {
entityRefs: request.entityRefs,
fields: request.fields,
const getOneChunk = async (refs: string[]) => {
const response = await this.apiClient.getEntitiesByRefs(
{
body: { entityRefs: refs, fields: request.fields },
query: { filter: this.getFilterValue(request.filter) },
},
query: {
filter: this.getFilterValue(request.filter),
},
},
options,
);
if (!response.ok) {
throw await ResponseError.fromResponse(response);
}
const { items } = (await response.json()) as {
items: Array<Entity | null>;
options,
);
if (!response.ok) {
throw await ResponseError.fromResponse(response);
}
const body = (await response.json()) as {
items: Array<Entity | null>;
};
return body.items.map(i => i ?? undefined);
};
return { items: items.map(i => i ?? undefined) };
let result: Array<Entity | undefined> | undefined;
for (const refs of splitRefsIntoChunks(request.entityRefs)) {
const entities = await getOneChunk(refs);
if (!result) {
result = entities;
} else {
result.push(...entities);
}
}
return { items: result ?? [] };
}
/**
+124
View File
@@ -0,0 +1,124 @@
/*
* Copyright 2024 The Backstage Authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { splitRefsIntoChunks } from './utils';
describe('splitRefsIntoChunks', () => {
  // Options accepted by splitRefsIntoChunks, spelled out locally so that each
  // table row below can be type checked.
  type ChunkOptions = {
    maxCountPerChunk?: number;
    maxStringLengthPerChunk?: number;
    extraStringLengthPerRef?: number;
  };

  // One table row: the options to pass in, and the chunks that must come out.
  type Scenario = [options: ChunkOptions, expected: string[][]];

  // Runs every scenario in order against a fresh copy of the same refs.
  const expectChunks = (refs: string[], scenarios: Scenario[]) => {
    for (const [options, expected] of scenarios) {
      expect(splitRefsIntoChunks([...refs], options)).toEqual(expected);
    }
  };

  it('splits by count limit', () => {
    expectChunks(
      ['a', 'b', 'c', 'd'],
      [
        // a limit of zero still yields one ref per chunk
        [{ maxCountPerChunk: 0 }, [['a'], ['b'], ['c'], ['d']]],
        [{ maxCountPerChunk: 1 }, [['a'], ['b'], ['c'], ['d']]],
        [
          { maxCountPerChunk: 2 },
          [
            ['a', 'b'],
            ['c', 'd'],
          ],
        ],
        [{ maxCountPerChunk: 3 }, [['a', 'b', 'c'], ['d']]],
        [{ maxCountPerChunk: 4 }, [['a', 'b', 'c', 'd']]],
        [{ maxCountPerChunk: 5 }, [['a', 'b', 'c', 'd']]],
        [
          {
            maxCountPerChunk: 5,
            maxStringLengthPerChunk: 3, // the stricter limit now
            extraStringLengthPerRef: 0,
          },
          [['a', 'b', 'c'], ['d']],
        ],
      ],
    );
  });

  it('splits by length limit', () => {
    expectChunks(
      ['aa', 'b', 'c'],
      [
        // limits smaller than a single ref still yield one ref per chunk
        [
          { maxStringLengthPerChunk: 0, extraStringLengthPerRef: 0 },
          [['aa'], ['b'], ['c']],
        ],
        [
          { maxStringLengthPerChunk: 1, extraStringLengthPerRef: 0 },
          [['aa'], ['b'], ['c']],
        ],
        [
          { maxStringLengthPerChunk: 2, extraStringLengthPerRef: 0 },
          [['aa'], ['b', 'c']],
        ],
        [
          { maxStringLengthPerChunk: 3, extraStringLengthPerRef: 0 },
          [['aa', 'b'], ['c']],
        ],
        [
          {
            maxStringLengthPerChunk: 3,
            extraStringLengthPerRef: 0,
            maxCountPerChunk: 1, // the stricter limit now
          },
          [['aa'], ['b'], ['c']],
        ],
      ],
    );
  });

  it('splits while the extra length is taken into account', () => {
    expectChunks(
      ['aaa', 'bbb', 'ccc'],
      [
        [
          { maxStringLengthPerChunk: 9, extraStringLengthPerRef: 0 },
          [['aaa', 'bbb', 'ccc']],
        ],
        [
          { maxStringLengthPerChunk: 9, extraStringLengthPerRef: 1 },
          [['aaa', 'bbb'], ['ccc']],
        ],
        [
          { maxStringLengthPerChunk: 9, extraStringLengthPerRef: 2 },
          [['aaa'], ['bbb'], ['ccc']],
        ],
        [
          {
            maxStringLengthPerChunk: 9,
            extraStringLengthPerRef: 0,
            maxCountPerChunk: 2, // the stricter limit now
          },
          [['aaa', 'bbb'], ['ccc']],
        ],
      ],
    );
  });
});
+64
View File
@@ -24,3 +24,67 @@ export function isQueryEntitiesInitialRequest(
): request is QueryEntitiesInitialRequest {
return !(request as QueryEntitiesCursorRequest).cursor;
}
/**
 * Partitions a list of entity refs into consecutive chunks (groups), keeping
 * the original order, so that each chunk stays within both a maximum ref count
 * and a maximum accumulated string length.
 *
 * The default limits are chosen so that a chunk, when JSON encoded as an
 * array, stays below the default Express.js request body limit of 100 kB with
 * some margin.
 *
 * Lengths are measured with the string `length` property, plus a fixed
 * overhead per ref. A single ref that on its own exceeds a limit is still
 * emitted, alone in its chunk; no chunk is ever empty.
 *
 * @param refs - The entity refs to split up
 * @param options - Optional overrides of the default limits
 * @returns The chunks in order; an empty array if `refs` is empty
 */
export function splitRefsIntoChunks(
  refs: string[],
  options?: {
    // Upper bound on the number of refs in any one chunk (default 1000)
    maxCountPerChunk?: number;
    // Upper bound on the summed length of the refs in any one chunk, where
    // each ref also counts for extraStringLengthPerRef additional characters
    // (default 90 * 2 ** 10)
    maxStringLengthPerChunk?: number;
    // Number of characters added on top of the length of each ref when
    // summing up (default is 3, since each array entry is surrounded by
    // quotes and followed by a comma)
    extraStringLengthPerRef?: number;
  },
): string[][] {
  // nothing in, nothing out - in particular no empty trailing chunk
  if (!refs.length) {
    return [];
  }

  const {
    maxCountPerChunk: countLimit = 1000,
    maxStringLengthPerChunk: lengthLimit = 90 * 2 ** 10,
    extraStringLengthPerRef: overheadPerRef = 3,
  } = options ?? {};

  const result: string[][] = [];
  let pending: string[] = [];
  let pendingLength = 0;

  for (const ref of refs) {
    const cost = ref.length + overheadPerRef;

    // would adding this ref break either the length or the count limit?
    const overflows =
      pendingLength + cost > lengthLimit || pending.length + 1 > countLimit;

    // only close the pending chunk if it holds something already; this is
    // what guarantees at least one ref per chunk even with absurd limits
    if (pending.length > 0 && overflows) {
      result.push(pending);
      pending = [];
      pendingLength = 0;
    }

    pending.push(ref);
    pendingLength += cost;
  }

  // the loop always leaves at least one ref pending, so flush it
  result.push(pending);

  return result;
}