catalog-backend: optimize entity filter queries with EXISTS
Switch filter query builders from IN (subquery) to EXISTS (correlated subquery) patterns. This enables PostgreSQL semi-join optimizations (stops at first match) and replaces NOT IN with NOT EXISTS (faster, no NULL-semantics pitfalls). Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com> Signed-off-by: Fredrik Adelöw <freben@spotify.com>
This commit is contained in:
@@ -0,0 +1,5 @@
|
||||
---
|
||||
'@backstage/plugin-catalog-backend': patch
|
||||
---
|
||||
|
||||
Improved catalog entity filter query performance by switching from `IN (subquery)` to `EXISTS (correlated subquery)` patterns. This enables PostgreSQL semi-join optimizations and fixes `NOT IN` NULL-semantics pitfalls by using `NOT EXISTS` instead.
|
||||
@@ -20,9 +20,11 @@ import {
|
||||
} from '@backstage/plugin-catalog-node';
|
||||
import { FilterPredicate } from '@backstage/filter-predicates';
|
||||
import { Knex } from 'knex';
|
||||
import { DbSearchRow } from '../../database/tables';
|
||||
import { applyPredicateEntityFilterToQuery } from './applyPredicateEntityFilterToQuery';
|
||||
|
||||
// Alias used for the search table in EXISTS subqueries
|
||||
const S = 'search_flt';
|
||||
|
||||
function isEntitiesSearchFilter(
|
||||
filter: EntitiesSearchFilter | EntityFilter,
|
||||
): filter is EntitiesSearchFilter {
|
||||
@@ -82,21 +84,20 @@ function applyInStrategy(
|
||||
if (isEntitiesSearchFilter(filter)) {
|
||||
const key = filter.key.toLowerCase();
|
||||
const values = filter.values?.map(v => v.toLowerCase());
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({ key })
|
||||
const subquery = knex(`search as ${S}`)
|
||||
.select(knex.raw('1'))
|
||||
.whereRaw('?? = ??', [`${S}.entity_id`, onEntityIdField])
|
||||
.where(`${S}.key`, key)
|
||||
.andWhere(function keyFilter() {
|
||||
if (values?.length === 1) {
|
||||
this.where({ value: values.at(0) });
|
||||
this.where(`${S}.value`, values.at(0));
|
||||
} else if (values) {
|
||||
this.andWhere('value', 'in', values);
|
||||
this.whereIn(`${S}.value`, values);
|
||||
}
|
||||
});
|
||||
return targetQuery.andWhere(
|
||||
onEntityIdField,
|
||||
negate ? 'not in' : 'in',
|
||||
matchQuery,
|
||||
);
|
||||
return negate
|
||||
? targetQuery.whereNotExists(subquery)
|
||||
: targetQuery.whereExists(subquery);
|
||||
}
|
||||
|
||||
return targetQuery[negate ? 'andWhereNot' : 'andWhere'](
|
||||
|
||||
@@ -21,7 +21,6 @@ import {
|
||||
} from '@backstage/filter-predicates';
|
||||
import { InputError } from '@backstage/errors';
|
||||
import { Knex } from 'knex';
|
||||
import { DbSearchRow } from '../../database/tables';
|
||||
|
||||
function isPrimitive(value: unknown): value is FilterPredicatePrimitive {
|
||||
return (
|
||||
@@ -35,6 +34,20 @@ function isObject(value: unknown): value is Record<string, unknown> {
|
||||
return typeof value === 'object' && value !== null && !Array.isArray(value);
|
||||
}
|
||||
|
||||
// Alias used for the search table in EXISTS subqueries, to avoid ambiguity
|
||||
// when the outer query is also on the search table (e.g. facets queries).
|
||||
const S = 'search_flt';
|
||||
|
||||
/**
|
||||
* Creates an EXISTS subquery base against the search table, correlated on
|
||||
* entity_id with the outer query's entity id field.
|
||||
*/
|
||||
function searchExists(knex: Knex, onEntityIdField: string): Knex.QueryBuilder {
|
||||
return knex(`search as ${S}`)
|
||||
.select(knex.raw('1'))
|
||||
.whereRaw('?? = ??', [`${S}.entity_id`, onEntityIdField]);
|
||||
}
|
||||
|
||||
export function applyPredicateEntityFilterToQuery(options: {
|
||||
filter: FilterPredicate;
|
||||
targetQuery: Knex.QueryBuilder;
|
||||
@@ -128,44 +141,45 @@ function applyFieldCondition(options: {
|
||||
const { key, value, targetQuery, onEntityIdField, knex } = options;
|
||||
|
||||
if (isPrimitive(value)) {
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({
|
||||
key,
|
||||
value: String(value).toLocaleLowerCase('en-US'),
|
||||
});
|
||||
return targetQuery.andWhere(onEntityIdField, 'in', matchQuery);
|
||||
return targetQuery.whereExists(
|
||||
searchExists(knex, onEntityIdField)
|
||||
.where(`${S}.key`, key)
|
||||
.where(`${S}.value`, String(value).toLocaleLowerCase('en-US')),
|
||||
);
|
||||
}
|
||||
|
||||
if (isObject(value)) {
|
||||
if ('$exists' in value) {
|
||||
const existsQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({ key });
|
||||
return targetQuery.andWhere(
|
||||
onEntityIdField,
|
||||
value.$exists ? 'in' : 'not in',
|
||||
existsQuery,
|
||||
const subquery = searchExists(knex, onEntityIdField).where(
|
||||
`${S}.key`,
|
||||
key,
|
||||
);
|
||||
return value.$exists
|
||||
? targetQuery.whereExists(subquery)
|
||||
: targetQuery.whereNotExists(subquery);
|
||||
}
|
||||
|
||||
if ('$in' in value) {
|
||||
const values = value.$in.map(v => String(v).toLocaleLowerCase('en-US'));
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({ key })
|
||||
.whereIn('value', values);
|
||||
return targetQuery.andWhere(onEntityIdField, 'in', matchQuery);
|
||||
return targetQuery.whereExists(
|
||||
searchExists(knex, onEntityIdField)
|
||||
.where(`${S}.key`, key)
|
||||
.whereIn(`${S}.value`, values),
|
||||
);
|
||||
}
|
||||
|
||||
if ('$hasPrefix' in value) {
|
||||
const prefix = value.$hasPrefix.toLocaleLowerCase('en-US');
|
||||
const escaped = prefix.replace(/[%_\\]/g, c => `\\${c}`);
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({ key })
|
||||
.andWhereRaw('?? like ? escape ?', ['value', `${escaped}%`, '\\']);
|
||||
return targetQuery.andWhere(onEntityIdField, 'in', matchQuery);
|
||||
return targetQuery.whereExists(
|
||||
searchExists(knex, onEntityIdField)
|
||||
.where(`${S}.key`, key)
|
||||
.andWhereRaw('?? like ? escape ?', [
|
||||
`${S}.value`,
|
||||
`${escaped}%`,
|
||||
'\\',
|
||||
]),
|
||||
);
|
||||
}
|
||||
|
||||
if ('$contains' in value) {
|
||||
@@ -182,13 +196,11 @@ function applyFieldCondition(options: {
|
||||
// "b" key with a primitive value. We'll consider that an acceptable
|
||||
// tradeoff though.
|
||||
if (isPrimitive(target)) {
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({
|
||||
key,
|
||||
value: String(target).toLocaleLowerCase('en-US'),
|
||||
});
|
||||
return targetQuery.andWhere(onEntityIdField, 'in', matchQuery);
|
||||
return targetQuery.whereExists(
|
||||
searchExists(knex, onEntityIdField)
|
||||
.where(`${S}.key`, key)
|
||||
.where(`${S}.value`, String(target).toLocaleLowerCase('en-US')),
|
||||
);
|
||||
}
|
||||
|
||||
// Object form of $contains - currently only supports relation-style
|
||||
@@ -317,13 +329,14 @@ function applyContainsRelation(options: {
|
||||
);
|
||||
}
|
||||
|
||||
const matchQuery = knex<DbSearchRow>('search')
|
||||
.select('search.entity_id')
|
||||
.where({ key: `relations.${type.toLocaleLowerCase('en-US')}` });
|
||||
const subquery = searchExists(knex, onEntityIdField).where(
|
||||
`${S}.key`,
|
||||
`relations.${type.toLocaleLowerCase('en-US')}`,
|
||||
);
|
||||
|
||||
if (targetRef) {
|
||||
matchQuery.whereIn('value', targetRef);
|
||||
subquery.whereIn(`${S}.value`, targetRef);
|
||||
}
|
||||
|
||||
return targetQuery.andWhere(onEntityIdField, 'in', matchQuery);
|
||||
return targetQuery.whereExists(subquery);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user