From dfd5e81721960d8d70ac5a5459b2aff72a4a2dd6 Mon Sep 17 00:00:00 2001 From: Jussi Hallila Date: Mon, 10 Jan 2022 15:38:16 +0100 Subject: [PATCH] Adds a configuration option to fact retrievers to define lifecycle for facts the retriever persists. Possible values are either 'items-to-live' or 'time-to-live'. The former will only n number of items in to the database for each fact per entity. The latter will remove all facts that are older than the TTL value. Possible values: * { itl: 5 } // Deletes all facts for the retriever/entity pair, apart from the last five * { ttl: 1209600000 } // (2 weeks) Deletes all facts older than 2 weeks for the retriever/entity pair * { ttl: { weeks: 2 } } // Deletes all facts older than 2 weeks for the retriever/entity pair Signed-off-by: Jussi Hallila --- .changeset/six-coins-admire.md | 12 ++ plugins/tech-insights-backend/README.md | 10 +- plugins/tech-insights-backend/api-report.md | 2 + .../src/service/fact/FactRetrieverEngine.ts | 12 +- .../src/service/fact/createFactRetriever.ts | 8 ++ .../src/service/fact/factRetrievers/utils.ts | 9 ++ .../persistence/TechInsightsDatabase.test.ts | 105 +++++++++++++++++- .../persistence/TechInsightsDatabase.ts | 79 ++++++++++++- plugins/tech-insights-node/api-report.md | 21 +++- plugins/tech-insights-node/src/facts.ts | 39 ++++++- plugins/tech-insights-node/src/persistence.ts | 8 +- 11 files changed, 286 insertions(+), 19 deletions(-) create mode 100644 .changeset/six-coins-admire.md diff --git a/.changeset/six-coins-admire.md b/.changeset/six-coins-admire.md new file mode 100644 index 0000000000..539c6e470d --- /dev/null +++ b/.changeset/six-coins-admire.md @@ -0,0 +1,12 @@ +--- +'@backstage/plugin-tech-insights-backend': patch +'@backstage/plugin-tech-insights-node': patch +--- + +Adds a configuration option to fact retrievers to define lifecycle for facts the retriever persists. Possible values are either 'items-to-live' or 'time-to-live'. 
The former will keep only the latest n items in the database for each fact per entity. The latter will remove all facts that are older than the TTL value. + +Possible values: + +- { itl: 5 } // Deletes all facts for the retriever/entity pair, apart from the last five +- { ttl: 1209600000 } // (2 weeks) Deletes all facts older than 2 weeks for the retriever/entity pair +- { ttl: { weeks: 2 } } // Deletes all facts older than 2 weeks for the retriever/entity pair diff --git a/plugins/tech-insights-backend/README.md b/plugins/tech-insights-backend/README.md index 31f5e402b0..ea76cf2541 100644 --- a/plugins/tech-insights-backend/README.md +++ b/plugins/tech-insights-backend/README.md @@ -91,7 +91,15 @@ const myFactRetrieverRegistration = createFactRetrieverRegistration( ); ``` -Then you can modify the example `techInsights.ts` file shown above like this: +FactRetrieverRegistration also accepts an optional `lifecycle` configuration value. This can be either ITL (items to live) or TTL (time to live). Valid options for this value are either a number for itl or a Luxon duration like object for ttl. 
For example: + +```ts +const itl = { itl: 7 }; // Deletes all but 7 latest facts for each id/entity pair +const ttl = { ttl: 1209600000 }; // (2 weeks) Deletes items older than 2 weeks +const ttlWithAHumanReadableValue = { ttl: { weeks: 2 } }; // Deletes items older than 2 weeks +``` + +To register these fact retrievers to your application you can modify the example `techInsights.ts` file shown above like this: ```diff const builder = new DefaultTechInsightsBuilder({ diff --git a/plugins/tech-insights-backend/api-report.md b/plugins/tech-insights-backend/api-report.md index ac2d1d1170..c5cefd2619 100644 --- a/plugins/tech-insights-backend/api-report.md +++ b/plugins/tech-insights-backend/api-report.md @@ -8,6 +8,7 @@ import { Config } from '@backstage/config'; import express from 'express'; import { FactChecker } from '@backstage/plugin-tech-insights-node'; import { FactCheckerFactory } from '@backstage/plugin-tech-insights-node'; +import { FactLifecycle } from '@backstage/plugin-tech-insights-node'; import { FactRetriever } from '@backstage/plugin-tech-insights-node'; import { FactRetrieverRegistration } from '@backstage/plugin-tech-insights-node'; import { Logger as Logger_2 } from 'winston'; @@ -28,6 +29,7 @@ export const buildTechInsightsContext: < export function createFactRetrieverRegistration( cadence: string, factRetriever: FactRetriever, + lifecycle?: FactLifecycle, ): FactRetrieverRegistration; // @public diff --git a/plugins/tech-insights-backend/src/service/fact/FactRetrieverEngine.ts b/plugins/tech-insights-backend/src/service/fact/FactRetrieverEngine.ts index 11fd0d29b8..51a68635b1 100644 --- a/plugins/tech-insights-backend/src/service/fact/FactRetrieverEngine.ts +++ b/plugins/tech-insights-backend/src/service/fact/FactRetrieverEngine.ts @@ -14,6 +14,7 @@ * limitations under the License. 
*/ import { + FactLifecycle, FactRetriever, FactRetrieverContext, TechInsightsStore, @@ -75,7 +76,7 @@ export class FactRetrieverEngine { const registrations = this.factRetrieverRegistry.listRegistrations(); const newRegs: string[] = []; registrations.forEach(registration => { - const { factRetriever, cadence } = registration; + const { factRetriever, cadence, lifecycle } = registration; if (!this.scheduledJobs.has(factRetriever.id)) { const cronExpression = cadence || this.defaultCadence || randomDailyCron(); @@ -87,7 +88,7 @@ export class FactRetrieverEngine { } const job = schedule( cronExpression, - this.createFactRetrieverHandler(factRetriever), + this.createFactRetrieverHandler(factRetriever, lifecycle), ); this.scheduledJobs.set(factRetriever.id, job); newRegs.push(factRetriever.id); @@ -102,7 +103,10 @@ export class FactRetrieverEngine { return this.scheduledJobs.get(ref); } - private createFactRetrieverHandler(factRetriever: FactRetriever) { + private createFactRetrieverHandler( + factRetriever: FactRetriever, + lifecycle?: FactLifecycle, + ) { return async () => { const startTimestamp = process.hrtime(); this.logger.info( @@ -121,7 +125,7 @@ export class FactRetrieverEngine { } try { - await this.repository.insertFacts(factRetriever.id, facts); + await this.repository.insertFacts(factRetriever.id, facts, lifecycle); this.logger.info( `Stored ${facts.length} facts for fact retriever ${ factRetriever.id diff --git a/plugins/tech-insights-backend/src/service/fact/createFactRetriever.ts b/plugins/tech-insights-backend/src/service/fact/createFactRetriever.ts index 655736400a..1e3f7f8904 100644 --- a/plugins/tech-insights-backend/src/service/fact/createFactRetriever.ts +++ b/plugins/tech-insights-backend/src/service/fact/createFactRetriever.ts @@ -14,6 +14,7 @@ * limitations under the License. 
*/ import { + FactLifecycle, FactRetriever, FactRetrieverRegistration, } from '@backstage/plugin-tech-insights-node'; @@ -25,6 +26,7 @@ import { * * @param cadence - cron expression to indicate when the fact retriever should be triggered * @param factRetriever - Implementation of fact retriever consisting of at least id, version, schema and handler + * @param lifecycle - Optional lifecycle definition indicating the cleanup logic of facts when this retriever is run * * * @remarks @@ -40,13 +42,19 @@ import { # │ │ │ │ │ │ # * * * * * * * + * Valid lifecycle values: + * \{ ttl: \{ weeks: 2 \} \} -- This fact retriever will remove items that are older than 2 weeks when it is run + * \{ itl: 7 \} -- This fact retriever will leave 7 newest items in the database when it is run + * */ export function createFactRetrieverRegistration( cadence: string, factRetriever: FactRetriever, + lifecycle?: FactLifecycle, ): FactRetrieverRegistration { return { cadence, factRetriever, + lifecycle, }; } diff --git a/plugins/tech-insights-backend/src/service/fact/factRetrievers/utils.ts b/plugins/tech-insights-backend/src/service/fact/factRetrievers/utils.ts index 7967502a63..b01bbe6248 100644 --- a/plugins/tech-insights-backend/src/service/fact/factRetrievers/utils.ts +++ b/plugins/tech-insights-backend/src/service/fact/factRetrievers/utils.ts @@ -16,9 +16,18 @@ import camelCase from 'lodash/camelCase'; import { Entity } from '@backstage/catalog-model'; import { get } from 'lodash'; +import { FactLifecycle, ITL, TTL } from '@backstage/plugin-tech-insights-node'; export const generateAnnotationFactName = (annotation: string) => camelCase(`hasAnnotation-${annotation}`); export const entityHasAnnotation = (entity: Entity, annotation: string) => Boolean(get(entity, ['metadata', 'annotations', annotation])); + +export const isTtl = (lifecycle: FactLifecycle): lifecycle is TTL => { + return !!(lifecycle as TTL).ttl; +}; + +export const isItl = (lifecycle: FactLifecycle): lifecycle is ITL => { 
+ return !!(lifecycle as ITL).itl; +}; diff --git a/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.test.ts b/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.test.ts index 43d00e2425..a1bb2dc299 100644 --- a/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.test.ts +++ b/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.test.ts @@ -126,10 +126,15 @@ describe('Tech Insights database', () => { logger: getVoidLogger(), }) ).techInsightsStore; - + }); + beforeEach(async () => { await testDbClient.batchInsert('fact_schemas', factSchemas); await testDbClient.batchInsert('facts', facts); }); + afterEach(async () => { + await testDbClient('facts').delete(); + await testDbClient('fact_schemas').delete(); + }); const baseAssertionFact = { id: 'test-fact', @@ -183,16 +188,12 @@ describe('Tech Insights database', () => { expect(schemas).toHaveLength(2); expect(schemas[0]).toMatchObject({ id: 'test-fact', - version: '1.2.1-test', + version: '0.0.1-test', entityFilter: [{ kind: 'component' }], testNumberFact: { type: 'integer', description: 'Test fact with a number type', }, - testStringFact: { - type: 'string', - description: 'Test fact with a string type', - }, }); expect(schemas[1]).toMatchObject({ id: 'second', @@ -263,4 +264,96 @@ describe('Tech Insights database', () => { facts: { testNumberFact: 3 }, }); }); + + it('should delete extraneous rows when ITL is defined. 
Should leave only n latest', async () => { + const deviledFact = (it: {}) => ({ + ...it, + facts: JSON.stringify({ + testNumberFact: 666, + }), + }); + await testDbClient.batchInsert('facts', additionalFacts.map(deviledFact)); + + const preInsertionFacts = await testDbClient('facts').select(); + expect(preInsertionFacts).toHaveLength(3); + + const timestamp = DateTime.now().plus(Duration.fromMillis(1111)); + const factToBeInserted = { + timestamp: timestamp, + entity: { + namespace: 'a', + kind: 'a', + name: 'a', + }, + facts: { + testNumberFact: 555, + }, + }; + const itl = 2; + await store.insertFacts('test-fact', [factToBeInserted], { itl }); + + const afterInsertionFacts = await testDbClient('facts').select(); + expect(afterInsertionFacts).toHaveLength(itl); + expect(afterInsertionFacts[0]).toMatchObject( + deviledFact(additionalFacts[0]), + ); + expect(afterInsertionFacts[1]).toMatchObject({ + id: 'test-fact', + version: '0.0.1-test', + timestamp: timestamp.toISO(), + entity: 'a:a/a', + facts: JSON.stringify({ testNumberFact: 555 }), + }); + }); + + it('should delete extraneous rows when TTL is defined. 
Should leave only items with timestamp greater than TTL', async () => { + const oldStaledOutFact = (it: {}) => ({ + ...it, + facts: JSON.stringify({ + testNumberFact: 666, + }), + timestamp: DateTime.now().minus({ weeks: 3 }).toISO(), + }); + await testDbClient.batchInsert( + 'facts', + additionalFacts.map(oldStaledOutFact), + ); + + const preInsertionFacts = await testDbClient('facts').select(); + expect(preInsertionFacts).toHaveLength(3); + + const timestamp = DateTime.now().plus(Duration.fromMillis(1111)); + const factToBeInserted = { + timestamp: timestamp, + entity: { + namespace: 'a', + kind: 'a', + name: 'a', + }, + facts: { + testNumberFact: 555, + }, + }; + await store.insertFacts('test-fact', [factToBeInserted], { + ttl: { weeks: 2 }, + }); + + const afterInsertionFacts = await testDbClient('facts') + .select() + .orderBy('timestamp', 'desc'); + expect(afterInsertionFacts).toHaveLength(3); + expect(afterInsertionFacts[0]).toMatchObject({ + id: 'test-fact', + version: '0.0.1-test', + timestamp: timestamp.toISO(), + entity: 'a:a/a', + facts: JSON.stringify({ testNumberFact: 555 }), + }); + expect(afterInsertionFacts[1]).toMatchObject(facts[1]); + expect(afterInsertionFacts[2]).toMatchObject(facts[0]); + + expect(afterInsertionFacts).not.toContainEqual( + oldStaledOutFact(additionalFacts[0]), + ); + }); }); diff --git a/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.ts b/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.ts index 7cbf5671c2..bb29657c07 100644 --- a/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.ts +++ b/plugins/tech-insights-backend/src/service/persistence/TechInsightsDatabase.ts @@ -15,17 +15,20 @@ */ import { Knex } from 'knex'; import { + FactLifecycle, FactSchema, - TechInsightFact, - FlatTechInsightFact, - TechInsightsStore, FactSchemaDefinition, + FlatTechInsightFact, + TechInsightFact, + TechInsightsStore, } from '@backstage/plugin-tech-insights-node'; 
import { rsort } from 'semver'; import { groupBy, omit } from 'lodash'; import { DateTime } from 'luxon'; import { Logger } from 'winston'; import { parseEntityName, stringifyEntityRef } from '@backstage/catalog-model'; +import { isItl, isTtl } from '../fact/factRetrievers/utils'; +import Transaction = Knex.Transaction; export type RawDbFactRow = { id: string; @@ -84,7 +87,11 @@ export class TechInsightsDatabase implements TechInsightsStore { } } - async insertFacts(id: string, facts: TechInsightFact[]): Promise { + async insertFacts( + id: string, + facts: TechInsightFact[], + lifecycle?: FactLifecycle, + ): Promise { if (facts.length === 0) return; const currentSchema = await this.getLatestSchema(id); const factRows = facts.map(it => { @@ -93,11 +100,21 @@ export class TechInsightsDatabase implements TechInsightsStore { version: currentSchema.version, entity: stringifyEntityRef(it.entity), facts: JSON.stringify(it.facts), - ...(it.timestamp && { timestamp: it.timestamp.toJSDate() }), + ...(it.timestamp && { timestamp: it.timestamp.toISO() }), }; }); + await this.db.transaction(async tx => { await tx.batchInsert('facts', factRows, this.CHUNK_SIZE); + + if (lifecycle && isTtl(lifecycle)) { + const expiration = DateTime.now().minus(lifecycle.ttl); + await this.deleteExpiredFactsByDate(tx, factRows, expiration); + } + if (lifecycle && isItl(lifecycle)) { + const items = lifecycle.itl; + await this.deleteExpiredFactsByNumber(tx, factRows, items); + } }); } @@ -170,6 +187,58 @@ export class TechInsightsDatabase implements TechInsightsStore { return existingSchemas.find(it => it.version === sorted[0])!!; } + private async deleteExpiredFactsByDate( + tx: Transaction, + factRows: { id: string; entity: string }[], + timestamp: DateTime, + ) { + await tx('facts') + .whereIn( + ['id', 'entity'], + factRows.map(it => [it.id, it.entity]), + ) + .and.where('timestamp', '<', timestamp.toISO()) + .delete(); + } + + private async deleteExpiredFactsByNumber( + tx: Transaction, + 
factRows: { id: string; entity: string }[], + items: number, + ) { + const deletables = await tx('facts') + .whereIn( + ['id', 'entity'], + factRows.map(it => [it.id, it.entity]), + ) + .and.leftJoin( + this.db.raw( + `(select * + from (select id fid, + entity fentity, + timestamp ftimestamp, + row_number() over (partition by id, entity order by timestamp desc) as fact_rank + from facts) ranks + where fact_rank <= ?? ) as filterjoin`, + items, + ), + joinClause => { + joinClause + .on('filterjoin.fid', 'facts.id') + .on('filterjoin.fentity', 'facts.entity') + .on('filterjoin.ftimestamp', 'facts.timestamp'); + }, + ) + .whereNull('filterjoin.fid'); + + await tx('facts') + .whereIn( + ['id', 'entity', 'timestamp'], + deletables.map(it => [it.id, it.entity, it.timestamp]), + ) + .delete(); + } + private dbFactRowsToTechInsightFacts(rows: RawDbFactRow[]) { return rows.reduce((acc, it) => { const { namespace, kind, name } = parseEntityName(it.entity); diff --git a/plugins/tech-insights-node/api-report.md b/plugins/tech-insights-node/api-report.md index fc137da4bc..ec822bf5e2 100644 --- a/plugins/tech-insights-node/api-report.md +++ b/plugins/tech-insights-node/api-report.md @@ -6,6 +6,7 @@ import { CheckResult } from '@backstage/plugin-tech-insights-common'; import { Config } from '@backstage/config'; import { DateTime } from 'luxon'; +import { DurationLike } from 'luxon'; import { Logger as Logger_2 } from 'winston'; import { PluginEndpointDiscovery } from '@backstage/backend-common'; @@ -37,6 +38,9 @@ export interface FactCheckerFactory< ): FactChecker; } +// @public +export type FactLifecycle = TTL | ITL; + // @public export interface FactRetriever { entityFilter?: @@ -62,6 +66,7 @@ export type FactRetrieverContext = { export type FactRetrieverRegistration = { factRetriever: FactRetriever; cadence?: string; + lifecycle?: FactLifecycle; }; // @public @@ -82,6 +87,11 @@ export type FlatTechInsightFact = TechInsightFact & { id: string; }; +// @public +export type ITL = 
{ + itl: number; +}; + // @public export interface TechInsightCheck { description: string; @@ -144,9 +154,18 @@ export interface TechInsightsStore { [factRef: string]: FlatTechInsightFact; }>; getLatestSchemas(ids?: string[]): Promise; - insertFacts(id: string, facts: TechInsightFact[]): Promise; + insertFacts( + id: string, + facts: TechInsightFact[], + lifecycle?: FactLifecycle, + ): Promise; insertFactSchema(schemaDefinition: FactSchemaDefinition): Promise; } +// @public +export type TTL = { + ttl: DurationLike; +}; + // (No @packageDocumentation comment for this package) ``` diff --git a/plugins/tech-insights-node/src/facts.ts b/plugins/tech-insights-node/src/facts.ts index 4a3157d391..34c753486a 100644 --- a/plugins/tech-insights-node/src/facts.ts +++ b/plugins/tech-insights-node/src/facts.ts @@ -13,7 +13,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -import { DateTime } from 'luxon'; +import { DateTime, DurationLike } from 'luxon'; import { Config } from '@backstage/config'; import { PluginEndpointDiscovery } from '@backstage/backend-common'; import { Logger } from 'winston'; @@ -190,6 +190,36 @@ export interface FactRetriever { | Record; } +/** + * @public + * + * A Luxon duration like object for time to live value + * + * @example + * \{ ttl: 1209600000 \} + * \{ ttl: \{ weeks: 4 \} \} + * + **/ +export type TTL = { ttl: DurationLike }; + +/** + * @public + * + * A number for items to live value + * + * @example + * \{ itl: 10 \} + * + **/ +export type ITL = { itl: number }; + +/** + * @public + * + * A fact lifecycle definition. Determines which strategy to use to purge expired facts from the database. 
+ */ +export type FactLifecycle = TTL | ITL; + /** * @public * @@ -216,4 +246,11 @@ export type FactRetrieverRegistration = { * */ cadence?: string; + + /** + * Fact lifecycle definition + * + * If defined this value will be used to determine expired items which will be deleted when this fact retriever is run + */ + lifecycle?: FactLifecycle; }; diff --git a/plugins/tech-insights-node/src/persistence.ts b/plugins/tech-insights-node/src/persistence.ts index 444e961008..17a3867347 100644 --- a/plugins/tech-insights-node/src/persistence.ts +++ b/plugins/tech-insights-node/src/persistence.ts @@ -18,6 +18,7 @@ import { TechInsightFact, FlatTechInsightFact, FactSchemaDefinition, + FactLifecycle, } from './facts'; import { DateTime } from 'luxon'; @@ -35,8 +36,13 @@ export interface TechInsightsStore { * * @param id - Unique identifier of the fact retriever these facts relate to * @param facts - A collection of TechInsightFacts + * @param lifecycle - (Optional) Fact lifecycle object indicating the expiration logic for these items */ - insertFacts(id: string, facts: TechInsightFact[]): Promise; + insertFacts( + id: string, + facts: TechInsightFact[], + lifecycle?: FactLifecycle, + ): Promise; /** * @param ids - A collection of fact row identifiers