Adds a configuration option to fact retrievers to define the lifecycle of the facts the retriever persists. Possible values are either 'items-to-live' or 'time-to-live'. The former keeps only the n latest items in the database for each fact per entity. The latter removes all facts that are older than the TTL value.

Possible values:
* { itl: 5 } // Deletes all facts for the retriever/entity pair, apart from the last five
* { ttl: 1209600000 } // (2 weeks) Deletes all facts older than 2 weeks for the retriever/entity pair
* { ttl: { weeks: 2 } } // Deletes all facts older than 2 weeks for the retriever/entity pair

Signed-off-by: Jussi Hallila <jussi@hallila.com>
This commit is contained in:
Jussi Hallila
2022-01-10 15:38:16 +01:00
parent bc5fe6c6c4
commit dfd5e81721
11 changed files with 286 additions and 19 deletions
+12
View File
@@ -0,0 +1,12 @@
---
'@backstage/plugin-tech-insights-backend': patch
'@backstage/plugin-tech-insights-node': patch
---
Adds a configuration option to fact retrievers to define the lifecycle of the facts the retriever persists. Possible values are either 'items-to-live' or 'time-to-live'. The former keeps only the n latest items in the database for each fact per entity. The latter removes all facts that are older than the TTL value.
Possible values:
- { itl: 5 } // Deletes all facts for the retriever/entity pair, apart from the last five
- { ttl: 1209600000 } // (2 weeks) Deletes all facts older than 2 weeks for the retriever/entity pair
- { ttl: { weeks: 2 } } // Deletes all facts older than 2 weeks for the retriever/entity pair
+9 -1
View File
@@ -91,7 +91,15 @@ const myFactRetrieverRegistration = createFactRetrieverRegistration(
);
```
Then you can modify the example `techInsights.ts` file shown above like this:
FactRetrieverRegistration also accepts an optional `lifecycle` configuration value. This can be either ITL (items to live) or TTL (time to live). Valid options for this value are either a number for ITL, or a Luxon duration-like value (a number of milliseconds or a duration object) for TTL. For example:
```ts
const itl = { itl: 7 }; // Deletes all but 7 latest facts for each id/entity pair
const ttl = { ttl: 1209600000 }; // (2 weeks) Deletes items older than 2 weeks
const ttlWithAHumanReadableValue = { ttl: { weeks: 2 } }; // Deletes items older than 2 weeks
```
To register these fact retrievers to your application you can modify the example `techInsights.ts` file shown above like this:
```diff
const builder = new DefaultTechInsightsBuilder({
@@ -8,6 +8,7 @@ import { Config } from '@backstage/config';
import express from 'express';
import { FactChecker } from '@backstage/plugin-tech-insights-node';
import { FactCheckerFactory } from '@backstage/plugin-tech-insights-node';
import { FactLifecycle } from '@backstage/plugin-tech-insights-node';
import { FactRetriever } from '@backstage/plugin-tech-insights-node';
import { FactRetrieverRegistration } from '@backstage/plugin-tech-insights-node';
import { Logger as Logger_2 } from 'winston';
@@ -28,6 +29,7 @@ export const buildTechInsightsContext: <
export function createFactRetrieverRegistration(
cadence: string,
factRetriever: FactRetriever,
lifecycle?: FactLifecycle,
): FactRetrieverRegistration;
// @public
@@ -14,6 +14,7 @@
* limitations under the License.
*/
import {
FactLifecycle,
FactRetriever,
FactRetrieverContext,
TechInsightsStore,
@@ -75,7 +76,7 @@ export class FactRetrieverEngine {
const registrations = this.factRetrieverRegistry.listRegistrations();
const newRegs: string[] = [];
registrations.forEach(registration => {
const { factRetriever, cadence } = registration;
const { factRetriever, cadence, lifecycle } = registration;
if (!this.scheduledJobs.has(factRetriever.id)) {
const cronExpression =
cadence || this.defaultCadence || randomDailyCron();
@@ -87,7 +88,7 @@ export class FactRetrieverEngine {
}
const job = schedule(
cronExpression,
this.createFactRetrieverHandler(factRetriever),
this.createFactRetrieverHandler(factRetriever, lifecycle),
);
this.scheduledJobs.set(factRetriever.id, job);
newRegs.push(factRetriever.id);
@@ -102,7 +103,10 @@ export class FactRetrieverEngine {
return this.scheduledJobs.get(ref);
}
private createFactRetrieverHandler(factRetriever: FactRetriever) {
private createFactRetrieverHandler(
factRetriever: FactRetriever,
lifecycle?: FactLifecycle,
) {
return async () => {
const startTimestamp = process.hrtime();
this.logger.info(
@@ -121,7 +125,7 @@ export class FactRetrieverEngine {
}
try {
await this.repository.insertFacts(factRetriever.id, facts);
await this.repository.insertFacts(factRetriever.id, facts, lifecycle);
this.logger.info(
`Stored ${facts.length} facts for fact retriever ${
factRetriever.id
@@ -14,6 +14,7 @@
* limitations under the License.
*/
import {
FactLifecycle,
FactRetriever,
FactRetrieverRegistration,
} from '@backstage/plugin-tech-insights-node';
@@ -25,6 +26,7 @@ import {
*
* @param cadence - cron expression to indicate when the fact retriever should be triggered
* @param factRetriever - Implementation of fact retriever consisting of at least id, version, schema and handler
* @param lifecycle - Optional lifecycle definition indicating the cleanup logic of facts when this retriever is run
*
*
* @remarks
@@ -40,13 +42,19 @@ import {
#
# * * * * * *
*
* Valid lifecycle values:
* \{ ttl: \{ weeks: 2 \} \} -- This fact retriever will remove items that are older than 2 weeks when it is run
* \{ itl: 7 \} -- This fact retriever will leave 7 newest items in the database when it is run
*
*/
export function createFactRetrieverRegistration(
  cadence: string,
  factRetriever: FactRetriever,
  lifecycle?: FactLifecycle,
): FactRetrieverRegistration {
  // Bundle the schedule, the retriever and the optional cleanup policy into
  // a single registration object. `lifecycle` is passed through untouched,
  // so it stays `undefined` when the caller does not provide one.
  const registration: FactRetrieverRegistration = {
    cadence,
    factRetriever,
    lifecycle,
  };
  return registration;
}
@@ -16,9 +16,18 @@
import camelCase from 'lodash/camelCase';
import { Entity } from '@backstage/catalog-model';
import { get } from 'lodash';
import { FactLifecycle, ITL, TTL } from '@backstage/plugin-tech-insights-node';
export const generateAnnotationFactName = (annotation: string) =>
camelCase(`hasAnnotation-${annotation}`);
export const entityHasAnnotation = (entity: Entity, annotation: string) =>
Boolean(get(entity, ['metadata', 'annotations', annotation]));
/**
 * Type guard that reports whether a lifecycle definition is a time-to-live
 * (TTL) definition.
 *
 * The check is truthiness-based: a definition whose `ttl` value is falsy
 * (for example `0`) is not recognized as a TTL.
 *
 * @param lifecycle - Lifecycle definition to inspect
 * @returns true when `lifecycle` carries a truthy `ttl` value
 */
export const isTtl = (lifecycle: FactLifecycle): lifecycle is TTL =>
  Boolean((lifecycle as TTL).ttl);
/**
 * Type guard that reports whether a lifecycle definition is an
 * items-to-live (ITL) definition.
 *
 * The check is truthiness-based: a definition whose `itl` value is falsy
 * (for example `0`) is not recognized as an ITL.
 *
 * @param lifecycle - Lifecycle definition to inspect
 * @returns true when `lifecycle` carries a truthy `itl` value
 */
export const isItl = (lifecycle: FactLifecycle): lifecycle is ITL =>
  Boolean((lifecycle as ITL).itl);
@@ -126,10 +126,15 @@ describe('Tech Insights database', () => {
logger: getVoidLogger(),
})
).techInsightsStore;
});
// Seed the `fact_schemas` and `facts` tables with the shared fixtures before
// every test so each case starts from the same database contents.
beforeEach(async () => {
await testDbClient.batchInsert('fact_schemas', factSchemas);
await testDbClient.batchInsert('facts', facts);
});
// Empty both tables after every test so rows inserted (or removed) by one
// case cannot leak into the next one.
afterEach(async () => {
await testDbClient('facts').delete();
await testDbClient('fact_schemas').delete();
});
const baseAssertionFact = {
id: 'test-fact',
@@ -183,16 +188,12 @@ describe('Tech Insights database', () => {
expect(schemas).toHaveLength(2);
expect(schemas[0]).toMatchObject({
id: 'test-fact',
version: '1.2.1-test',
version: '0.0.1-test',
entityFilter: [{ kind: 'component' }],
testNumberFact: {
type: 'integer',
description: 'Test fact with a number type',
},
testStringFact: {
type: 'string',
description: 'Test fact with a string type',
},
});
expect(schemas[1]).toMatchObject({
id: 'second',
@@ -263,4 +264,96 @@ describe('Tech Insights database', () => {
facts: { testNumberFact: 3 },
});
});
// Verifies the items-to-live lifecycle: after inserting a fact with
// `{ itl: 2 }` only two rows are expected to remain in the `facts` table.
it('should delete extraneous rows when ITL is defined. Should leave only n latest', async () => {
// Copies a fixture row and overrides its serialized facts payload with a
// recognizable marker value.
const deviledFact = (it: {}) => ({
...it,
facts: JSON.stringify({
testNumberFact: 666,
}),
});
await testDbClient.batchInsert('facts', additionalFacts.map(deviledFact));
// Sanity check of the starting state: rows seeded in `beforeEach` plus the
// rows inserted just above.
const preInsertionFacts = await testDbClient('facts').select();
expect(preInsertionFacts).toHaveLength(3);
// Timestamp slightly in the future for the fact inserted through the store.
const timestamp = DateTime.now().plus(Duration.fromMillis(1111));
const factToBeInserted = {
timestamp: timestamp,
entity: {
namespace: 'a',
kind: 'a',
name: 'a',
},
facts: {
testNumberFact: 555,
},
};
const itl = 2;
// Insert through the store with an ITL lifecycle; this is the call under test.
await store.insertFacts('test-fact', [factToBeInserted], { itl });
// NOTE(review): this select has no orderBy, so the index-based assertions
// below presumably rely on rows coming back in insertion order — confirm.
const afterInsertionFacts = await testDbClient('facts').select();
expect(afterInsertionFacts).toHaveLength(itl);
expect(afterInsertionFacts[0]).toMatchObject(
deviledFact(additionalFacts[0]),
);
// The freshly inserted fact must be one of the surviving rows.
expect(afterInsertionFacts[1]).toMatchObject({
id: 'test-fact',
version: '0.0.1-test',
timestamp: timestamp.toISO(),
entity: 'a:a/a',
facts: JSON.stringify({ testNumberFact: 555 }),
});
});
// Verifies the time-to-live lifecycle: a row stamped three weeks in the past
// is expected to be removed when a fact is inserted with `{ ttl: { weeks: 2 } }`.
it('should delete extraneous rows when TTL is defined. Should leave only items with timestamp greater than TTL', async () => {
// Copies a fixture row, overrides its facts payload with a marker value and
// back-dates its timestamp to three weeks ago (older than the 2 week TTL).
const oldStaledOutFact = (it: {}) => ({
...it,
facts: JSON.stringify({
testNumberFact: 666,
}),
timestamp: DateTime.now().minus({ weeks: 3 }).toISO(),
});
await testDbClient.batchInsert(
'facts',
additionalFacts.map(oldStaledOutFact),
);
// Sanity check of the starting state: rows seeded in `beforeEach` plus the
// back-dated rows inserted just above.
const preInsertionFacts = await testDbClient('facts').select();
expect(preInsertionFacts).toHaveLength(3);
// Timestamp slightly in the future for the fact inserted through the store.
const timestamp = DateTime.now().plus(Duration.fromMillis(1111));
const factToBeInserted = {
timestamp: timestamp,
entity: {
namespace: 'a',
kind: 'a',
name: 'a',
},
facts: {
testNumberFact: 555,
},
};
// Insert through the store with a TTL lifecycle; this is the call under test.
await store.insertFacts('test-fact', [factToBeInserted], {
ttl: { weeks: 2 },
});
// Newest first, so index 0 is expected to be the fact inserted above.
const afterInsertionFacts = await testDbClient('facts')
.select()
.orderBy('timestamp', 'desc');
// One row added and one removed: the row count stays at three.
expect(afterInsertionFacts).toHaveLength(3);
expect(afterInsertionFacts[0]).toMatchObject({
id: 'test-fact',
version: '0.0.1-test',
timestamp: timestamp.toISO(),
entity: 'a:a/a',
facts: JSON.stringify({ testNumberFact: 555 }),
});
// The rows seeded in `beforeEach` are expected to survive the cleanup.
expect(afterInsertionFacts[1]).toMatchObject(facts[1]);
expect(afterInsertionFacts[2]).toMatchObject(facts[0]);
// The back-dated row must be gone.
expect(afterInsertionFacts).not.toContainEqual(
oldStaledOutFact(additionalFacts[0]),
);
});
});
@@ -15,17 +15,20 @@
*/
import { Knex } from 'knex';
import {
FactLifecycle,
FactSchema,
TechInsightFact,
FlatTechInsightFact,
TechInsightsStore,
FactSchemaDefinition,
FlatTechInsightFact,
TechInsightFact,
TechInsightsStore,
} from '@backstage/plugin-tech-insights-node';
import { rsort } from 'semver';
import { groupBy, omit } from 'lodash';
import { DateTime } from 'luxon';
import { Logger } from 'winston';
import { parseEntityName, stringifyEntityRef } from '@backstage/catalog-model';
import { isItl, isTtl } from '../fact/factRetrievers/utils';
import Transaction = Knex.Transaction;
export type RawDbFactRow = {
id: string;
@@ -84,7 +87,11 @@ export class TechInsightsDatabase implements TechInsightsStore {
}
}
async insertFacts(id: string, facts: TechInsightFact[]): Promise<void> {
async insertFacts(
id: string,
facts: TechInsightFact[],
lifecycle?: FactLifecycle,
): Promise<void> {
if (facts.length === 0) return;
const currentSchema = await this.getLatestSchema(id);
const factRows = facts.map(it => {
@@ -93,11 +100,21 @@ export class TechInsightsDatabase implements TechInsightsStore {
version: currentSchema.version,
entity: stringifyEntityRef(it.entity),
facts: JSON.stringify(it.facts),
...(it.timestamp && { timestamp: it.timestamp.toJSDate() }),
...(it.timestamp && { timestamp: it.timestamp.toISO() }),
};
});
await this.db.transaction(async tx => {
await tx.batchInsert<RawDbFactRow>('facts', factRows, this.CHUNK_SIZE);
if (lifecycle && isTtl(lifecycle)) {
const expiration = DateTime.now().minus(lifecycle.ttl);
await this.deleteExpiredFactsByDate(tx, factRows, expiration);
}
if (lifecycle && isItl(lifecycle)) {
const items = lifecycle.itl;
await this.deleteExpiredFactsByNumber(tx, factRows, items);
}
});
}
@@ -170,6 +187,58 @@ export class TechInsightsDatabase implements TechInsightsStore {
return existingSchemas.find(it => it.version === sorted[0])!!;
}
/**
 * Removes stored facts that are older than the given point in time.
 *
 * Only rows whose (id, entity) pair matches one of the supplied fact rows
 * are considered, so other retrievers and entities are left alone.
 *
 * @param tx - Transaction the delete statement is executed in
 * @param factRows - Rows whose id/entity pairs select the facts to clean up
 * @param timestamp - Cut-off; rows with a strictly earlier timestamp are deleted
 */
private async deleteExpiredFactsByDate(
  tx: Transaction,
  factRows: { id: string; entity: string }[],
  timestamp: DateTime,
) {
  const retrieverEntityPairs = factRows.map(row => [row.id, row.entity]);
  const cutoff = timestamp.toISO();

  await tx<RawDbFactRow>('facts')
    .whereIn(['id', 'entity'], retrieverEntityPairs)
    .and.where('timestamp', '<', cutoff)
    .delete();
}
/**
 * Removes stored facts so that at most `items` of the newest rows remain for
 * each (id, entity) pair found in the supplied fact rows.
 *
 * Works in two steps: first it selects the rows that fall outside the
 * `items` newest ones of their id/entity group, then it deletes exactly
 * those rows by their (id, entity, timestamp) triple.
 *
 * @param tx - Transaction both statements are executed in
 * @param factRows - Rows whose id/entity pairs select the facts to clean up
 * @param items - Number of newest facts to keep per id/entity pair
 */
private async deleteExpiredFactsByNumber(
  tx: Transaction,
  factRows: { id: string; entity: string }[],
  items: number,
) {
  const deletables = await tx<RawDbFactRow>('facts')
    .whereIn(
      ['id', 'entity'],
      factRows.map(it => [it.id, it.entity]),
    )
    .and.leftJoin(
      // Ranks every fact inside its id/entity group, newest first, and keeps
      // the `items` best-ranked rows. `items` is bound with `?` (value
      // binding); `??` is reserved for identifiers such as column names.
      this.db.raw(
        `(select *
          from (select id fid,
                       entity fentity,
                       timestamp ftimestamp,
                       row_number() over (partition by id, entity order by timestamp desc) as fact_rank
                from facts) ranks
          where fact_rank <= ? ) as filterjoin`,
        items,
      ),
      joinClause => {
        joinClause
          .on('filterjoin.fid', 'facts.id')
          .on('filterjoin.fentity', 'facts.entity')
          .on('filterjoin.ftimestamp', 'facts.timestamp');
      },
    )
    // Rows without a join partner are not among the newest `items` rows.
    .whereNull('filterjoin.fid');

  // Nothing outside the keep window: skip the second round trip.
  if (deletables.length === 0) {
    return;
  }

  await tx('facts')
    .whereIn(
      ['id', 'entity', 'timestamp'],
      deletables.map(it => [it.id, it.entity, it.timestamp]),
    )
    .delete();
}
private dbFactRowsToTechInsightFacts(rows: RawDbFactRow[]) {
return rows.reduce((acc, it) => {
const { namespace, kind, name } = parseEntityName(it.entity);
+20 -1
View File
@@ -6,6 +6,7 @@
import { CheckResult } from '@backstage/plugin-tech-insights-common';
import { Config } from '@backstage/config';
import { DateTime } from 'luxon';
import { DurationLike } from 'luxon';
import { Logger as Logger_2 } from 'winston';
import { PluginEndpointDiscovery } from '@backstage/backend-common';
@@ -37,6 +38,9 @@ export interface FactCheckerFactory<
): FactChecker<CheckType, CheckResultType>;
}
// @public
export type FactLifecycle = TTL | ITL;
// @public
export interface FactRetriever {
entityFilter?:
@@ -62,6 +66,7 @@ export type FactRetrieverContext = {
export type FactRetrieverRegistration = {
factRetriever: FactRetriever;
cadence?: string;
lifecycle?: FactLifecycle;
};
// @public
@@ -82,6 +87,11 @@ export type FlatTechInsightFact = TechInsightFact & {
id: string;
};
// @public
export type ITL = {
itl: number;
};
// @public
export interface TechInsightCheck {
description: string;
@@ -144,9 +154,18 @@ export interface TechInsightsStore {
[factRef: string]: FlatTechInsightFact;
}>;
getLatestSchemas(ids?: string[]): Promise<FactSchema[]>;
insertFacts(id: string, facts: TechInsightFact[]): Promise<void>;
insertFacts(
id: string,
facts: TechInsightFact[],
lifecycle?: FactLifecycle,
): Promise<void>;
insertFactSchema(schemaDefinition: FactSchemaDefinition): Promise<void>;
}
// @public
export type TTL = {
ttl: DurationLike;
};
// (No @packageDocumentation comment for this package)
```
+38 -1
View File
@@ -13,7 +13,7 @@
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import { DateTime } from 'luxon';
import { DateTime, DurationLike } from 'luxon';
import { Config } from '@backstage/config';
import { PluginEndpointDiscovery } from '@backstage/backend-common';
import { Logger } from 'winston';
@@ -190,6 +190,36 @@ export interface FactRetriever {
| Record<string, string | symbol | (string | symbol)[]>;
}
/**
 * @public
 *
 * Time-to-live lifecycle definition.
 *
 * `ttl` is a Luxon duration-like value: either a number of milliseconds or
 * a duration object such as `{ weeks: 2 }`.
 *
 * @example
 * \{ ttl: 1209600000 \}
 * \{ ttl: \{ weeks: 4 \} \}
 *
 */
export type TTL = { ttl: DurationLike };
/**
 * @public
 *
 * Items-to-live lifecycle definition.
 *
 * `itl` is the number of items to keep.
 *
 * @example
 * \{ itl: 10 \}
 *
 */
export type ITL = { itl: number };
/**
 * @public
 *
 * A fact lifecycle definition. Determines which strategy to use to purge
 * expired facts from the database: either time based (TTL) or count based
 * (ITL).
 */
export type FactLifecycle = TTL | ITL;
/**
* @public
*
@@ -216,4 +246,11 @@ export type FactRetrieverRegistration = {
*
*/
cadence?: string;
/**
* Fact lifecycle definition
*
* If defined, this value is used to determine which items have expired; those items are deleted when this fact retriever is run
*/
lifecycle?: FactLifecycle;
};
@@ -18,6 +18,7 @@ import {
TechInsightFact,
FlatTechInsightFact,
FactSchemaDefinition,
FactLifecycle,
} from './facts';
import { DateTime } from 'luxon';
@@ -35,8 +36,13 @@ export interface TechInsightsStore {
*
* @param id - Unique identifier of the fact retriever these facts relate to
* @param facts - A collection of TechInsightFacts
* @param lifecycle - (Optional) Fact lifecycle object indicating the expiration logic for these items
*/
insertFacts(id: string, facts: TechInsightFact[]): Promise<void>;
insertFacts(
id: string,
facts: TechInsightFact[],
lifecycle?: FactLifecycle,
): Promise<void>;
/**
* @param ids - A collection of fact row identifiers