enhance gitlab discovery to use group search API (#31993)

* feat(gitlab): update discovery by allowing API search

Instead of walking groups and subgroups, this change makes it possible to use
the dedicated GitLab search API: https://docs.gitlab.com/api/search/#scope-blobs.

This API is only available on the GitLab 'Premium' and 'Ultimate' tiers.

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

* tests(gitlab): add tests for entity discovery by search API

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

* docs(gitlab): update discovery documentation to add 'useSearch' configuration parameter

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

* docs(gitlab): update report api

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

* docs(gitlab): add generated changeset

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

* fix(gitlab): use parameter 'filename:' in group search

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>

---------

Signed-off-by: Matthieu Brouillard <m.brouillard@lectra.com>
This commit is contained in:
Matthieu Brouillard
2026-02-03 15:02:08 +01:00
committed by GitHub
parent 1f20b9f6a7
commit 2f51676452
11 changed files with 381 additions and 11 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/plugin-catalog-backend-module-gitlab': minor
---
Allow entity discovery via the GitLab group search API.
+2 -1
View File
@@ -154,11 +154,12 @@ catalog:
fallbackBranch: master # Optional. Fallback to be used if there is no default branch configured at the Gitlab repository. It is only used, if `branch` is undefined. Uses `master` as default
skipForkedRepos: false # Optional. If the project is a fork, skip repository
includeArchivedRepos: false # Optional. If project is archived, include repository
group: example-group # Optional. Group and subgroup (if needed) to look for repositories. If not present the whole instance will be scanned
group: example-group # Optional (unless useSearch is true). Group and subgroup (if needed) to look for repositories. If not present the whole instance will be scanned
groupPattern: # Optional. Filters for groups based on a list of RegEx. Default, no filters.
- '^somegroup$'
- 'anothergroup'
entityFilename: catalog-info.yaml # Optional. Defaults to `catalog-info.yaml`
useSearch: false # Optional. Whether to use the GitLab group search API to find files. Requires a GitLab 'Premium' or 'Ultimate' license and `group` to be set. Defaults to `false`
projectPattern: '[\s\S]*' # Optional. Filters found projects based on provided pattern. Defaults to `[\s\S]*`, which means to not filter anything
excludeRepos: [] # Optional. A list of project paths that should be excluded from discovery, e.g. group/subgroup/repo. Should not start or end with a slash.
schedule: # Same options as in SchedulerServiceTaskScheduleDefinition. Optional for the Legacy Backend System
@@ -113,6 +113,7 @@ export type GitlabProviderConfig = {
userPattern: RegExp;
groupPattern: RegExp | RegExp[];
allowInherited?: boolean;
useSearch?: boolean;
relations?: string[];
orgEnabled?: boolean;
schedule?: SchedulerServiceTaskScheduleDefinition;
@@ -33,6 +33,7 @@ import {
all_saas_subgroup_1_members,
all_saas_subgroup_2_members,
group_with_subgroups_response,
projects_with_catalog_info_yaml,
} from './mocks';
const httpHandlers = [
@@ -197,6 +198,32 @@ const httpGroupFindByEncodedPathDynamic = all_groups_response.flatMap(group => [
),
]);
/**
 * One mock handler per known group for the group search endpoint
 * (`GET /groups/:id/search`).
 *
 * Each handler answers with one search hit per project of
 * `projects_with_catalog_info_yaml` that lives below the group, pretending
 * that a `catalog-info.yaml` exists at the root of its default branch.
 * Query parameters of the request are not inspected.
 */
const httpSearchFilesInGroupDynamic = all_groups_response.map(group => {
  // Match on the path followed by a slash so that a group such as `group1`
  // does not also claim the projects of a sibling named `group10`.
  const groupPrefix = `${group.full_path}/`;

  return rest.get(
    `${apiBaseUrl}/groups/${encodeURIComponent(group.full_path)}/search`,
    (_, res, ctx) => {
      const searchResults = projects_with_catalog_info_yaml
        .filter(project =>
          project.path_with_namespace?.startsWith(groupPrefix),
        )
        .map(project => ({
          basename: 'catalog-info',
          data: 'catalog-info.yaml',
          path: 'catalog-info.yaml',
          filename: 'catalog-info.yaml',
          id: null,
          ref: project.default_branch,
          startline: 0,
          project_id: project.id,
        }));

      return res(ctx.json(searchResults));
    },
  );
});
const httpGroupFindByIdDynamic = all_groups_response.map(group => {
return rest.get(`${apiBaseUrl}/groups/${group.id}`, (_, res, ctx) => {
return res(ctx.json(all_groups_response.find(g => g.id === group.id)));
@@ -741,4 +768,5 @@ export const handlers = [
...httpGroupListDescendantProjectsByFullPath,
...graphqlHandlers,
...httpGroupFindByEncodedPathDynamic,
...httpSearchFilesInGroupDynamic,
];
@@ -57,6 +57,18 @@ export const config_saas: MockObject = {
baseUrl: 'https://gitlab.com',
};
/**
 * Partial configuration that only switches on `useSearch` for the `test-id`
 * GitLab provider; it carries no integration or other provider settings.
 *
 * NOTE(review): appears to have the same content as
 * `config_partial_test_id_use_search` - confirm both are needed.
 */
export const config_partial_add_search: MockObject = {
catalog: {
providers: {
gitlab: {
'test-id': {
useSearch: true,
},
},
},
},
};
export const config_no_org_integration: MockObject = {
integrations: {
gitlab: [
@@ -162,6 +174,47 @@ export const config_github_host: MockObject = {
},
};
/**
 * Partial configuration that only sets `useSearch: true` on the `test-id`
 * GitLab provider.
 *
 * NOTE(review): appears to have the same content as
 * `config_partial_add_search` - confirm both are needed.
 */
export const config_partial_test_id_use_search: MockObject = {
catalog: {
providers: {
gitlab: {
'test-id': {
useSearch: true,
},
},
},
},
};
/**
 * Configuration with a single GitLab integration (`example.com`) and one
 * discovery provider (`test-id`) that is scoped to `group1`, looks for
 * `catalog-info.yaml`, skips forked repositories and has `useSearch` enabled.
 */
export const config_single_integration_with_search: MockObject = {
integrations: {
gitlab: [
{
host: 'example.com',
apiBaseUrl: 'https://example.com/api/v4',
token: '1234',
},
],
},
catalog: {
providers: {
gitlab: {
'test-id': {
host: 'example.com',
group: 'group1',
entityFilename: 'catalog-info.yaml',
useSearch: true,
skipForkedRepos: true,
schedule: {
frequency: 'PT30M',
timeout: 'PT3M',
},
},
},
},
},
};
export const config_single_integration: MockObject = {
integrations: {
gitlab: [
@@ -1377,6 +1430,16 @@ export const expectedSaasGroup: MockObject[] = [
},
];
/**
 * Projects that the search tests treat as having a catalog-info.yaml on
 * their default branch: not archived, located below `group1/`, and using
 * `main` as default branch.
 */
export const projects_with_catalog_info_yaml: MockObject[] =
  all_projects_response.filter(candidate => {
    if (candidate.archived) {
      return false;
    }
    if (!candidate.path_with_namespace?.startsWith('group1/')) {
      return false;
    }
    return candidate.default_branch === 'main';
  });
/**
* GitLab Events
*/
@@ -1842,6 +1905,35 @@ export const push_modif_event: EventParams = {
* Expected Backstage entities
*/
/**
 * Location entities expected from a search-based discovery on `group1`:
 * one per non-forked project of `projects_with_catalog_info_yaml`, pointing
 * at `catalog-info.yaml` on the project's default branch.
 */
export const expected_location_from_search_on_group_1: MockObject[] =
  projects_with_catalog_info_yaml.flatMap(candidate => {
    if (candidate.forked_from_project) {
      return [];
    }

    const blobUrl = `https://example.com/${candidate.path_with_namespace}/-/blob/${candidate.default_branch}/catalog-info.yaml`;
    const managedBy = `url:${blobUrl}`;
    const entityName = locationSpecToMetadataName({
      target: blobUrl,
      type: 'url',
    });

    return [
      {
        entity: {
          apiVersion: 'backstage.io/v1alpha1',
          kind: 'Location',
          metadata: {
            annotations: {
              'backstage.io/managed-by-location': managedBy,
              'backstage.io/managed-by-origin-location': managedBy,
            },
            name: entityName,
          },
          spec: {
            presence: 'optional',
            target: blobUrl,
            type: 'url',
          },
        },
        locationKey: 'GitlabDiscoveryEntityProvider:test-id',
      },
    ];
  });
// includes only projects that have a default branch (for when the branch and fallback branch were not set in the config)
export const expected_location_entities_default_branch: MockObject[] =
all_projects_response
@@ -2958,3 +3050,79 @@ export const all_self_hosted_group1_members: MockObject[] = [
web_url: 'https://gitlab.example/mario_mario',
},
];
/**
 * Expected org entities: one User (`JohnDoe`) and two Groups (`subgroup1`,
 * `group1`), all emitted under the
 * `GitlabOrgDiscoveryEntityProvider:test-id` location key.
 *
 * NOTE(review): despite the name, this list holds User/Group entities rather
 * than Location entities for projects - confirm the name and where it is used.
 */
export const expected_projects_with_catalog_info_yaml_entities: MockObject[] = [
{
entity: {
apiVersion: 'backstage.io/v1alpha1',
kind: 'User',
metadata: {
annotations: {
'backstage.io/managed-by-location': 'url:https://example.com/JohnDoe',
'backstage.io/managed-by-origin-location':
'url:https://example.com/JohnDoe',
'example.com/user-login': 'https://gitlab.example/john_doe',
},
name: 'JohnDoe',
},
spec: {
memberOf: ['subgroup1', 'group1'],
profile: {
displayName: 'John Doe',
email: 'john.doe@company.com',
picture: 'https://secure.gravatar.com/',
},
},
},
locationKey: 'GitlabOrgDiscoveryEntityProvider:test-id',
},
{
entity: {
apiVersion: 'backstage.io/v1alpha1',
kind: 'Group',
metadata: {
annotations: {
'backstage.io/managed-by-location':
'url:https://example.com/group1/subgroup1',
'backstage.io/managed-by-origin-location':
'url:https://example.com/group1/subgroup1',
'example.com/team-path': 'group1/subgroup1',
},
name: 'subgroup1',
description: 'description1',
},
spec: {
children: [],
profile: {
displayName: 'subgroup1',
},
type: 'team',
},
},
locationKey: 'GitlabOrgDiscoveryEntityProvider:test-id',
},
{
entity: {
apiVersion: 'backstage.io/v1alpha1',
kind: 'Group',
metadata: {
annotations: {
'backstage.io/managed-by-location': 'url:https://example.com/group1',
'backstage.io/managed-by-origin-location':
'url:https://example.com/group1',
'example.com/team-path': 'group1',
},
name: 'group1',
description: 'description1',
},
spec: {
children: [],
profile: {
displayName: 'group1',
},
type: 'team',
},
},
locationKey: 'GitlabOrgDiscoveryEntityProvider:test-id',
},
];
@@ -24,6 +24,7 @@ import {
import { LoggerService } from '@backstage/backend-plugin-api';
import {
GitLabDescendantGroupsResponse,
GitLabFile,
GitLabGroup,
GitLabGroupMembersResponse,
GitLabProject,
@@ -46,6 +47,11 @@ interface ListProjectOptions extends CommonListOptions {
simple?: boolean;
}
/**
 * Options accepted by `GitLabClient.listFiles`, on top of the common list
 * options. `listFiles` only issues a request when both fields are set.
 */
interface ListFilesOptions extends CommonListOptions {
// Group to search in; `listFiles` URL-encodes it into the request path.
group?: string;
// Search expression forwarded to the search endpoint.
search?: string;
}
interface UserListOptions extends CommonListOptions {
without_project_bots?: boolean | undefined;
exclude_internal?: boolean | undefined;
@@ -171,6 +177,24 @@ export class GitLabClient {
return this.pagedRequest(`/groups`, options);
}
/**
 * Searches for files inside a group through the GitLab group search API
 * (`GET /groups/:id/search` with the `blobs` scope).
 *
 * @param options - Must contain both `group` (id or full path) and `search`
 *   for a request to be made; remaining fields are forwarded as request options.
 * @returns The matching files, or an empty result when `group` or `search`
 *   is missing.
 */
async listFiles(
  options?: ListFilesOptions,
): Promise<PagedResponse<GitLabFile>> {
  if (options?.group && options?.search) {
    return this.pagedRequest(
      `/groups/${encodeURIComponent(options.group)}/search`,
      {
        ...options,
        // GitLab documents the file-content search scope as `blobs` (plural).
        scope: 'blobs',
      },
    );
  }

  return {
    items: [],
  };
}
// https://docs.gitlab.com/ee/api/groups.html#list-group-details
// id can either be group id or encoded full path
async getGroupByPath(
@@ -84,6 +84,17 @@ export type GitLabGroup = {
parent_id?: number;
};
/**
 * Representation of a GitLab file inside a project, as returned by
 * `GitLabClient.listFiles`.
 *
 * @public
 */
export type GitLabFile = {
// Path of the file; used as the file part of the location URL in search-based discovery.
path: string;
// Git ref the file was found on; used as the branch part of the location URL.
ref: string;
// Id of the project that contains the file; used to look the project up.
project_id: number;
};
export type GitLabGroupMembersResponse = {
errors: { message: string }[];
data: {
@@ -200,6 +211,11 @@ export type GitlabProviderConfig = {
**/
allowInherited?: boolean;
/**
* If true, use the GitLab search API to find projects locations.
*/
useSearch?: boolean;
/**
* Specifies the types of group membership relations that should be included when ingesting data.
*
@@ -336,6 +336,31 @@ describe('GitlabDiscoveryEntityProvider - refresh', () => {
});
});
// search mode: locations are looked up through the GitLab search API
it('should find catalog from finding projects', async () => {
  const applyMutation = jest.fn();
  const connection: EntityProviderConnection = {
    applyMutation,
    refresh: jest.fn(),
  };

  const [provider] = GitlabDiscoveryEntityProvider.fromConfig(
    new ConfigReader(mock.config_single_integration_with_search),
    {
      logger,
      schedule: new PersistingTaskRunner(),
    },
  );

  // the flag must have been picked up from the provider configuration
  expect((provider as any).config.useSearch).toBe(true);

  await provider.connect(connection);
  await provider.refresh(logger);

  expect(applyMutation).toHaveBeenCalledWith({
    type: 'full',
    entities: mock.expected_location_from_search_on_group_1,
  });
});
// branch was set in the config
it('should ingest catalog from specific branch only', async () => {
const config = new ConfigReader(
@@ -208,7 +208,15 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
);
}
const locations = await this.getEntities();
this.logger.info(
`Refreshing Gitlab entity discovery using ${
this.config.useSearch ? 'search' : 'discovery'
} mode`,
);
const locations = this.config.useSearch
? await this.searchEntities()
: await this.getEntities();
await this.connection.applyMutation({
type: 'full',
@@ -221,6 +229,56 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
logger.info(`Processed ${locations.length} locations`);
}
/**
 * Determine the locations on GitLab to be ingested.
 * Uses GitLab's group search API to find files named like the configured
 * catalog file, then keeps the hits whose project passes the project and
 * group filters of this provider.
 *
 * @returns A list of locations to be ingested
 * @throws Error when no `group` is configured. Without a group the client
 *   performs no search and reports zero files, which would otherwise be
 *   handed on as an empty (but seemingly valid) discovery result.
 */
private async searchEntities() {
  if (!this.config.group) {
    throw new Error(
      `Gitlab entity discovery in search mode requires a 'group' to be configured.`,
    );
  }

  const locations: LocationSpec[] = [];
  // One lookup per project: a project with several matching files (e.g. a
  // monorepo) would otherwise be fetched once per file.
  const projectLookups = new Map<
    number,
    ReturnType<GitLabClient['getProjectById']>
  >();

  this.logger.info(`Using gitlab search API to lookup projects`);

  const foundFiles = paginated(
    options => this.gitLabClient.listFiles(options),
    {
      group: this.config.group,
      search: `filename:${this.config.catalogFile}`,
      page: 1,
      per_page: 50,
    },
  );

  for await (const foundFile of foundFiles) {
    let lookup = projectLookups.get(foundFile.project_id);
    if (!lookup) {
      lookup = this.gitLabClient.getProjectById(foundFile.project_id);
      projectLookups.set(foundFile.project_id, lookup);
    }
    const project = await lookup;

    if (
      project &&
      this.isProjectCompliant(project) &&
      this.isGroupCompliant(project.path_with_namespace)
    ) {
      locations.push(
        this.createLocationSpecFromParams(
          project.web_url,
          foundFile.ref,
          foundFile.path,
        ),
      );
    }
  }

  // Report distinct projects, not the number of files returned by the search.
  this.logger.info(
    `Processed ${locations.length} from ${projectLookups.size} found projects on API.`,
  );

  return locations;
}
/**
* Determine the location on GitLab to be ingested base on configured groups and filters.
*
@@ -345,17 +403,29 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
return res;
}
/**
 * Builds an optional `url` location that points at a file on a given branch
 * of a project.
 *
 * @param projectURL - Web URL of the project
 * @param branch - Branch (or ref) the file lives on
 * @param catalogFile - Path of the file inside the repository
 */
private createLocationSpecFromParams(
  projectURL: string,
  branch: string,
  catalogFile: string,
): LocationSpec {
  const blobUrl = `${projectURL}/-/blob/${branch}/${catalogFile}`;
  const spec: LocationSpec = {
    type: 'url',
    target: blobUrl,
    presence: 'optional',
  };
  return spec;
}
/**
 * Builds the location for a project's catalog file.
 * The branch is taken from the configured `branch` if set, otherwise from the
 * project's default branch, otherwise from the configured `fallbackBranch`.
 */
private createLocationSpec(project: GitLabProject): LocationSpec {
const project_branch =
this.config.branch ??
project.default_branch ??
this.config.fallbackBranch;
return {
type: 'url',
target: `${project.web_url}/-/blob/${project_branch}/${this.config.catalogFile}`,
presence: 'optional',
};
return this.createLocationSpecFromParams(
project.web_url,
project_branch,
this.config.catalogFile,
);
}
/**
@@ -546,10 +616,7 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
});
}
private async shouldProcessProject(
project: GitLabProject,
client: GitLabClient,
): Promise<boolean> {
private isProjectCompliant(project: GitLabProject): boolean {
if (!this.config.projectPattern.test(project.path_with_namespace ?? '')) {
this.logger.debug(
`Skipping project ${project.path_with_namespace} as it does not match the project pattern ${this.config.projectPattern}.`,
@@ -584,6 +651,17 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
return false;
}
return true;
}
private async shouldProcessProject(
project: GitLabProject,
client: GitLabClient,
): Promise<boolean> {
if (!this.isProjectCompliant(project)) {
return false;
}
const project_branch =
this.config.branch ??
project.default_branch ??
@@ -597,4 +675,16 @@ export class GitlabDiscoveryEntityProvider implements EntityProvider {
return hasFile;
}
/**
 * Tells whether a name matches at least one of the configured group patterns.
 *
 * @param name - Name to check; a missing or empty name never matches
 */
private isGroupCompliant(name: string | undefined) {
  if (!name) {
    return false;
  }

  const { groupPattern } = this.config;
  const patterns = Array.isArray(groupPattern) ? groupPattern : [groupPattern];

  for (const pattern of patterns) {
    if (pattern.test(name)) {
      return true;
    }
  }
  return false;
}
}
@@ -67,6 +67,7 @@ describe('config', () => {
includeUsersWithoutSeat: false,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -113,6 +114,7 @@ describe('config', () => {
includeUsersWithoutSeat: true,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -159,6 +161,7 @@ describe('config', () => {
includeUsersWithoutSeat: false,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -205,6 +208,7 @@ describe('config', () => {
includeUsersWithoutSeat: false,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -252,6 +256,7 @@ describe('config', () => {
includeUsersWithoutSeat: false,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -299,6 +304,7 @@ describe('config', () => {
includeUsersWithoutSeat: false,
membership: undefined,
topics: undefined,
useSearch: false,
schedule: {
frequency: { minutes: 30 },
timeout: {
@@ -391,6 +397,7 @@ describe('config', () => {
includeArchivedRepos: false,
membership: true,
topics: undefined,
useSearch: false,
}),
);
});
@@ -437,6 +444,7 @@ describe('config', () => {
includeArchivedRepos: false,
membership: undefined,
topics: undefined,
useSearch: false,
}),
);
});
@@ -483,6 +491,7 @@ describe('config', () => {
includeArchivedRepos: false,
membership: undefined,
topics: 'topic1',
useSearch: false,
}),
);
});
@@ -529,6 +538,7 @@ describe('config', () => {
includeArchivedRepos: false,
membership: undefined,
topics: 'topic1,topic2,topic3',
useSearch: false,
}),
);
});
@@ -54,6 +54,7 @@ function readGitlabConfig(id: string, config: Config): GitlabProviderConfig {
groupPattern = new RegExp(/[\s\S]*/);
}
const useSearch: boolean = config.getOptionalBoolean('useSearch') ?? false;
const orgEnabled: boolean = config.getOptionalBoolean('orgEnabled') ?? false;
const allowInherited: boolean =
config.getOptionalBoolean('allowInherited') ?? false;
@@ -96,6 +97,7 @@ function readGitlabConfig(id: string, config: Config): GitlabProviderConfig {
schedule,
orgEnabled,
allowInherited,
useSearch,
relations,
skipForkedRepos,
includeArchivedRepos,