diff --git a/.changeset/red-rivers-make.md b/.changeset/red-rivers-make.md new file mode 100644 index 0000000000..0c3a1835a4 --- /dev/null +++ b/.changeset/red-rivers-make.md @@ -0,0 +1,5 @@ +--- +'@backstage/plugin-catalog-backend-module-gitlab': minor +--- + +allow entity discoverability via gitlab search API diff --git a/docs/integrations/gitlab/discovery.md b/docs/integrations/gitlab/discovery.md index 3d47edba6d..4d801bdd77 100644 --- a/docs/integrations/gitlab/discovery.md +++ b/docs/integrations/gitlab/discovery.md @@ -154,11 +154,12 @@ catalog: fallbackBranch: master # Optional. Fallback to be used if there is no default branch configured at the Gitlab repository. It is only used, if `branch` is undefined. Uses `master` as default skipForkedRepos: false # Optional. If the project is a fork, skip repository includeArchivedRepos: false # Optional. If project is archived, include repository - group: example-group # Optional. Group and subgroup (if needed) to look for repositories. If not present the whole instance will be scanned + group: example-group # Optional (unless useSearch is true). Group and subgroup (if needed) to look for repositories. If not present the whole instance will be scanned groupPattern: # Optional. Filters for groups based on a list of RegEx. Default, no filters. - '^somegroup$' - 'anothergroup' entityFilename: catalog-info.yaml # Optional. Defaults to `catalog-info.yaml` + useSearch: false # Optional. Whether to use the GitLab group search API to find files. Requires Gitlab 'Premium' or 'Ultimate' licenses. Defaults to `false` projectPattern: '[\s\S]*' # Optional. Filters found projects based on provided pattern. Defaults to `[\s\S]*`, which means to not filter anything excludeRepos: [] # Optional. A list of project paths that should be excluded from discovery, e.g. group/subgroup/repo. Should not start or end with a slash. schedule: # Same options as in SchedulerServiceTaskScheduleDefinition. 
/**
 * One mock handler per known group for GitLab's group search endpoint
 * (`GET /groups/:id/search`). Each handler answers with a blob search result
 * for every project in `projects_with_catalog_info_yaml` located below that group.
 */
const httpSearchFilesInGroupDynamic = all_groups_response.map(group =>
  rest.get(
    `${apiBaseUrl}/groups/${encodeURIComponent(group.full_path)}/search`,
    (_, res, ctx) => {
      // Compare whole path segments: a bare prefix check would let `group1`
      // also return the projects of a sibling group such as `group10`.
      const groupPrefix = `${group.full_path}/`;

      const searchResults = projects_with_catalog_info_yaml
        .filter(project =>
          project.path_with_namespace?.startsWith(groupPrefix),
        )
        .map(project => ({
          basename: 'catalog-info',
          data: 'catalog-info.yaml',
          path: 'catalog-info.yaml',
          filename: 'catalog-info.yaml',
          id: null,
          // The fixture assumes the file lives on the project's default branch.
          ref: project.default_branch,
          startline: 0,
          project_id: project.id,
        }));

      return res(ctx.json(searchResults));
    },
  ),
);
// Config fragment that only sets `useSearch: true` for the `test-id` GitLab provider.
// NOTE(review): this literal is identical to `config_partial_test_id_use_search`
// defined further down in this file — confirm both are needed or keep a single fixture.
export const config_partial_add_search: MockObject = {
  catalog: {
    providers: {
      gitlab: {
        'test-id': {
          useSearch: true,
        },
      },
    },
  },
};
export const projects_with_catalog_info_yaml: MockObject[] =
  all_projects_response.filter(
    project =>
      // archived projects are left out of the search fixtures
      !project.archived &&
      // only projects located below `group1`
      project.path_with_namespace?.startsWith('group1/') &&
      // only projects whose default branch is `main`
      project.default_branch === 'main',
  );
/**
 * Options accepted by `GitLabClient.listFiles`, on top of the common list options.
 */
interface ListFilesOptions extends CommonListOptions {
  /** Search query handed to the search endpoint. */
  search?: string;
  /** Group (id or full path) whose search endpoint is queried. */
  group?: string;
}
// https://docs.gitlab.com/ee/api/search.html#group-search-api
/**
 * Searches the files (blobs) of a group through GitLab's group search API.
 *
 * @param options - `group` selects the group (id or full path) and `search`
 *   is the query; every other property is forwarded to the paged request.
 * @returns One page of matching files. When `group` or `search` is missing
 *   no request is made and an empty page is returned.
 */
async listFiles(
  options?: ListFilesOptions,
): Promise<PagedResponse<GitLabFile>> {
  // `group` is already encoded in the URL path, so it is left out of the
  // options that are forwarded with the request.
  const { group, ...query } = options ?? {};

  if (!group || !query.search) {
    return {
      items: [],
    };
  }

  return this.pagedRequest(`/groups/${encodeURIComponent(group)}/search`, {
    ...query,
    // File matches are exposed under the plural `blobs` scope; `blob` is
    // not a scope value the search API accepts.
    scope: 'blobs',
  });
}
// should use search to find entities to process
it('should find catalog from finding projects', async () => {
  // Fixture enables `useSearch` on group `group1` and sets `skipForkedRepos: true`.
  const config = new ConfigReader(mock.config_single_integration_with_search);
  const schedule = new PersistingTaskRunner();
  const entityProviderConnection: EntityProviderConnection = {
    applyMutation: jest.fn(),
    refresh: jest.fn(),
  };
  const provider = GitlabDiscoveryEntityProvider.fromConfig(config, {
    logger,
    schedule,
  })[0];

  // Sanity check that the flag from the fixture reached the provider config.
  expect((provider as any).config.useSearch).toBe(true);

  // The provider has to be connected before it can be refreshed.
  await provider.connect(entityProviderConnection);

  await provider.refresh(logger);

  // Search mode must emit a single full mutation with the expected locations
  // (the expected list leaves out forked projects).
  expect(entityProviderConnection.applyMutation).toHaveBeenCalledWith({
    type: 'full',
    entities: mock.expected_location_from_search_on_group_1,
  });
});
/**
 * Determine the locations on GitLab to be ingested.
 * Uses GitLab's search API to find catalog files inside the configured group
 * and keeps the ones whose project passes the project and group filters.
 *
 * @returns A list of locations to be ingested
 * @throws Error when no group is configured. Without a group no search is
 *   performed, and the caller would apply a `full` mutation with zero
 *   locations.
 */
private async searchEntities(): Promise<LocationSpec[]> {
  const { group, catalogFile } = this.config;

  if (!group) {
    throw new Error(
      `The GitLab search API is scoped to a group: 'group' must be configured when 'useSearch' is enabled`,
    );
  }

  this.logger.info(`Using gitlab search API to lookup projects`);

  const foundFiles = paginated(
    options => this.gitLabClient.listFiles(options),
    {
      group,
      search: `filename:${catalogFile}`,
      page: 1,
      per_page: 50,
    },
  );

  const locations: LocationSpec[] = [];
  // Several matches can belong to the same project; fetch each project once.
  const projectsById = new Map<
    number,
    Awaited<ReturnType<GitLabClient['getProjectById']>>
  >();
  let foundFileCount = 0;

  for await (const foundFile of foundFiles) {
    foundFileCount++;

    if (!projectsById.has(foundFile.project_id)) {
      projectsById.set(
        foundFile.project_id,
        await this.gitLabClient.getProjectById(foundFile.project_id),
      );
    }

    const project = projectsById.get(foundFile.project_id);
    if (!project) {
      continue;
    }

    // Group patterns describe group paths, so drop the trailing project
    // segment before matching. Tested against the full project path, an
    // anchored pattern such as '^somegroup$' could never match.
    const groupPath = project.path_with_namespace
      ?.split('/')
      .slice(0, -1)
      .join('/');

    if (this.isProjectCompliant(project) && this.isGroupCompliant(groupPath)) {
      locations.push(
        this.createLocationSpecFromParams(
          project.web_url,
          foundFile.ref,
          foundFile.path,
        ),
      );
    }
  }

  this.logger.info(
    `Processed ${locations.length} locations from ${foundFileCount} files in ${projectsById.size} projects found on API.`,
  );

  return locations;
}
/**
 * Checks a path against the configured group pattern(s).
 *
 * @param name - path to test; a missing or empty value never complies
 * @returns true when at least one configured pattern matches `name`
 */
private isGroupCompliant(name: string | undefined) {
  if (!name) {
    return false;
  }

  // The config holds either a single pattern or a list of patterns.
  const { groupPattern } = this.config;
  const patterns = Array.isArray(groupPattern) ? groupPattern : [groupPattern];

  return patterns.some(pattern => pattern.test(name));
}
'topic1,topic2,topic3', + useSearch: false, }), ); }); diff --git a/plugins/catalog-backend-module-gitlab/src/providers/config.ts b/plugins/catalog-backend-module-gitlab/src/providers/config.ts index f635d4c432..48fc1052b3 100644 --- a/plugins/catalog-backend-module-gitlab/src/providers/config.ts +++ b/plugins/catalog-backend-module-gitlab/src/providers/config.ts @@ -54,6 +54,7 @@ function readGitlabConfig(id: string, config: Config): GitlabProviderConfig { groupPattern = new RegExp(/[\s\S]*/); } + const useSearch: boolean = config.getOptionalBoolean('useSearch') ?? false; const orgEnabled: boolean = config.getOptionalBoolean('orgEnabled') ?? false; const allowInherited: boolean = config.getOptionalBoolean('allowInherited') ?? false; @@ -96,6 +97,7 @@ function readGitlabConfig(id: string, config: Config): GitlabProviderConfig { schedule, orgEnabled, allowInherited, + useSearch, relations, skipForkedRepos, includeArchivedRepos,