feat(sanitizer): add support for allowlisting additional URI protocols

Signed-off-by: Nick Hudkins <nick@nickhudkins.com>
This commit is contained in:
Nick Hudkins
2025-07-17 12:39:23 -04:00
parent e656551f8f
commit cb0541f294
5 changed files with 87 additions and 0 deletions
+5
View File
@@ -0,0 +1,5 @@
---
'@backstage/plugin-techdocs': minor
---
Adds `additionalAllowedURIProtocols` to sanitizer config
+19
View File
@@ -606,6 +606,25 @@ techdocs:
This way, custom element like `<backstage-element attribute1="value"></backstage-element>` will be allowed in the result HTML.
## How to allow additional URI protocols in TechDocs
TechDocs uses the [DOMPurify](https://github.com/cure53/DOMPurify) library to
sanitize HTML and prevent XSS attacks.
It's possible to allow additional URI protocols by listing them in the
configuration. To do this, add the protocol names (without the trailing `:`) to the
`techdocs.sanitizer.additionalAllowedURIProtocols` configuration of your `app-config.yaml`.
For example:
```yaml
techdocs:
  sanitizer:
    additionalAllowedURIProtocols: ["vscode"]
```
This way, links like `<a href="vscode://settings/">VSCode Settings</a>` will be allowed in the result HTML.
## How to render PlantUML diagram in TechDocs
PlantUML allows you to create diagrams from plain text language. Each diagram description begins with the keyword - (@startXYZ and @endXYZ, depending on the kind of diagram). For UML Diagrams, Keywords @startuml & @enduml should be used. Further details for all types of diagrams can be found at [PlantUML Language Reference Guide](https://plantuml.com/guide).
+10
View File
@@ -58,6 +58,16 @@ export interface Config {
* @visibility frontend
*/
allowedCustomElementAttributeNameRegExp?: string;
/**
 * Additional URI protocols to allow in attributes with URI values.
 * Each entry is a protocol name without the trailing colon.
 * Example:
 *  additionalAllowedURIProtocols: ['vscode']
 * this will allow all attributes with URI values to have `vscode` protocol like `vscode://some/path` in addition to the default protocols
 * matched by DOMPurify's IS_ALLOWED_URI RegExp.
 * @see https://raw.githubusercontent.com/cure53/DOMPurify/master/src/regexp.ts
 * @visibility frontend
 */
additionalAllowedURIProtocols?: string[];
};
};
}
@@ -28,6 +28,7 @@ const configApiMock: ConfigApi = new ConfigReader({
sanitizer: {
allowedCustomElementTagNameRegExp: '^backstage-',
allowedCustomElementAttributeNameRegExp: 'attribute1|attribute2',
additionalAllowedURIProtocols: ['permitted'],
},
},
});
@@ -39,6 +40,28 @@ const wrapper: FC<PropsWithChildren<{}>> = ({ children }) => (
);
describe('Transformers > Html > Sanitizer Custom Elements', () => {
it('allows additional protocols in URIs when provided via config', async () => {
  // The mocked config allows the extra `permitted` protocol; `nope` is not
  // configured and `https` is one of the default protocols.
  const { result } = renderHook(() => useSanitizerTransformer(), { wrapper });

  const unsanitizedRoot = document.createElement('html');
  unsanitizedRoot.innerHTML = `
<body>
<a href="permitted:mcp/install">Yep</a>
<a href="nope://not-allowed">Nope</a>
<a href="https://example.com">Example</a>
</body>`;

  // Run the sanitizer transformer returned by the hook.
  const sanitizedRoot = await result.current(unsanitizedRoot);

  // Collect the surviving href of every anchor, in document order; an anchor
  // whose href was stripped yields `null`.
  const hrefs = [
    ...sanitizedRoot.querySelectorAll<HTMLAnchorElement>('body > a'),
  ].map(anchor => anchor.getAttribute('href'));

  expect(hrefs).toHaveLength(3);
  const [permittedHref, rejectedHref, defaultHref] = hrefs;
  expect(permittedHref).toEqual('permitted:mcp/install');
  expect(rejectedHref).toBeNull();
  expect(defaultHref).toEqual('https://example.com');
});
it('should return a function that allows custom elements matching the pattern in the given dom element', async () => {
const { result } = renderHook(() => useSanitizerTransformer(), { wrapper });
@@ -78,6 +78,35 @@ export const useSanitizerTransformer = (): Transformer => {
const attributeNameCheck = config?.getOptionalString(
'allowedCustomElementAttributeNameRegExp',
);
// Extra URI protocols read from the sanitizer config; an empty list when the
// option is not set.
const additionalAllowedURIProtocols =
  config?.getOptionalStringArray('additionalAllowedURIProtocols') || [];

// Define allowed URI protocols, including any additional ones from the config.
// The default protocols are based on the DOMPurify defaults.
// Empty strings are dropped so they cannot introduce an empty alternative
// into the pattern built below.
const allowedURIProtocols = [
  'callto',
  'cid',
  'ftp',
  'ftps',
  'http',
  'https',
  'mailto',
  'matrix',
  'sms',
  'tel',
  'xmpp',
  ...additionalAllowedURIProtocols,
].filter(Boolean);

// Protocol names are literal text, not patterns: escape regex metacharacters
// so that a scheme such as `web+app` matches exactly itself.
const allowedURIProtocolAlternation = allowedURIProtocols
  .map((protocol: string) => protocol.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'))
  .join('|');

const allowedURIRegExp = new RegExp(
  // This regex is not exposed by DOMPurify, so we need to define it ourselves.
  // It is possible for this to drift from the default in future versions of DOMPurify.
  // See: https://raw.githubusercontent.com/cure53/DOMPurify/master/src/regexp.ts
  //
  // The protocol list is wrapped in its own group so that the trailing `:`
  // applies to every protocol. Without the group only the last alternative
  // requires the colon and the others match as bare prefixes (for example
  // `tel` would accept `telnet:`).
  `^(?:(?:${allowedURIProtocolAlternation}):|[^a-z]|[a-z+.\\-]+(?:[^a-z+.\\-:]|$))`,
  'i',
);
// using outerHTML as we want to preserve the html tag attributes (lang)
return DOMPurify.sanitize(dom.outerHTML, {
@@ -86,6 +115,7 @@ export const useSanitizerTransformer = (): Transformer => {
ADD_ATTR: ['http-equiv', 'content', 'dominant-baseline'],
WHOLE_DOCUMENT: true,
RETURN_DOM: true,
ALLOWED_URI_REGEXP: allowedURIRegExp,
CUSTOM_ELEMENT_HANDLING: {
tagNameCheck: tagNameCheck ? new RegExp(tagNameCheck) : undefined,
attributeNameCheck: attributeNameCheck