Skip to content

Commit

Permalink
Single style capture (#1437)
Browse files Browse the repository at this point in the history
Support a contrived/rare case where a <style> element has multiple text node children (this is usually only possible to recreate via javascript append) ... this PR fixes cases where there are subsequent text mutations to these nodes; previously these would have been lost

* In this scenario, a new CSS comment may now be inserted into the captured `_cssText` for a <style> element to show where it should be broken up into text elements upon replay: `/* rr_split */`
* The new 'can record and replay style mutations' test is the principal way to exercise the problematic scenarios, and is a detailed 'catch-all' test with many checks to cover most of the ways things can fail
* There are new tests for splitting/rebuilding the css using the rr_split marker
* The prior 'dynamic stylesheet' route is now the main route for serializing a stylesheet; dynamic stylesheets were missed out in #1533, but that case is now covered by this PR

This PR was originally extracted from #1475, so the initial motivation was to change the approach to stringifying <style> elements so that it happens in a single place. This is also the motivating factor for always serializing <style> elements via the `_cssText` attribute rather than in their childNodes; in #1475 we will be delaying populating `_cssText` for performance and instead recording them as assets.

Thanks for the detailed review to  Justin Halsall <[email protected]> & Yun Feng <https://github.com/YunFeng0817>
  • Loading branch information
eoghanmurray authored Aug 6, 2024
1 parent 8837fe3 commit 5fbb904
Show file tree
Hide file tree
Showing 19 changed files with 1,606 additions and 398 deletions.
6 changes: 6 additions & 0 deletions .changeset/single-style-capture.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
---
"rrweb-snapshot": patch
"rrweb": patch
---

Edge case: Provide support for mutations on a <style> element which (unusually) has multiple text nodes
1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
"eslint-plugin-compat": "^5.0.0",
"eslint-plugin-jest": "^27.6.0",
"eslint-plugin-tsdoc": "^0.2.17",
"happy-dom": "^14.12.0",
"markdownlint": "^0.25.1",
"markdownlint-cli": "^0.31.1",
"prettier": "2.8.4",
Expand Down
1 change: 0 additions & 1 deletion packages/rrdom/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@
"@typescript-eslint/eslint-plugin": "^5.23.0",
"@typescript-eslint/parser": "^5.23.0",
"eslint": "^8.15.0",
"happy-dom": "^14.12.0",
"puppeteer": "^17.1.3",
"typescript": "^5.4.5",
"vite": "^5.3.1",
Expand Down
98 changes: 85 additions & 13 deletions packages/rrweb-snapshot/src/rebuild.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { mediaSelectorPlugin, pseudoClassPlugin } from './css';
import {
type serializedNodeWithId,
type serializedElementNodeWithId,
type serializedTextNodeWithId,
NodeType,
type tagMap,
type elementNode,
Expand Down Expand Up @@ -78,6 +80,77 @@ export function createCache(): BuildCache {
};
}

/**
 * Collect the text-node children of a serialized element, in document order.
 * Shared by `applyCssSplits` and `buildStyleNode` so that both agree on
 * whether there is anywhere to put the css.
 */
function getTextChildNodes(
  n: serializedElementNodeWithId,
): serializedTextNodeWithId[] {
  const textNodes: serializedTextNodeWithId[] = [];
  for (const child of n.childNodes) {
    if (child.type === NodeType.Text) {
      textNodes.push(child);
    }
  }
  return textNodes;
}

/**
 * undo splitCssText/markCssSplits
 * (would move to utils.ts but uses `adaptCssForReplay`)
 *
 * Splits `cssText` on the `rr_split` marker comment and writes each section
 * into the `textContent` of the corresponding text-node child of `n`
 * (mutating the serialized nodes in place).
 *
 * @param n - serialized <style> element whose text-node children are filled in
 * @param cssText - the captured `_cssText`, possibly containing split markers
 * @param hackCss - when true, each section is passed through `adaptCssForReplay`
 * @param cache - build cache handed to `adaptCssForReplay`
 *
 * Note: if `n` has no text-node children there is nowhere to write to and this
 * function does nothing; `buildStyleNode` handles that case itself.
 */
export function applyCssSplits(
  n: serializedElementNodeWithId,
  cssText: string,
  hackCss: boolean,
  cache: BuildCache,
): void {
  const childTextNodes = getTextChildNodes(n);
  const cssTextSplits = cssText.split('/* rr_split */');
  while (
    cssTextSplits.length > 1 &&
    cssTextSplits.length > childTextNodes.length
  ) {
    // unexpected: remerge the last two so that we don't discard any css
    cssTextSplits.splice(-2, 2, cssTextSplits.slice(-2).join(''));
  }
  for (let i = 0; i < childTextNodes.length; i++) {
    const childTextNode = childTextNodes[i];
    const cssTextSection = cssTextSplits[i];
    // there may be fewer sections than text nodes, and a section may be
    // empty; in both cases the text node is left as serialized
    if (childTextNode && cssTextSection) {
      // id will be assigned when these child nodes are
      // iterated over in buildNodeWithSN
      childTextNode.textContent = hackCss
        ? adaptCssForReplay(cssTextSection, cache)
        : cssTextSection;
    }
  }
}

/**
 * Normally a <style> element has a single textNode containing the rules.
 * During serialization, we bypass this (via `styleEl.sheet`) to get the rules
 * the browser sees and serialize this to a special _cssText attribute, blanking
 * out any text nodes. This function reverses that and also handles cases where
 * there were no textNode children present (dynamic css or a <link> element) as
 * well as multiple textNodes, which need to be repopulated (based on presence
 * of a special `rr_split` marker) in case they are modified by subsequent
 * mutations.
 *
 * @param n - the serialized <style> (or inlined <link>) element
 * @param styleEl - the DOM element being rebuilt for `n`
 * @param cssText - value of the serialized `_cssText` attribute
 * @param options - `doc` used to create text nodes, plus the `hackCss` flag and
 *   `cache` forwarded to `adaptCssForReplay`
 */
export function buildStyleNode(
  n: serializedElementNodeWithId,
  styleEl: HTMLStyleElement, // when inlined, a <link type="stylesheet"> also gets rebuilt as a <style>
  cssText: string,
  options: {
    doc: Document;
    hackCss: boolean;
    cache: BuildCache;
  },
): void {
  const { doc, hackCss, cache } = options;
  // Check specifically for *text* children: applyCssSplits can only write
  // into text nodes, so an element whose only children are e.g. comment nodes
  // would otherwise have its css silently dropped.
  if (getTextChildNodes(n).length) {
    applyCssSplits(n, cssText, hackCss, cache);
    return;
  }
  /**
    <link> element or dynamic <style> are serialized without any text child
    nodes; we create the text node without an ID or presence in mirror as
    there is no serialized text node that it could correspond to
  */
  styleEl.appendChild(
    doc.createTextNode(hackCss ? adaptCssForReplay(cssText, cache) : cssText),
  );
}

function buildNode(
n: serializedNodeWithId,
options: {
Expand Down Expand Up @@ -154,14 +227,13 @@ function buildNode(
continue;
}

const isTextarea = tagName === 'textarea' && name === 'value';
const isRemoteOrDynamicCss = tagName === 'style' && name === '_cssText';
if (isRemoteOrDynamicCss && hackCss && typeof value === 'string') {
value = adaptCssForReplay(value, cache);
}
if ((isTextarea || isRemoteOrDynamicCss) && typeof value === 'string') {
// https://github.com/rrweb-io/rrweb/issues/112
// https://github.com/rrweb-io/rrweb/pull/1351
if (typeof value !== 'string') {
// pass
} else if (tagName === 'style' && name === '_cssText') {
buildStyleNode(n, node as HTMLStyleElement, value, options);
continue; // no need to set _cssText as attribute
} else if (tagName === 'textarea' && name === 'value') {
// create without an ID or presence in mirror
node.appendChild(doc.createTextNode(value));
n.childNodes = []; // value overrides childNodes
continue;
Expand Down Expand Up @@ -317,11 +389,11 @@ function buildNode(
return node;
}
case NodeType.Text:
return doc.createTextNode(
n.isStyle && hackCss
? adaptCssForReplay(n.textContent, cache)
: n.textContent,
);
if (n.isStyle && hackCss) {
// support legacy style
return doc.createTextNode(adaptCssForReplay(n.textContent, cache));
}
return doc.createTextNode(n.textContent);
case NodeType.CDATA:
return doc.createCDATASection(n.textContent);
case NodeType.Comment:
Expand Down
75 changes: 38 additions & 37 deletions packages/rrweb-snapshot/src/snapshot.ts
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ import {
toLowerCase,
extractFileExtension,
absolutifyURLs,
markCssSplits,
} from './utils';
import dom from '@rrweb/utils';

Expand Down Expand Up @@ -403,6 +404,7 @@ function serializeNode(
* `newlyAddedElement: true` skips scrollTop and scrollLeft check
*/
newlyAddedElement?: boolean;
cssCaptured?: boolean;
},
): serializedNode | false {
const {
Expand All @@ -420,6 +422,7 @@ function serializeNode(
recordCanvas,
keepIframeSrcFn,
newlyAddedElement = false,
cssCaptured = false,
} = options;
// Only record root id when document object is not the base document
const rootId = getRootId(doc, mirror);
Expand Down Expand Up @@ -466,6 +469,7 @@ function serializeNode(
needsMask,
maskTextFn,
rootId,
cssCaptured,
});
case n.CDATA_SECTION_NODE:
return {
Expand Down Expand Up @@ -497,48 +501,38 @@ function serializeTextNode(
needsMask: boolean;
maskTextFn: MaskTextFn | undefined;
rootId: number | undefined;
cssCaptured?: boolean;
},
): serializedNode {
const { needsMask, maskTextFn, rootId } = options;
const { needsMask, maskTextFn, rootId, cssCaptured } = options;
// The parent node may not be a html element which has a tagName attribute.
// So just let it be undefined which is ok in this use case.
const parent = dom.parentNode(n);
const parentTagName = parent && (parent as HTMLElement).tagName;
let text = dom.textContent(n);
let textContent: string | null = '';
const isStyle = parentTagName === 'STYLE' ? true : undefined;
const isScript = parentTagName === 'SCRIPT' ? true : undefined;
if (isStyle && text) {
try {
// try to read style sheet
if (n.nextSibling || n.previousSibling) {
// This is not the only child of the stylesheet.
// We can't read all of the sheet's .cssRules and expect them
// to _only_ include the current rule(s) added by the text node.
// So we'll be conservative and keep textContent as-is.
} else if ((parent as HTMLStyleElement).sheet?.cssRules) {
text = stringifyStylesheet((parent as HTMLStyleElement).sheet!);
}
} catch (err) {
console.warn(
`Cannot get CSS styles from text's parentNode. Error: ${err as string}`,
n,
);
}
text = absolutifyURLs(text, getHref(options.doc));
}
if (isScript) {
text = 'SCRIPT_PLACEHOLDER';
textContent = 'SCRIPT_PLACEHOLDER';
} else if (!cssCaptured) {
textContent = dom.textContent(n);
if (isStyle && textContent) {
// mutation only: we don't need to use stringifyStylesheet
// as a <style> text node mutation obliterates any previous
// programmatic rule manipulation (.insertRule etc.)
// so the current textContent represents the most up to date state
textContent = absolutifyURLs(textContent, getHref(options.doc));
}
}
if (!isStyle && !isScript && text && needsMask) {
text = maskTextFn
? maskTextFn(text, dom.parentElement(n))
: text.replace(/[\S]/g, '*');
if (!isStyle && !isScript && textContent && needsMask) {
textContent = maskTextFn
? maskTextFn(textContent, dom.parentElement(n))
: textContent.replace(/[\S]/g, '*');
}

return {
type: NodeType.Text,
textContent: text || '',
isStyle,
textContent: textContent || '',
rootId,
};
}
Expand Down Expand Up @@ -608,17 +602,14 @@ function serializeElementNode(
attributes._cssText = cssText;
}
}
// dynamic stylesheet
if (
tagName === 'style' &&
(n as HTMLStyleElement).sheet &&
// TODO: Currently we only try to get dynamic stylesheet when it is an empty style element
!(n.innerText || dom.textContent(n) || '').trim().length
) {
const cssText = stringifyStylesheet(
if (tagName === 'style' && (n as HTMLStyleElement).sheet) {
let cssText = stringifyStylesheet(
(n as HTMLStyleElement).sheet as CSSStyleSheet,
);
if (cssText) {
if (n.childNodes.length > 1) {
cssText = markCssSplits(cssText, n as HTMLStyleElement);
}
attributes._cssText = cssText;
}
}
Expand Down Expand Up @@ -937,6 +928,7 @@ export function serializeNodeWithId(
node: serializedElementNodeWithId,
) => unknown;
stylesheetLoadTimeout?: number;
cssCaptured?: boolean;
},
): serializedNodeWithId | null {
const {
Expand All @@ -962,6 +954,7 @@ export function serializeNodeWithId(
stylesheetLoadTimeout = 5000,
keepIframeSrcFn = () => false,
newlyAddedElement = false,
cssCaptured = false,
} = options;
let { needsMask } = options;
let { preserveWhiteSpace = true } = options;
Expand Down Expand Up @@ -992,6 +985,7 @@ export function serializeNodeWithId(
recordCanvas,
keepIframeSrcFn,
newlyAddedElement,
cssCaptured,
});
if (!_serializedNode) {
// TODO: dev only
Expand All @@ -1007,7 +1001,6 @@ export function serializeNodeWithId(
slimDOMExcluded(_serializedNode, slimDOMOptions) ||
(!preserveWhiteSpace &&
_serializedNode.type === NodeType.Text &&
!_serializedNode.isStyle &&
!_serializedNode.textContent.replace(/^\s+|\s+$/gm, '').length)
) {
id = IGNORED_NODE;
Expand Down Expand Up @@ -1072,6 +1065,7 @@ export function serializeNodeWithId(
onStylesheetLoad,
stylesheetLoadTimeout,
keepIframeSrcFn,
cssCaptured: false,
};

if (
Expand All @@ -1081,6 +1075,13 @@ export function serializeNodeWithId(
) {
// value parameter in DOM reflects the correct value, so ignore childNode
} else {
if (
serializedNode.type === NodeType.Element &&
(serializedNode as elementNode).attributes._cssText !== undefined &&
typeof serializedNode.attributes._cssText === 'string'
) {
bypassOptions.cssCaptured = true;
}
for (const childN of Array.from(dom.childNodes(n))) {
const serializedChildNode = serializeNodeWithId(childN, bypassOptions);
if (serializedChildNode) {
Expand Down
22 changes: 20 additions & 2 deletions packages/rrweb-snapshot/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,18 @@ export type documentTypeNode = {
systemId: string;
};

export type attributes = {
[key: string]: string | number | true | null;
/**
 * Declares the special `_cssText` key separately so that it is typed as a
 * plain string rather than the wider union of the index signature below.
 */
type cssTextKeyAttr = {
_cssText?: string;
};

/**
 * Serialized attribute map of an element node: arbitrary string keys plus the
 * optional, string-typed `_cssText` key.
 */
export type attributes = cssTextKeyAttr & {
[key: string]:
| string
| number // properties e.g. rr_scrollLeft or rr_mediaCurrentTime
| true // e.g. checked on <input type="radio">
| null; // an indication that an attribute was removed (during a mutation)
};

export type legacyAttributes = {
/**
* @deprecated old bug in rrweb was causing these to always be set
Expand All @@ -45,6 +54,10 @@ export type elementNode = {
/**
 * Serialized form of a DOM text node: the `NodeType.Text` discriminant plus
 * the node's text content.
 */
export type textNode = {
type: NodeType.Text;
textContent: string;
/**
* @deprecated styles are now always snapshotted against parent <style> element
* style mutations can still happen via an added textNode, but they don't need this attribute for correct replay
*/
isStyle?: true;
};

Expand Down Expand Up @@ -78,6 +91,11 @@ export type serializedElementNodeWithId = Extract<
Record<'type', NodeType.Element>
>;

/**
 * The member(s) of the `serializedNodeWithId` union whose `type` is
 * `NodeType.Text`.
 */
export type serializedTextNodeWithId = Extract<
serializedNodeWithId,
Record<'type', NodeType.Text>
>;

export type tagMap = {
[key: string]: string;
};
Expand Down
Loading

0 comments on commit 5fbb904

Please sign in to comment.