Fix a variety of issues with HTML → Markdown conversion (#8004)

* Fix a variety of issues with HTML → Markdown conversion

Signed-off-by: Robin Townsend <robin@robin.town>

* Fix lint

Signed-off-by: Robin Townsend <robin@robin.town>

* Fix @room pill formatting not being applied to link text

Signed-off-by: Robin Townsend <robin@robin.town>
This commit is contained in:
Robin 2022-03-09 07:43:05 -05:00 committed by GitHub
parent 65691202f7
commit c10ac9e4a0
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 422 additions and 260 deletions

View File

@ -17,190 +17,110 @@ limitations under the License.
import { MatrixEvent } from "matrix-js-sdk/src/models/event";
import { walkDOMDepthFirst } from "./dom";
import { checkBlockNode } from "../HtmlUtils";
import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks";
import { Part, PartCreator, Type } from "./parts";
import SdkConfig from "../SdkConfig";
import { textToHtmlRainbow } from "../utils/colour";
function parseAtRoomMentions(text: string, partCreator: PartCreator): Part[] {
const LIST_TYPES = ["UL", "OL", "LI"];
// Backslash-escapes every character in the given text that Markdown would
// otherwise treat as markup: backslash, asterisk, underscore, square brackets,
// backtick and "<" anywhere, plus a ">" sitting at the very start of the text.
function escape(text: string): string {
    const markupChars = /[\\*_[\]`<]|^>/g;
    return text.replace(markupChars, (hit) => "\\" + hit);
}
// Returns the length of the longest run of consecutive backticks in the given
// text (0 when there are none). Callers use it to pick a code fence that is
// longer than any backtick run inside the code itself.
function longestBacktickSequence(text: string): number {
    const runs = text.match(/`+/g) ?? [];
    return runs.reduce((longest, run) => Math.max(longest, run.length), 0);
}
// Tells whether the node's direct parent is one of the list element types
// named in LIST_TYPES. A node without a parent is never a list child.
function isListChild(n: Node): boolean {
    const parentName = n.parentNode?.nodeName;
    return LIST_TYPES.includes(parentName);
}
function parseAtRoomMentions(text: string, pc: PartCreator): Part[] {
const ATROOM = "@room";
const parts: Part[] = [];
text.split(ATROOM).forEach((textPart, i, arr) => {
if (textPart.length) {
parts.push(...partCreator.plainWithEmoji(textPart));
parts.push(...pc.plainWithEmoji(escape(textPart)));
}
// it's safe to never append @room after the last textPart
// as split will report an empty string at the end if
// `text` ended in @room.
const isLast = i === arr.length - 1;
if (!isLast) {
parts.push(partCreator.atRoomPill(ATROOM));
parts.push(pc.atRoomPill(ATROOM));
}
});
return parts;
}
function parseLink(a: HTMLAnchorElement, partCreator: PartCreator): Part[] {
const { href } = a;
function parseLink(n: Node, pc: PartCreator): Part[] {
const { href } = n as HTMLAnchorElement;
const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID
const prefix = resourceId ? resourceId[0] : undefined; // First character of ID
switch (prefix) {
case "@":
return [partCreator.userPill(a.textContent, resourceId)];
case "#":
return [partCreator.roomPill(resourceId)];
default: {
if (href === a.textContent) {
return partCreator.plainWithEmoji(a.textContent);
} else {
return partCreator.plainWithEmoji(`[${a.textContent.replace(/[[\\\]]/g, c => "\\" + c)}](${href})`);
}
}
switch (resourceId?.[0]) {
case "@": return [pc.userPill(n.textContent, resourceId)];
case "#": return [pc.roomPill(resourceId)];
}
const children = Array.from(n.childNodes);
if (href === n.textContent && children.every(c => c.nodeType === Node.TEXT_NODE)) {
return parseAtRoomMentions(n.textContent, pc);
} else {
return [pc.plain("["), ...parseChildren(n, pc), pc.plain(`](${href})`)];
}
}
function parseImage(img: HTMLImageElement, partCreator: PartCreator): Part[] {
const { src } = img;
return partCreator.plainWithEmoji(`![${img.alt.replace(/[[\\\]]/g, c => "\\" + c)}](${src})`);
// Renders an image element as Markdown image syntax, ![alt](src), with the
// alt text escaped so that it cannot introduce markup of its own.
function parseImage(n: Node, pc: PartCreator): Part[] {
    const img = n as HTMLImageElement;
    const markdown = "![" + escape(img.alt) + "](" + img.src + ")";
    return pc.plainWithEmoji(markdown);
}
function parseCodeBlock(n: HTMLElement, partCreator: PartCreator): Part[] {
const parts: Part[] = [];
function parseCodeBlock(n: Node, pc: PartCreator): Part[] {
let language = "";
if (n.firstChild && n.firstChild.nodeName === "CODE") {
for (const className of (<HTMLElement>n.firstChild).classList) {
if (n.firstChild?.nodeName === "CODE") {
for (const className of (n.firstChild as HTMLElement).classList) {
if (className.startsWith("language-") && !className.startsWith("language-_")) {
language = className.substr("language-".length);
break;
}
}
}
const preLines = ("```" + language + "\n" + n.textContent + "```").split("\n");
preLines.forEach((l, i) => {
parts.push(...partCreator.plainWithEmoji(l));
if (i < preLines.length - 1) {
parts.push(partCreator.newline());
}
const text = n.textContent.replace(/\n$/, "");
// Escape backticks by using even more backticks for the fence if necessary
const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1));
const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()];
text.split("\n").forEach(line => {
parts.push(...pc.plainWithEmoji(line));
parts.push(pc.newline());
});
parts.push(pc.plain(fence));
return parts;
}
// Builds the Markdown heading marker for a heading element: one "#" per level
// (the digit following the first character of the node name) and a trailing space.
function parseHeader(el: HTMLElement, partCreator: PartCreator): Part {
    const level = Number.parseInt(el.nodeName.slice(1), 10);
    const marker = `${"#".repeat(level)} `;
    return partCreator.plain(marker);
}
// Mutable list-tracking state handed to parseElement for every element visited
// during a single DOM walk.
interface IState {
// Stack holding the next item number of each open ordered list: a value is
// pushed when an OL is entered, read and incremented for each of its LI
// children, and popped again when the OL is left.
listIndex: number[];
// How many UL/OL elements are currently open; left unset until the first list
// is entered. LI indentation is derived from it.
listDepth?: number;
}
// Produces the part(s) that an element itself contributes to the Markdown
// output, i.e. its prefix or its flattened text; it does not recurse into
// child nodes.
//
// n: the element being visited
// partCreator: factory used to build the returned parts
// lastNode: the previously recorded node, if any; only consulted for P
// state: list bookkeeping, mutated here when an OL or UL is encountered
//
// Returns a single part, an array of parts, or nothing at all (implicit
// undefined) when the element contributes no parts of its own.
function parseElement(
n: HTMLElement,
partCreator: PartCreator,
lastNode: Node | undefined,
state: IState,
): Part | Part[] {
switch (n.nodeName) {
case "H1":
case "H2":
case "H3":
case "H4":
case "H5":
case "H6":
// only the "#" marker is produced here, not the heading text
return parseHeader(n, partCreator);
case "A":
return parseLink(<HTMLAnchorElement>n, partCreator);
case "IMG":
return parseImage(<HTMLImageElement>n, partCreator);
case "BR":
return partCreator.newline();
case "HR":
// the newline arrangement here is quite specific otherwise it may be misconstrued as marking the previous
// text line as a header instead of acting as a horizontal rule.
return [
partCreator.newline(),
partCreator.plain("---"),
partCreator.newline(),
];
// The inline formatting cases below wrap the element's textContent, so any
// markup nested inside the element is flattened to plain text.
case "EM":
return partCreator.plainWithEmoji(`_${n.textContent}_`);
case "STRONG":
return partCreator.plainWithEmoji(`**${n.textContent}**`);
case "PRE":
return parseCodeBlock(n, partCreator);
case "CODE":
return partCreator.plainWithEmoji(`\`${n.textContent}\``);
case "DEL":
return partCreator.plainWithEmoji(`<del>${n.textContent}</del>`);
case "SUB":
return partCreator.plainWithEmoji(`<sub>${n.textContent}</sub>`);
case "SUP":
return partCreator.plainWithEmoji(`<sup>${n.textContent}</sup>`);
case "U":
return partCreator.plainWithEmoji(`<u>${n.textContent}</u>`);
case "LI": {
// Only the indent and the bullet/number are emitted here. The indent is
// BASE_INDENT spaces for every list level beyond the first.
const BASE_INDENT = 4;
const depth = state.listDepth - 1;
const indent = " ".repeat(BASE_INDENT * depth);
if (n.parentElement.nodeName === "OL") {
// The markdown parser doesn't do nested indexed lists at all, but this supports it anyway.
// Use the innermost list's counter, then advance it for the next item.
const index = state.listIndex[state.listIndex.length - 1];
state.listIndex[state.listIndex.length - 1] += 1;
return partCreator.plain(`${indent}${index}. `);
} else {
return partCreator.plain(`${indent}- `);
}
}
case "P": {
// a paragraph yields a newline only when a previous node has been recorded
if (lastNode) {
return partCreator.newline();
}
break;
}
case "DIV":
case "SPAN": {
// math nodes are translated back into delimited latex strings
if (n.hasAttribute("data-mx-maths")) {
// SPAN uses the configured inline delimiters and DIV the display ones,
// falling back to \( \) and \[ \] when none are configured
const delimLeft = (n.nodeName == "SPAN") ?
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" :
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\[";
const delimRight = (n.nodeName == "SPAN") ?
((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" :
((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]";
const tex = n.getAttribute("data-mx-maths");
return partCreator.plainWithEmoji(delimLeft + tex + delimRight);
} else if (!checkDescendInto(n)) {
return partCreator.plainWithEmoji(n.textContent);
}
break;
}
case "OL":
// open a new counter for this list, starting from its `start` value (or 1)
state.listIndex.push((<HTMLOListElement>n).start || 1);
/* falls through */
case "UL":
// entering any list makes the nesting one level deeper
state.listDepth = (state.listDepth || 0) + 1;
/* falls through */
default:
// don't textify block nodes we'll descend into
if (!checkDescendInto(n)) {
return partCreator.plainWithEmoji(n.textContent);
}
}
}
function checkDescendInto(node) {
switch (node.nodeName) {
case "PRE":
// a code block is textified in parseCodeBlock
// as we don't want to preserve markup in it,
// so no need to descend into it
return false;
default:
return checkBlockNode(node);
}
// Renders a heading element as Markdown: one "#" per heading level (the digit
// in the node name), a space, and then the heading's converted children.
function parseHeader(n: Node, pc: PartCreator): Part[] {
    const level = Number.parseInt(n.nodeName.slice(1), 10);
    const hashes = "#".repeat(level);
    return [pc.plain(`${hashes} `), ...parseChildren(n, pc)];
}
function checkIgnored(n) {
@ -214,144 +134,169 @@ function checkIgnored(n) {
return true;
}
const QUOTE_LINE_PREFIX = "> ";
function prefixQuoteLines(isFirstNode, parts, partCreator) {
// a newline (to append a > to) wouldn't be added to parts for the first line
// if there was no content before the BLOCKQUOTE, so handle that
if (isFirstNode) {
parts.splice(0, 0, partCreator.plain(QUOTE_LINE_PREFIX));
}
for (let i = 0; i < parts.length; i += 1) {
function prefixLines(parts: Part[], prefix: string, pc: PartCreator) {
parts.unshift(pc.plain(prefix));
for (let i = 0; i < parts.length; i++) {
if (parts[i].type === Type.Newline) {
parts.splice(i + 1, 0, partCreator.plain(QUOTE_LINE_PREFIX));
parts.splice(i + 1, 0, pc.plain(prefix));
i += 1;
}
}
}
function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean): Part[] {
// Converts every child of the given node and joins the results into one list
// of parts. Whenever a child that produced output borders a block node (either
// it or the previous output-producing sibling is one), newline parts are put
// between them: a single newline for children of list elements, two otherwise.
// Children that produce no parts are skipped and do not count as a "previous"
// sibling. mkListItem is handed through to parseNode for each child.
function parseChildren(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    const collected: Part[] = [];
    let previous: Node | undefined;

    for (const child of Array.from(n.childNodes)) {
        const childParts = parseNode(child, pc, mkListItem);
        if (childParts.length === 0) continue;

        if (previous && (checkBlockNode(previous) || checkBlockNode(child))) {
            // Use tighter spacing within lists
            const separator = isListChild(child)
                ? [pc.newline()]
                : [pc.newline(), pc.newline()];
            collected.push(...separator);
        }

        collected.push(...childParts);
        previous = child;
    }

    return collected;
}
// Converts a DOM node, together with its descendants, into editor parts that
// spell out the equivalent Markdown.
//
// n: the node to convert
// pc: factory used to create the resulting parts
// mkListItem: optional renderer for LI nodes, supplied by the enclosing UL/OL
//     so that each item receives the right bullet or number; without it an LI
//     is rendered as just its children
//
// Returns an empty array for nodes rejected by checkIgnored(). Node kinds and
// elements without a dedicated case are rendered as their converted children.
function parseNode(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] {
    if (checkIgnored(n)) return [];

    switch (n.nodeType) {
        case Node.TEXT_NODE:
            return parseAtRoomMentions(n.nodeValue, pc);
        case Node.ELEMENT_NODE:
            switch (n.nodeName) {
                case "H1":
                case "H2":
                case "H3":
                case "H4":
                case "H5":
                case "H6":
                    return parseHeader(n, pc);
                case "A":
                    return parseLink(n, pc);
                case "IMG":
                    return parseImage(n, pc);
                case "BR":
                    return [pc.newline()];
                case "HR":
                    return [pc.plain("---")];
                case "EM":
                    return [pc.plain("_"), ...parseChildren(n, pc), pc.plain("_")];
                case "STRONG":
                    return [pc.plain("**"), ...parseChildren(n, pc), pc.plain("**")];
                // Formatting that Markdown has no syntax for is kept as inline HTML tags
                case "DEL":
                    return [pc.plain("<del>"), ...parseChildren(n, pc), pc.plain("</del>")];
                case "SUB":
                    return [pc.plain("<sub>"), ...parseChildren(n, pc), pc.plain("</sub>")];
                case "SUP":
                    return [pc.plain("<sup>"), ...parseChildren(n, pc), pc.plain("</sup>")];
                case "U":
                    return [pc.plain("<u>"), ...parseChildren(n, pc), pc.plain("</u>")];
                case "PRE":
                    return parseCodeBlock(n, pc);
                case "CODE": {
                    // Escape backticks by using multiple backticks for the fence if necessary
                    const fence = "`".repeat(longestBacktickSequence(n.textContent) + 1);
                    return pc.plainWithEmoji(`${fence}${n.textContent}${fence}`);
                }
                case "BLOCKQUOTE": {
                    const parts = parseChildren(n, pc);
                    prefixLines(parts, "> ", pc);
                    return parts;
                }
                case "LI":
                    return mkListItem?.(n) ?? parseChildren(n, pc);
                case "UL": {
                    const parts = parseChildren(n, pc, li => [pc.plain("- "), ...parseChildren(li, pc)]);
                    // A list nested inside another list gets its lines prefixed
                    if (isListChild(n)) {
                        prefixLines(parts, " ", pc);
                    }
                    return parts;
                }
                case "OL": {
                    // Number from the list's `start` attribute rather than always from 1,
                    // so that lists which begin at another number keep their numbering
                    let counter = (n as HTMLOListElement).start ?? 1;
                    const parts = parseChildren(n, pc, li => {
                        const item = [pc.plain(`${counter}. `), ...parseChildren(li, pc)];
                        counter++;
                        return item;
                    });
                    // A list nested inside another list gets its lines prefixed
                    if (isListChild(n)) {
                        prefixLines(parts, " ", pc);
                    }
                    return parts;
                }
                case "DIV":
                case "SPAN":
                    // Math nodes are translated back into delimited latex strings
                    if ((n as Element).hasAttribute("data-mx-maths")) {
                        const delims = SdkConfig.get().latex_maths_delims;
                        const delimLeft = (n.nodeName === "SPAN") ?
                            delims?.inline?.left ?? "\\(" :
                            delims?.display?.left ?? "\\[";
                        const delimRight = (n.nodeName === "SPAN") ?
                            delims?.inline?.right ?? "\\)" :
                            delims?.display?.right ?? "\\]";
                        const tex = (n as Element).getAttribute("data-mx-maths");
                        return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`);
                    }
                    // A DIV/SPAN that is not a math node is handled by the generic fallback below
            }
    }

    // Anything without a dedicated case above is rendered as its converted children
    return parseChildren(n, pc);
}
function parseHtmlMessage(html: string, pc: PartCreator, isQuotedMessage: boolean): Part[] {
// no nodes from parsing here should be inserted in the document,
// as scripts in event handlers, etc would be executed then.
// we're only taking text, so that is fine
const rootNode = new DOMParser().parseFromString(html, "text/html").body;
const parts: Part[] = [];
let lastNode: Node;
let inQuote = isQuotedMessage;
const state: IState = {
listIndex: [],
};
function onNodeEnter(n: Node) {
if (checkIgnored(n)) {
return false;
}
if (n.nodeName === "BLOCKQUOTE") {
inQuote = true;
}
const newParts: Part[] = [];
if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) {
newParts.push(partCreator.newline());
}
if (n.nodeType === Node.TEXT_NODE) {
let { nodeValue } = n;
// Sometimes commonmark adds a newline at the end of the list item text
if (n.parentNode.nodeName === "LI") {
nodeValue = nodeValue.trimEnd();
}
newParts.push(...parseAtRoomMentions(nodeValue, partCreator));
const grandParent = n.parentNode.parentNode;
const isTight = n.parentNode.nodeName !== "P" || grandParent?.nodeName !== "LI";
if (!isTight) {
newParts.push(partCreator.newline());
}
} else if (n.nodeType === Node.ELEMENT_NODE) {
const parseResult = parseElement(n as HTMLElement, partCreator, lastNode, state);
if (parseResult) {
if (Array.isArray(parseResult)) {
newParts.push(...parseResult);
} else {
newParts.push(parseResult);
}
}
}
if (newParts.length && inQuote) {
const isFirstPart = parts.length === 0;
prefixQuoteLines(isFirstPart, newParts, partCreator);
}
parts.push(...newParts);
const descend = checkDescendInto(n);
// when not descending (like for PRE), onNodeLeave won't be called to set lastNode
// so do that here.
lastNode = descend ? null : n;
return descend;
const parts = parseNode(new DOMParser().parseFromString(html, "text/html").body, pc);
if (isQuotedMessage) {
prefixLines(parts, "> ", pc);
}
function onNodeLeave(n: Node) {
if (checkIgnored(n)) {
return;
}
switch (n.nodeName) {
case "BLOCKQUOTE":
inQuote = false;
break;
case "OL":
state.listIndex.pop();
/* falls through */
case "UL":
state.listDepth -= 1;
break;
}
lastNode = n;
}
walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave);
return parts;
}
export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage?: boolean): Part[] {
export function parsePlainTextMessage(body: string, pc: PartCreator, isQuotedMessage?: boolean): Part[] {
const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n
return lines.reduce((parts, line, i) => {
if (isQuotedMessage) {
parts.push(partCreator.plain(QUOTE_LINE_PREFIX));
parts.push(pc.plain("> "));
}
parts.push(...parseAtRoomMentions(line, partCreator));
parts.push(...parseAtRoomMentions(line, pc));
const isLast = i === lines.length - 1;
if (!isLast) {
parts.push(partCreator.newline());
parts.push(pc.newline());
}
return parts;
}, [] as Part[]);
}
export function parseEvent(event: MatrixEvent, partCreator: PartCreator, { isQuotedMessage = false } = {}) {
export function parseEvent(event: MatrixEvent, pc: PartCreator, { isQuotedMessage = false } = {}) {
const content = event.getContent();
let parts: Part[];
const isEmote = content.msgtype === "m.emote";
let isRainbow = false;
if (content.format === "org.matrix.custom.html") {
parts = parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage);
parts = parseHtmlMessage(content.formatted_body || "", pc, isQuotedMessage);
if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) {
isRainbow = true;
}
} else {
parts = parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage);
parts = parsePlainTextMessage(content.body || "", pc, isQuotedMessage);
}
if (isEmote && isRainbow) {
parts.unshift(partCreator.plain("/rainbowme "));
parts.unshift(pc.plain("/rainbowme "));
} else if (isRainbow) {
parts.unshift(partCreator.plain("/rainbow "));
parts.unshift(pc.plain("/rainbow "));
} else if (isEmote) {
parts.unshift(partCreator.plain("/me "));
parts.unshift(pc.plain("/me "));
}
return parts;

View File

@ -0,0 +1,178 @@
// Jest Snapshot v1, https://goo.gl/fbAQLP
exports[`editor/deserialize html messages escapes angle brackets 1`] = `
Array [
Object {
"text": "\\\\> \\\\\\\\<del>no formatting here\\\\\\\\</del>",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes asterisks 1`] = `
Array [
Object {
"text": "\\\\*hello\\\\*",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backslashes 1`] = `
Array [
Object {
"text": "C:\\\\\\\\My Documents",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backticks in code blocks 1`] = `
Array [
Object {
"text": "\`\`this → \` is a backtick\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "and here are 3 of them:",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "\`\`\`\`",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes backticks outside of code blocks 1`] = `
Array [
Object {
"text": "some \\\\\`backticks\\\\\`",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes square brackets 1`] = `
Array [
Object {
"text": "\\\\[not an actual link\\\\](https://example.org)",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages escapes underscores 1`] = `
Array [
Object {
"text": "\\\\_\\\\_emphasis\\\\_\\\\_",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages preserves nested formatting 1`] = `
Array [
Object {
"text": "a<sub>b_c**d<u>e</u>**_</sub>",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages preserves nested quotes 1`] = `
Array [
Object {
"text": "> foo",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "> ",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "> > bar",
"type": "plain",
},
]
`;
exports[`editor/deserialize html messages surrounds lists with newlines 1`] = `
Array [
Object {
"text": "foo",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "- bar",
"type": "plain",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "
",
"type": "newline",
},
Object {
"text": "baz",
"type": "plain",
},
]
`;

View File

@ -237,18 +237,6 @@ describe('editor/deserialize', function() {
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" });
});
it('non tight lists', () => {
const html = "<ol><li><p>Start</p></li><li><p>Continue</p></li><li><p>Finish</p></li></ol>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts.length).toBe(8);
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Start" });
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[2]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[3]).toStrictEqual({ type: "plain", text: "2. Continue" });
expect(parts[4]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[5]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[6]).toStrictEqual({ type: "plain", text: "3. Finish" });
});
it('nested unordered lists', () => {
const html = "<ul><li>Oak<ul><li>Spruce<ul><li>Birch</li></ul></li></ul></li></ul>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
@ -269,13 +257,13 @@ describe('editor/deserialize', function() {
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
});
it('nested tight lists', () => {
it('nested lists', () => {
const html = "<ol><li>Oak\n<ol><li>Spruce\n<ol><li>Birch</li></ol></li></ol></li></ol>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts.length).toBe(5);
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak" });
expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak\n" });
expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce` });
expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce\n` });
expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" });
expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` });
});
@ -291,5 +279,56 @@ describe('editor/deserialize', function() {
expect(parts.length).toBe(1);
expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" });
});
it('preserves nested quotes', () => {
const html = "<blockquote>foo<blockquote>bar</blockquote></blockquote>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('surrounds lists with newlines', () => {
const html = "foo<ul><li>bar</li></ul>baz";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('preserves nested formatting', () => {
const html = "a<sub>b<em>c<strong>d<u>e</u></strong></em></sub>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes backticks in code blocks', () => {
const html = "<p><code>this → ` is a backtick</code></p>" +
"<pre><code>and here are 3 of them:\n```</code></pre>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes backticks outside of code blocks', () => {
const html = "some `backticks`";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes backslashes', () => {
const html = "C:\\My Documents";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes asterisks', () => {
const html = "*hello*";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes underscores', () => {
const html = "__emphasis__";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes square brackets', () => {
const html = "[not an actual link](https://example.org)";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
it('escapes angle brackets', () => {
const html = "> \\<del>no formatting here\\</del>";
const parts = normalize(parseEvent(htmlMessage(html), createPartCreator()));
expect(parts).toMatchSnapshot();
});
});
});