From c10ac9e4a06bbe50628dcc8a645ee699b630030d Mon Sep 17 00:00:00 2001 From: Robin Date: Wed, 9 Mar 2022 07:43:05 -0500 Subject: [PATCH] =?UTF-8?q?Fix=20a=20variety=20of=20issues=20with=20HTML?= =?UTF-8?q?=20=E2=86=92=20Markdown=20conversion=20(#8004)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Fix a variety of issues with HTML → Markdown conversion Signed-off-by: Robin Townsend * Fix lint Signed-off-by: Robin Townsend * Fix @room pill formatting not being applied to link text Signed-off-by: Robin Townsend --- src/editor/deserialize.ts | 435 ++++++++---------- .../__snapshots__/deserialize-test.js.snap | 178 +++++++ test/editor/deserialize-test.js | 69 ++- 3 files changed, 422 insertions(+), 260 deletions(-) create mode 100644 test/editor/__snapshots__/deserialize-test.js.snap diff --git a/src/editor/deserialize.ts b/src/editor/deserialize.ts index f016a1f61c..f0fff51db2 100644 --- a/src/editor/deserialize.ts +++ b/src/editor/deserialize.ts @@ -17,190 +17,110 @@ limitations under the License. import { MatrixEvent } from "matrix-js-sdk/src/models/event"; -import { walkDOMDepthFirst } from "./dom"; import { checkBlockNode } from "../HtmlUtils"; import { getPrimaryPermalinkEntity } from "../utils/permalinks/Permalinks"; import { Part, PartCreator, Type } from "./parts"; import SdkConfig from "../SdkConfig"; import { textToHtmlRainbow } from "../utils/colour"; -function parseAtRoomMentions(text: string, partCreator: PartCreator): Part[] { +const LIST_TYPES = ["UL", "OL", "LI"]; + +// Escapes all markup in the given text +function escape(text: string): string { + return text.replace(/[\\*_[\]`<]|^>/g, match => `\\${match}`); +} + +// Finds the length of the longest backtick sequence in the given text, used for +// escaping backticks in code blocks +function longestBacktickSequence(text: string): number { + let length = 0; + let currentLength = 0; + + for (const c of text) { + if (c === "`") { + currentLength++; + } else { + length = Math.max(length, currentLength); + currentLength = 0; + } + } + + return Math.max(length, currentLength); +} + +function isListChild(n: Node): boolean { + return LIST_TYPES.includes(n.parentNode?.nodeName); +} + +function parseAtRoomMentions(text: string, pc: PartCreator): Part[] { const ATROOM = "@room"; const parts: Part[] = []; text.split(ATROOM).forEach((textPart, i, arr) => { if (textPart.length) { - parts.push(...partCreator.plainWithEmoji(textPart)); + parts.push(...pc.plainWithEmoji(escape(textPart))); } // it's safe to never append @room after the last textPart // as split will report an empty string at the end if // `text` ended in @room. const isLast = i === arr.length - 1; if (!isLast) { - parts.push(partCreator.atRoomPill(ATROOM)); + parts.push(pc.atRoomPill(ATROOM)); } }); return parts; } -function parseLink(a: HTMLAnchorElement, partCreator: PartCreator): Part[] { - const { href } = a; +function parseLink(n: Node, pc: PartCreator): Part[] { + const { href } = n as HTMLAnchorElement; const resourceId = getPrimaryPermalinkEntity(href); // The room/user ID - const prefix = resourceId ? resourceId[0] : undefined; // First character of ID - switch (prefix) { - case "@": - return [partCreator.userPill(a.textContent, resourceId)]; - case "#": - return [partCreator.roomPill(resourceId)]; - default: { - if (href === a.textContent) { - return partCreator.plainWithEmoji(a.textContent); - } else { - return partCreator.plainWithEmoji(`[${a.textContent.replace(/[[\\\]]/g, c => "\\" + c)}](${href})`); - } - } + + switch (resourceId?.[0]) { + case "@": return [pc.userPill(n.textContent, resourceId)]; + case "#": return [pc.roomPill(resourceId)]; + } + + const children = Array.from(n.childNodes); + if (href === n.textContent && children.every(c => c.nodeType === Node.TEXT_NODE)) { + return parseAtRoomMentions(n.textContent, pc); + } else { + return [pc.plain("["), ...parseChildren(n, pc), pc.plain(`](${href})`)]; } } -function parseImage(img: HTMLImageElement, partCreator: PartCreator): Part[] { - const { src } = img; - return partCreator.plainWithEmoji(`![${img.alt.replace(/[[\\\]]/g, c => "\\" + c)}](${src})`); +function parseImage(n: Node, pc: PartCreator): Part[] { + const { alt, src } = n as HTMLImageElement; + return pc.plainWithEmoji(`![${escape(alt)}](${src})`); } -function parseCodeBlock(n: HTMLElement, partCreator: PartCreator): Part[] { - const parts: Part[] = []; +function parseCodeBlock(n: Node, pc: PartCreator): Part[] { let language = ""; - if (n.firstChild && n.firstChild.nodeName === "CODE") { - for (const className of (n.firstChild).classList) { + if (n.firstChild?.nodeName === "CODE") { + for (const className of (n.firstChild as HTMLElement).classList) { if (className.startsWith("language-") && !className.startsWith("language-_")) { language = className.substr("language-".length); break; } } } - const preLines = ("```" + language + "\n" + n.textContent + "```").split("\n"); - preLines.forEach((l, i) => { - parts.push(...partCreator.plainWithEmoji(l)); - if (i < preLines.length - 1) { - parts.push(partCreator.newline()); - } + + const text = n.textContent.replace(/\n$/, ""); + // Escape backticks by using even more backticks for the fence if necessary + const fence = "`".repeat(Math.max(3, longestBacktickSequence(text) + 1)); + const parts: Part[] = [...pc.plainWithEmoji(fence + language), pc.newline()]; + + text.split("\n").forEach(line => { + parts.push(...pc.plainWithEmoji(line)); + parts.push(pc.newline()); }); + + parts.push(pc.plain(fence)); return parts; } -function parseHeader(el: HTMLElement, partCreator: PartCreator): Part { - const depth = parseInt(el.nodeName.substr(1), 10); - return partCreator.plain("#".repeat(depth) + " "); -} - -interface IState { - listIndex: number[]; - listDepth?: number; -} - -function parseElement( - n: HTMLElement, - partCreator: PartCreator, - lastNode: Node | undefined, - state: IState, -): Part | Part[] { - switch (n.nodeName) { - case "H1": - case "H2": - case "H3": - case "H4": - case "H5": - case "H6": - return parseHeader(n, partCreator); - case "A": - return parseLink(n, partCreator); - case "IMG": - return parseImage(n, partCreator); - case "BR": - return partCreator.newline(); - case "HR": - // the newline arrangement here is quite specific otherwise it may be misconstrued as marking the previous - // text line as a header instead of acting as a horizontal rule. - return [ - partCreator.newline(), - partCreator.plain("---"), - partCreator.newline(), - ]; - case "EM": - return partCreator.plainWithEmoji(`_${n.textContent}_`); - case "STRONG": - return partCreator.plainWithEmoji(`**${n.textContent}**`); - case "PRE": - return parseCodeBlock(n, partCreator); - case "CODE": - return partCreator.plainWithEmoji(`\`${n.textContent}\``); - case "DEL": - return partCreator.plainWithEmoji(`${n.textContent}`); - case "SUB": - return partCreator.plainWithEmoji(`${n.textContent}`); - case "SUP": - return partCreator.plainWithEmoji(`${n.textContent}`); - case "U": - return partCreator.plainWithEmoji(`${n.textContent}`); - case "LI": { - const BASE_INDENT = 4; - const depth = state.listDepth - 1; - const indent = " ".repeat(BASE_INDENT * depth); - if (n.parentElement.nodeName === "OL") { - // The markdown parser doesn't do nested indexed lists at all, but this supports it anyway. - const index = state.listIndex[state.listIndex.length - 1]; - state.listIndex[state.listIndex.length - 1] += 1; - return partCreator.plain(`${indent}${index}. `); - } else { - return partCreator.plain(`${indent}- `); - } - } - case "P": { - if (lastNode) { - return partCreator.newline(); - } - break; - } - case "DIV": - case "SPAN": { - // math nodes are translated back into delimited latex strings - if (n.hasAttribute("data-mx-maths")) { - const delimLeft = (n.nodeName == "SPAN") ? - ((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['left'] || "\\(" : - ((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['left'] || "\\["; - const delimRight = (n.nodeName == "SPAN") ? - ((SdkConfig.get()['latex_maths_delims'] || {})['inline'] || {})['right'] || "\\)" : - ((SdkConfig.get()['latex_maths_delims'] || {})['display'] || {})['right'] || "\\]"; - const tex = n.getAttribute("data-mx-maths"); - return partCreator.plainWithEmoji(delimLeft + tex + delimRight); - } else if (!checkDescendInto(n)) { - return partCreator.plainWithEmoji(n.textContent); - } - break; - } - case "OL": - state.listIndex.push((n).start || 1); - /* falls through */ - case "UL": - state.listDepth = (state.listDepth || 0) + 1; - /* falls through */ - default: - // don't textify block nodes we'll descend into - if (!checkDescendInto(n)) { - return partCreator.plainWithEmoji(n.textContent); - } - } -} - -function checkDescendInto(node) { - switch (node.nodeName) { - case "PRE": - // a code block is textified in parseCodeBlock - // as we don't want to preserve markup in it, - // so no need to descend into it - return false; - default: - return checkBlockNode(node); - } +function parseHeader(n: Node, pc: PartCreator): Part[] { + const depth = parseInt(n.nodeName.substr(1), 10); + const prefix = pc.plain("#".repeat(depth) + " "); + return [prefix, ...parseChildren(n, pc)]; } function checkIgnored(n) { @@ -214,144 +134,169 @@ function checkIgnored(n) { return true; } -const QUOTE_LINE_PREFIX = "> "; -function prefixQuoteLines(isFirstNode, parts, partCreator) { - // a newline (to append a > to) wouldn't be added to parts for the first line - // if there was no content before the BLOCKQUOTE, so handle that - if (isFirstNode) { - parts.splice(0, 0, partCreator.plain(QUOTE_LINE_PREFIX)); - } - for (let i = 0; i < parts.length; i += 1) { +function prefixLines(parts: Part[], prefix: string, pc: PartCreator) { + parts.unshift(pc.plain(prefix)); + for (let i = 0; i < parts.length; i++) { if (parts[i].type === Type.Newline) { - parts.splice(i + 1, 0, partCreator.plain(QUOTE_LINE_PREFIX)); + parts.splice(i + 1, 0, pc.plain(prefix)); i += 1; } } } -function parseHtmlMessage(html: string, partCreator: PartCreator, isQuotedMessage: boolean): Part[] { +function parseChildren(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] { + let prev; + return Array.from(n.childNodes).flatMap(c => { + const parsed = parseNode(c, pc, mkListItem); + if (parsed.length && prev && (checkBlockNode(prev) || checkBlockNode(c))) { + if (isListChild(c)) { + // Use tighter spacing within lists + parsed.unshift(pc.newline()); + } else { + parsed.unshift(pc.newline(), pc.newline()); + } + } + if (parsed.length) prev = c; + return parsed; + }); +} + +function parseNode(n: Node, pc: PartCreator, mkListItem?: (li: Node) => Part[]): Part[] { + if (checkIgnored(n)) return []; + + switch (n.nodeType) { + case Node.TEXT_NODE: + return parseAtRoomMentions(n.nodeValue, pc); + case Node.ELEMENT_NODE: + switch (n.nodeName) { + case "H1": + case "H2": + case "H3": + case "H4": + case "H5": + case "H6": + return parseHeader(n, pc); + case "A": + return parseLink(n, pc); + case "IMG": + return parseImage(n, pc); + case "BR": + return [pc.newline()]; + case "HR": + return [pc.plain("---")]; + case "EM": + return [pc.plain("_"), ...parseChildren(n, pc), pc.plain("_")]; + case "STRONG": + return [pc.plain("**"), ...parseChildren(n, pc), pc.plain("**")]; + case "DEL": + return [pc.plain(""), ...parseChildren(n, pc), pc.plain("")]; + case "SUB": + return [pc.plain(""), ...parseChildren(n, pc), pc.plain("")]; + case "SUP": + return [pc.plain(""), ...parseChildren(n, pc), pc.plain("")]; + case "U": + return [pc.plain(""), ...parseChildren(n, pc), pc.plain("")]; + case "PRE": + return parseCodeBlock(n, pc); + case "CODE": { + // Escape backticks by using multiple backticks for the fence if necessary + const fence = "`".repeat(longestBacktickSequence(n.textContent) + 1); + return pc.plainWithEmoji(`${fence}${n.textContent}${fence}`); + } + case "BLOCKQUOTE": { + const parts = parseChildren(n, pc); + prefixLines(parts, "> ", pc); + return parts; + } + case "LI": + return mkListItem?.(n) ?? parseChildren(n, pc); + case "UL": { + const parts = parseChildren(n, pc, li => [pc.plain("- "), ...parseChildren(li, pc)]); + if (isListChild(n)) { + prefixLines(parts, " ", pc); + } + return parts; + } + case "OL": { + let counter = 1; + const parts = parseChildren(n, pc, li => { + const parts = [pc.plain(`${counter}. `), ...parseChildren(li, pc)]; + counter++; + return parts; + }); + if (isListChild(n)) { + prefixLines(parts, " ", pc); + } + return parts; + } + case "DIV": + case "SPAN": + // Math nodes are translated back into delimited latex strings + if ((n as Element).hasAttribute("data-mx-maths")) { + const delims = SdkConfig.get().latex_maths_delims; + const delimLeft = (n.nodeName === "SPAN") ? + delims?.inline?.left ?? "\\(" : + delims?.display?.left ?? "\\["; + const delimRight = (n.nodeName === "SPAN") ? + delims?.inline?.right ?? "\\)" : + delims?.display?.right ?? "\\]"; + const tex = (n as Element).getAttribute("data-mx-maths"); + + return pc.plainWithEmoji(`${delimLeft}${tex}${delimRight}`); + } + } + } + + return parseChildren(n, pc); +} + +function parseHtmlMessage(html: string, pc: PartCreator, isQuotedMessage: boolean): Part[] { // no nodes from parsing here should be inserted in the document, // as scripts in event handlers, etc would be executed then. // we're only taking text, so that is fine - const rootNode = new DOMParser().parseFromString(html, "text/html").body; - const parts: Part[] = []; - let lastNode: Node; - let inQuote = isQuotedMessage; - const state: IState = { - listIndex: [], - }; - - function onNodeEnter(n: Node) { - if (checkIgnored(n)) { - return false; - } - if (n.nodeName === "BLOCKQUOTE") { - inQuote = true; - } - - const newParts: Part[] = []; - if (lastNode && (checkBlockNode(lastNode) || checkBlockNode(n))) { - newParts.push(partCreator.newline()); - } - - if (n.nodeType === Node.TEXT_NODE) { - let { nodeValue } = n; - - // Sometimes commonmark adds a newline at the end of the list item text - if (n.parentNode.nodeName === "LI") { - nodeValue = nodeValue.trimEnd(); - } - newParts.push(...parseAtRoomMentions(nodeValue, partCreator)); - - const grandParent = n.parentNode.parentNode; - const isTight = n.parentNode.nodeName !== "P" || grandParent?.nodeName !== "LI"; - if (!isTight) { - newParts.push(partCreator.newline()); - } - } else if (n.nodeType === Node.ELEMENT_NODE) { - const parseResult = parseElement(n as HTMLElement, partCreator, lastNode, state); - if (parseResult) { - if (Array.isArray(parseResult)) { - newParts.push(...parseResult); - } else { - newParts.push(parseResult); - } - } - } - - if (newParts.length && inQuote) { - const isFirstPart = parts.length === 0; - prefixQuoteLines(isFirstPart, newParts, partCreator); - } - - parts.push(...newParts); - - const descend = checkDescendInto(n); - // when not descending (like for PRE), onNodeLeave won't be called to set lastNode - // so do that here. - lastNode = descend ? null : n; - return descend; + const parts = parseNode(new DOMParser().parseFromString(html, "text/html").body, pc); + if (isQuotedMessage) { + prefixLines(parts, "> ", pc); } - - function onNodeLeave(n: Node) { - if (checkIgnored(n)) { - return; - } - switch (n.nodeName) { - case "BLOCKQUOTE": - inQuote = false; - break; - case "OL": - state.listIndex.pop(); - /* falls through */ - case "UL": - state.listDepth -= 1; - break; - } - lastNode = n; - } - - walkDOMDepthFirst(rootNode, onNodeEnter, onNodeLeave); - return parts; } -export function parsePlainTextMessage(body: string, partCreator: PartCreator, isQuotedMessage?: boolean): Part[] { +export function parsePlainTextMessage(body: string, pc: PartCreator, isQuotedMessage?: boolean): Part[] { const lines = body.split(/\r\n|\r|\n/g); // split on any new-line combination not just \n, collapses \r\n return lines.reduce((parts, line, i) => { if (isQuotedMessage) { - parts.push(partCreator.plain(QUOTE_LINE_PREFIX)); + parts.push(pc.plain("> ")); } - parts.push(...parseAtRoomMentions(line, partCreator)); + parts.push(...parseAtRoomMentions(line, pc)); const isLast = i === lines.length - 1; if (!isLast) { - parts.push(partCreator.newline()); + parts.push(pc.newline()); } return parts; }, [] as Part[]); } -export function parseEvent(event: MatrixEvent, partCreator: PartCreator, { isQuotedMessage = false } = {}) { +export function parseEvent(event: MatrixEvent, pc: PartCreator, { isQuotedMessage = false } = {}) { const content = event.getContent(); let parts: Part[]; const isEmote = content.msgtype === "m.emote"; let isRainbow = false; if (content.format === "org.matrix.custom.html") { - parts = parseHtmlMessage(content.formatted_body || "", partCreator, isQuotedMessage); + parts = parseHtmlMessage(content.formatted_body || "", pc, isQuotedMessage); if (content.body && content.formatted_body && textToHtmlRainbow(content.body) === content.formatted_body) { isRainbow = true; } } else { - parts = parsePlainTextMessage(content.body || "", partCreator, isQuotedMessage); + parts = parsePlainTextMessage(content.body || "", pc, isQuotedMessage); } if (isEmote && isRainbow) { - parts.unshift(partCreator.plain("/rainbowme ")); + parts.unshift(pc.plain("/rainbowme ")); } else if (isRainbow) { - parts.unshift(partCreator.plain("/rainbow ")); + parts.unshift(pc.plain("/rainbow ")); } else if (isEmote) { - parts.unshift(partCreator.plain("/me ")); + parts.unshift(pc.plain("/me ")); } return parts; diff --git a/test/editor/__snapshots__/deserialize-test.js.snap b/test/editor/__snapshots__/deserialize-test.js.snap new file mode 100644 index 0000000000..721ebc3211 --- /dev/null +++ b/test/editor/__snapshots__/deserialize-test.js.snap @@ -0,0 +1,178 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`editor/deserialize html messages escapes angle brackets 1`] = ` +Array [ + Object { + "text": "\\\\> \\\\\\\\no formatting here\\\\\\\\", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes asterisks 1`] = ` +Array [ + Object { + "text": "\\\\*hello\\\\*", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes backslashes 1`] = ` +Array [ + Object { + "text": "C:\\\\\\\\My Documents", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes backticks in code blocks 1`] = ` +Array [ + Object { + "text": "\`\`this → \` is a backtick\`\`", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "\`\`\`\`", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "and here are 3 of them:", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "\`\`\`", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "\`\`\`\`", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes backticks outside of code blocks 1`] = ` +Array [ + Object { + "text": "some \\\\\`backticks\\\\\`", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes square brackets 1`] = ` +Array [ + Object { + "text": "\\\\[not an actual link\\\\](https://example.org)", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages escapes underscores 1`] = ` +Array [ + Object { + "text": "\\\\_\\\\_emphasis\\\\_\\\\_", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages preserves nested formatting 1`] = ` +Array [ + Object { + "text": "ab_c**de**_", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages preserves nested quotes 1`] = ` +Array [ + Object { + "text": "> foo", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "> ", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "> > bar", + "type": "plain", + }, +] +`; + +exports[`editor/deserialize html messages surrounds lists with newlines 1`] = ` +Array [ + Object { + "text": "foo", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "- bar", + "type": "plain", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": " +", + "type": "newline", + }, + Object { + "text": "baz", + "type": "plain", + }, +] +`; diff --git a/test/editor/deserialize-test.js b/test/editor/deserialize-test.js index 5526ff3df4..050254401d 100644 --- a/test/editor/deserialize-test.js +++ b/test/editor/deserialize-test.js @@ -237,18 +237,6 @@ describe('editor/deserialize', function() { expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" }); expect(parts[4]).toStrictEqual({ type: "plain", text: "3. Finish" }); }); - it('non tight lists', () => { - const html = "
  1. Start

  2. Continue

  3. Finish

"; - const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); - expect(parts.length).toBe(8); - expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Start" }); - expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" }); - expect(parts[2]).toStrictEqual({ type: "newline", text: "\n" }); - expect(parts[3]).toStrictEqual({ type: "plain", text: "2. Continue" }); - expect(parts[4]).toStrictEqual({ type: "newline", text: "\n" }); - expect(parts[5]).toStrictEqual({ type: "newline", text: "\n" }); - expect(parts[6]).toStrictEqual({ type: "plain", text: "3. Finish" }); - }); it('nested unordered lists', () => { const html = "
  • Oak
    • Spruce
      • Birch
"; const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); @@ -269,13 +257,13 @@ describe('editor/deserialize', function() { expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" }); expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` }); }); - it('nested tight lists', () => { + it('nested lists', () => { const html = "
  1. Oak\n
    1. Spruce\n
      1. Birch
"; const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); expect(parts.length).toBe(5); - expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak" }); + expect(parts[0]).toStrictEqual({ type: "plain", text: "1. Oak\n" }); expect(parts[1]).toStrictEqual({ type: "newline", text: "\n" }); - expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce` }); + expect(parts[2]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES}1. Spruce\n` }); expect(parts[3]).toStrictEqual({ type: "newline", text: "\n" }); expect(parts[4]).toStrictEqual({ type: "plain", text: `${FOUR_SPACES.repeat(2)}1. Birch` }); }); @@ -291,5 +279,56 @@ describe('editor/deserialize', function() { expect(parts.length).toBe(1); expect(parts[0]).toStrictEqual({ type: "plain", text: "/me says _DON'T SHOUT_!" }); }); + it('preserves nested quotes', () => { + const html = "
foo
bar
"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('surrounds lists with newlines', () => { + const html = "foo
  • bar
baz"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('preserves nested formatting', () => { + const html = "abcde"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes backticks in code blocks', () => { + const html = "

this → ` is a backtick

" + + "
and here are 3 of them:\n```
"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes backticks outside of code blocks', () => { + const html = "some `backticks`"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes backslashes', () => { + const html = "C:\\My Documents"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes asterisks', () => { + const html = "*hello*"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes underscores', () => { + const html = "__emphasis__"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes square brackets', () => { + const html = "[not an actual link](https://example.org)"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); + it('escapes angle brackets', () => { + const html = "> \\no formatting here\\"; + const parts = normalize(parseEvent(htmlMessage(html), createPartCreator())); + expect(parts).toMatchSnapshot(); + }); }); });