Fix spurious html tags like <shrug>

* Only render HTML tags in markdown if they're del tags * Consider non-allowed HTML tags as plain text nodes, so a message of just '<shrug>' doesn't need to be sent as HTML * Consequently rewrite isPlaintext to just look at the parse tree rather than making and gutting a renderer to walk the tree (now we're using a library that actually produces a meaningfgul parse tree). * Tweak when we put \n on text output to avoid putting \n on the end of messages. Fixes https://github.com/vector-im/riot-web/issues/3065
2024-11-16 13:14:58 +08:00 · 2017-02-02 14:17:07 +00:00 · 2017-02-02 14:17:07 +00:00 · 853c89dfdc
commit 853c89dfdc
parent 63e47d8677
1 changed files with 78 additions and 39 deletions
--- a/src/Markdown.js
+++ b/src/Markdown.js
@ -15,6 +15,45 @@ limitations under the License.
 */
 import commonmark from 'commonmark';
 import escape from 'lodash/escape';
 const ALLOWED_HTML_TAGS = ['del'];
 // These types of node are definitely text
 const TEXT_NODES = ['text', 'softbreak', 'linebreak', 'paragraph', 'document'];
 function is_allowed_html_tag(node) {
    // Regex won't work for tags with attrs, but we only
    // allow <del> anyway.
    const matches = /^<\/?(.*)>$/.exec(node.literal);
    if (matches && matches.length == 2) {
        const tag = matches[1];
        return ALLOWED_HTML_TAGS.indexOf(tag) > -1;
    }
    return false;
 }
 function html_if_tag_allowed(node) {
    if (is_allowed_html_tag(node)) {
        this.lit(node.literal);
        return;
    } else {
        this.lit(escape(node.literal));
    }
 }
 /*
 * Returns true if the parse output containing the node
 * comprises multiple block level elements (ie. lines),
 * or false if it is only a single line.
 */
 function is_multi_line(node) {
    var par = node;
    while (par.parent) {
        par = par.parent;
    }
    return par.firstChild != par.lastChild;
 }
 /**
 * Class that wraps commonmark, adding the ability to see whether
@ -30,29 +69,26 @@ export default class Markdown {
    }
    isPlainText() {
-        // we determine if the message requires markdown by
+        const walker = this.parsed.walker();
        // running the parser on the tokens with a dummy
        // rendered and seeing if any of the renderer's
        // functions are called other than those noted below.
        // TODO: can't we just examine the output of the parser?
        let is_plain = true;
-        function setNotPlain() {
+        let ev;
-            is_plain = false;
+        while ( (ev = walker.next()) ) {
            const node = ev.node;
            if (TEXT_NODES.indexOf(node.type) > -1) {
                // definitely text
                continue;
            } else if (node.type == 'html_inline' || node.type == 'html_block') {
                // if it's an allowed html tag, we need to render it and therefore
                // we will need to use HTML. If it's not allowed, it's not HTML since
                // we'll just be treating it as text.
                if (is_allowed_html_tag(node)) {
                    return false;
                }
-
+            } else {
-        const dummy_renderer = new commonmark.HtmlRenderer();
+                return false;
        for (const k of Object.keys(commonmark.HtmlRenderer.prototype)) {
            dummy_renderer[k] = setNotPlain;
            }
-        // text and paragraph are just text
+        }
-        dummy_renderer.text = function(t) { return t; };
+        return true;
        dummy_renderer.softbreak = function(t) { return t; };
        dummy_renderer.paragraph = function(t) { return t; };
        dummy_renderer.render(this.parsed);
        return is_plain;
    }
    toHTML() {
@ -65,20 +101,27 @@ export default class Markdown {
            // 'inline', rather than unnecessarily wrapped in its own
            // p tag. If, however, we have multiple nodes, each gets
            // its own p tag to keep them as separate paragraphs.
-            var par = node;
+            if (is_multi_line(node)) {
            while (par.parent) {
                par = par.parent;
            }
            if (par.firstChild != par.lastChild) {
                real_paragraph.call(this, node, entering);
            }
        };
        renderer.html_inline = html_if_tag_allowed;
        renderer.html_block = function(node) {
            // as with `paragraph`, we only insert line breaks
            // if there are multiple lines in the markdown.
            const isMultiLine = is_multi_line(node);
            if (isMultiLine) this.cr();
            html_if_tag_allowed.call(this, node);
            if (isMultiLine) this.cr();
        }
        return renderer.render(this.parsed);
    }
    /*
-     * Render the mrkdown message to plain text. That is, essentially
+     * Render the markdown message to plain text. That is, essentially
     * just remove any backslashes escaping what would otherwise be
     * markdown syntax
     * (to fix https://github.com/vector-im/riot-web/issues/2870)
@ -96,22 +139,18 @@ export default class Markdown {
        };
        renderer.paragraph = function(node, entering) {
-            // If there is only one top level node, just return the
+            // as with toHTML, only append lines to paragraphs if there are
-            // bare text: it's a single line of text and so should be
+            // multiple paragraphs
-            // 'inline', rather than unnecessarily wrapped in its own
+            if (is_multi_line(node)) {
-            // p tag. If, however, we have multiple nodes, each gets
+                if (!entering && node.next) {
            // its own p tag to keep them as separate paragraphs.
            var par = node;
            while (par.parent) {
                node = par;
                par = par.parent;
            }
            if (node != par.lastChild) {
                if (!entering) {
                    this.lit('\n\n');
                }
            }
        };
        renderer.html_block = function(node) {
            this.lit(node.literal);
            if (is_multi_line(node) && node.next) this.lit('\n\n');
        }
        return renderer.render(this.parsed);
    }