From 4d2926485b914843b3b462403c5825f6fa21adf7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20L=C3=B6thberg?= Date: Tue, 29 Nov 2016 20:56:48 +0100 Subject: [PATCH 1/2] Replace marked with commonmark MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Marked has some annoying bugs, and the author is inactive, so replace it with commonmark.js, which is the reference JavaScript implementation of CommonMark. CommonMark is also preferable since it has a specification, and a conformance test suite to make sure that parsers are correct. Signed-off-by: Johannes Löthberg --- package.json | 2 +- src/Markdown.js | 92 +++++++++++++------------------------------------ 2 files changed, 24 insertions(+), 70 deletions(-) diff --git a/package.json b/package.json index a25260e35d..e93aadf8d1 100644 --- a/package.json +++ b/package.json @@ -58,7 +58,7 @@ "isomorphic-fetch": "^2.2.1", "linkifyjs": "^2.1.3", "lodash": "^4.13.1", - "marked": "^0.3.5", + "commonmark": "^0.27.0", "matrix-js-sdk": "matrix-org/matrix-js-sdk#develop", "optimist": "^0.6.1", "q": "^1.4.1", diff --git a/src/Markdown.js b/src/Markdown.js index a7b267b110..80805144e2 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -14,20 +14,7 @@ See the License for the specific language governing permissions and limitations under the License. */ -import marked from 'marked'; - -// marked only applies the default options on the high -// level marked() interface, so we do it here. -const marked_options = Object.assign({}, marked.defaults, { - gfm: true, - tables: true, - breaks: true, - pedantic: false, - sanitize: true, - smartLists: true, - smartypants: false, - xhtml: true, // return self closing tags (ie.
not
) -}); +import commonmark from 'commonmark'; /** * Class that wraps marked, adding the ability to see whether @@ -36,16 +23,7 @@ const marked_options = Object.assign({}, marked.defaults, { */ export default class Markdown { constructor(input) { - const lexer = new marked.Lexer(marked_options); - this.tokens = lexer.lex(input); - } - - _copyTokens() { - // copy tokens (the parser modifies its input arg) - const tokens_copy = this.tokens.slice(); - // it also has a 'links' property, because this is javascript - // and why wouldn't you have an array that also has properties? - return Object.assign(tokens_copy, this.tokens); + this.input = input } isPlainText() { @@ -64,65 +42,41 @@ export default class Markdown { is_plain = false; } - const dummy_renderer = {}; - for (const k of Object.keys(marked.Renderer.prototype)) { + const dummy_renderer = new commonmark.HtmlRenderer(); + for (const k of Object.keys(commonmark.HtmlRenderer.prototype)) { dummy_renderer[k] = setNotPlain; } // text and paragraph are just text - dummy_renderer.text = function(t){return t;} - dummy_renderer.paragraph = function(t){return t;} + dummy_renderer.text = function(t) { return t; } + dummy_renderer.paragraph = function(t) { return t; } - // ignore links where text is just the url: - // this ignores plain URLs that markdown has - // detected whilst preserving markdown syntax links - dummy_renderer.link = function(href, title, text) { - if (text != href) { - is_plain = false; - } - } - - const dummy_options = Object.assign({}, marked_options, { - renderer: dummy_renderer, - }); - const dummy_parser = new marked.Parser(dummy_options); - dummy_parser.parse(this._copyTokens()); + const dummy_parser = new commonmark.Parser(); + dummy_renderer.render(dummy_parser.parse(this.input)); return is_plain; } toHTML() { - const real_renderer = new marked.Renderer(); - real_renderer.link = function(href, title, text) { - // prevent marked from turning plain URLs - // into links, because its algorithm is fairly - // poor. Let's send plain URLs rather than - // badly linkified ones (the linkifier Vector - // uses on message display is way better, eg. - // handles URLs with closing parens at the end). - if (text == href) { - return href; - } - return marked.Renderer.prototype.link.apply(this, arguments); - } + const parser = new commonmark.Parser(); - real_renderer.paragraph = (text) => { - // The tokens at the top level are the 'blocks', so if we - // have more than one, there are multiple 'paragraphs'. - // If there is only one top level token, just return the + const renderer = new commonmark.HtmlRenderer({safe: true}); + const real_paragraph = renderer.paragraph; + renderer.paragraph = function(node, entering) { + // If there is only one top level node, just return the // bare text: it's a single line of text and so should be - // 'inline', rather than necessarily wrapped in its own - // p tag. If, however, we have multiple tokens, each gets + // 'inline', rather than unnecessarily wrapped in its own + // p tag. If, however, we have multiple nodes, each gets // its own p tag to keep them as separate paragraphs. - if (this.tokens.length == 1) { - return text; + var par = node; + while (par.parent) { + par = par.parent + } + if (par.firstChild != par.lastChild) { + real_paragraph.bind(this)(node, entering); } - return '

' + text + '

'; } - const real_options = Object.assign({}, marked_options, { - renderer: real_renderer, - }); - const real_parser = new marked.Parser(real_options); - return real_parser.parse(this._copyTokens()); + var parsed = parser.parse(this.input); + return renderer.render(parsed); } } From 5f160d2e7f86f435db73b7d17799a8a35bb83514 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20L=C3=B6thberg?= Date: Wed, 30 Nov 2016 01:03:05 +0100 Subject: [PATCH 2/2] Markdown: Use .call instead of .bind MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Johannes Löthberg --- src/Markdown.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Markdown.js b/src/Markdown.js index 80805144e2..18c888b541 100644 --- a/src/Markdown.js +++ b/src/Markdown.js @@ -72,7 +72,7 @@ export default class Markdown { par = par.parent } if (par.firstChild != par.lastChild) { - real_paragraph.bind(this)(node, entering); + real_paragraph.call(this, node, entering); } }