Send plain text in the body

According to https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes, the plain text version of the HTML should be provided in the body. Also create MarkdownParser class to be able to unit test it.
2024-11-15 01:35:07 +08:00 · 2020-06-03 18:44:21 +02:00 · 2020-06-03 18:44:21 +02:00 · b29c2b2de4
commit b29c2b2de4
parent 40f2d19f81
6 changed files with 383 additions and 52 deletions
--- a/CHANGES.md
+++ b/CHANGES.md
@ -23,7 +23,7 @@ Build 🧱:
 -

 Other changes:
- -
+ - Send plain text in the body of events containing formatted body, as per https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes

 Changes in RiotX 0.21.0 (2020-05-28)
 ===================================================
--- a/matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt
+++ b/matrix-sdk-android/src/androidTest/java/im/vector/matrix/android/internal/session/room/send/MarkdownParserTest.kt
@ -0,0 +1,278 @@
+/*
+ * Copyright (c) 2020 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package im.vector.matrix.android.internal.session.room.send
+
+import androidx.test.ext.junit.runners.AndroidJUnit4
+import im.vector.matrix.android.InstrumentedTest
+import org.commonmark.parser.Parser
+import org.commonmark.renderer.html.HtmlRenderer
+import org.commonmark.renderer.text.TextContentRenderer
+import org.junit.Assert.assertEquals
+import org.junit.FixMethodOrder
+import org.junit.Test
+import org.junit.runner.RunWith
+import org.junit.runners.MethodSorters
+
+/**
+ * It will not be possible to test all combinations. For the moment I add a few tests, then, depending on the problem discovered in the wild,
+ * we can add more tests to cover the edge cases.
+ * Some tests are suffixed with `_not_passing`, maybe one day we will fix them...
+ * Riot-Web should be used as a reference for expected results, but not always. Especially Riot-Web add lots of `\n` in the
+ * formatted body, which is quite useless.
+ * Also Riot-Web does not provide plain text body when formatted text is provided. The body contains what the user has entered.
+ * See https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes
+ */
+@Suppress("SpellCheckingInspection")
+@RunWith(AndroidJUnit4::class)
+@FixMethodOrder(MethodSorters.JVM)
+class MarkdownParserTest : InstrumentedTest {
+
+    /**
+     * Create the same parser than in the RoomModule
+     */
+    private val markdownParser = MarkdownParser(
+            Parser.builder().build(),
+            HtmlRenderer.builder().build(),
+            TextContentRenderer.builder().build()
+    )
+
+    @Test
+    fun parseNoMarkdown() {
+        testIdentity("")
+        testIdentity("a")
+        testIdentity("1")
+        testIdentity("Lorem ipsum dolor sit amet, consectetur adipiscing elit, sed do eiusmod tempor incididunt ut labore et " +
+                "dolore magna aliqua. Ut enim ad minim veniam, quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea com" +
+                "modo consequat. Duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pari" +
+                "atur. Excepteur sint occaecat cupidatat non proident, sunt in culpa qui officia deserunt mollit anim id est laborum.")
+    }
+
+    @Test
+    fun parseSpaces() {
+        testIdentity(" ")
+        testIdentity("  ")
+        testIdentity("\n")
+    }
+
+    @Test
+    fun parseNewLines() {
+        testIdentity("line1\nline2")
+        testIdentity("line1\nline2\nline3")
+    }
+
+    @Test
+    fun parseBold() {
+        testType(
+                name = "bold",
+                markdownPattern = "**",
+                htmlExpectedTag = "strong"
+        )
+    }
+
+    @Test
+    fun parseItalic() {
+        testType(
+                name = "italic",
+                markdownPattern = "*",
+                htmlExpectedTag = "em"
+        )
+    }
+
+    @Test
+    fun parseItalic2() {
+        // Riot-Web format
+        "_italic_".let { markdownParser.parse(it) }.expect("italic", "<em>italic</em>")
+    }
+
+    /**
+     * Note: the test is not passing, it does not work on Riot-Web neither
+     */
+    @Test
+    fun parseStrike_not_passing() {
+        testType(
+                name = "strike",
+                markdownPattern = "~~",
+                htmlExpectedTag = "del"
+        )
+    }
+
+    @Test
+    fun parseCode() {
+        testType(
+                name = "code",
+                markdownPattern = "`",
+                htmlExpectedTag = "code",
+                plainTextPrefix = "\"",
+                plainTextSuffix = "\""
+        )
+    }
+
+    @Test
+    fun parseCode2() {
+        testType(
+                name = "code",
+                markdownPattern = "``",
+                htmlExpectedTag = "code",
+                plainTextPrefix = "\"",
+                plainTextSuffix = "\""
+        )
+    }
+
+    @Test
+    fun parseCode3() {
+        testType(
+                name = "code",
+                markdownPattern = "```",
+                htmlExpectedTag = "code",
+                plainTextPrefix = "\"",
+                plainTextSuffix = "\""
+        )
+    }
+
+    @Test
+    fun parseUnorderedList() {
+        "- item1".let { markdownParser.parse(it).expect(it, "<ul><li>item1</li></ul>") }
+        "- item1\n- item2".let { markdownParser.parse(it).expect(it, "<ul><li>item1</li><li>item2</li></ul>") }
+    }
+
+    @Test
+    fun parseOrderedList() {
+        "1. item1".let { markdownParser.parse(it).expect(it, "<ol><li>item1</li></ol>") }
+        "1. item1\n2. item2".let { markdownParser.parse(it).expect(it, "<ol><li>item1</li><li>item2</li></ol>") }
+    }
+
+    @Test
+    fun parseHorizontalLine() {
+        "---".let { markdownParser.parse(it) }.expect("***", "<hr />")
+    }
+
+    @Test
+    fun parseH2AndContent() {
+        "a\n---\nb".let { markdownParser.parse(it) }.expect("a\nb", "<h2>a</h2><p>b</p>")
+    }
+
+    @Test
+    fun parseQuote() {
+        "> quoted".let { markdownParser.parse(it) }.expect("«quoted»", "<blockquote><p>quoted</p></blockquote>")
+    }
+
+    @Test
+    fun parseQuote_not_passing() {
+        "> quoted\nline2".let { markdownParser.parse(it) }.expect("«quoted\nline2»", "<blockquote><p>quoted<br/>line2</p></blockquote>")
+    }
+
+    @Test
+    fun parseBoldItalic() {
+        "*italic* **bold**".let { markdownParser.parse(it) }.expect("italic bold", "<em>italic</em> <strong>bold</strong>")
+        "**bold** *italic*".let { markdownParser.parse(it) }.expect("bold italic", "<strong>bold</strong> <em>italic</em>")
+    }
+
+    @Test
+    fun parseHead() {
+        "# head1".let { markdownParser.parse(it) }.expect("head1", "<h1>head1</h1>")
+        "## head2".let { markdownParser.parse(it) }.expect("head2", "<h2>head2</h2>")
+        "### head3".let { markdownParser.parse(it) }.expect("head3", "<h3>head3</h3>")
+        "#### head4".let { markdownParser.parse(it) }.expect("head4", "<h4>head4</h4>")
+        "##### head5".let { markdownParser.parse(it) }.expect("head5", "<h5>head5</h5>")
+        "###### head6".let { markdownParser.parse(it) }.expect("head6", "<h6>head6</h6>")
+    }
+
+    @Test
+    fun parseHeads() {
+        "# head1\n# head2".let { markdownParser.parse(it) }.expect("head1\nhead2", "<h1>head1</h1><h1>head2</h1>")
+    }
+
+    @Test
+    fun parseBoldNewLines_not_passing() {
+        "**bold**\nline2".let { markdownParser.parse(it) }.expect("bold\nline2", "<strong>bold</strong><br />line2")
+    }
+
+    @Test
+    fun parseLinks() {
+        "[link](target)".let { markdownParser.parse(it) }.expect(""""link" (target)""", """<a href="target">link</a>""")
+    }
+
+    @Test
+    fun parseParagraph() {
+        "# head\ncontent".let { markdownParser.parse(it) }.expect("head\ncontent", "<h1>head</h1><p>content</p>")
+    }
+
+    private fun testIdentity(text: String) {
+        markdownParser.parse(text).expect(text, null)
+    }
+
+    private fun testType(name: String,
+                         markdownPattern: String,
+                         htmlExpectedTag: String,
+                         plainTextPrefix: String = "",
+                         plainTextSuffix: String = "") {
+        // Test simple case
+        "$markdownPattern$name$markdownPattern"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix",
+                        expectedFormattedText = "<$htmlExpectedTag>$name</$htmlExpectedTag>")
+
+        // Test twice the same tag
+        "$markdownPattern$name$markdownPattern and $markdownPattern$name bis$markdownPattern"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix and $plainTextPrefix$name bis$plainTextSuffix",
+                        expectedFormattedText = "<$htmlExpectedTag>$name</$htmlExpectedTag> and <$htmlExpectedTag>$name bis</$htmlExpectedTag>")
+
+        val textBefore = "a"
+        val textAfter = "b"
+
+        // With sticked text before
+        "$textBefore$markdownPattern$name$markdownPattern"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$textBefore$plainTextPrefix$name$plainTextSuffix",
+                        expectedFormattedText = "$textBefore<$htmlExpectedTag>$name</$htmlExpectedTag>")
+
+        // With text before and space
+        "$textBefore $markdownPattern$name$markdownPattern"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$textBefore $plainTextPrefix$name$plainTextSuffix",
+                        expectedFormattedText = "$textBefore <$htmlExpectedTag>$name</$htmlExpectedTag>")
+
+        // With sticked text after
+        "$markdownPattern$name$markdownPattern$textAfter"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix$textAfter",
+                        expectedFormattedText = "<$htmlExpectedTag>$name</$htmlExpectedTag>$textAfter")
+
+        // With space and text after
+        "$markdownPattern$name$markdownPattern $textAfter"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$plainTextPrefix$name$plainTextSuffix $textAfter",
+                        expectedFormattedText = "<$htmlExpectedTag>$name</$htmlExpectedTag> $textAfter")
+
+        // With sticked text before and text after
+        "$textBefore$markdownPattern$name$markdownPattern$textAfter"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$textBefore$plainTextPrefix$name$plainTextSuffix$textAfter",
+                        expectedFormattedText = "a<$htmlExpectedTag>$name</$htmlExpectedTag>$textAfter")
+
+        // With text before and after, with spaces
+        "$textBefore $markdownPattern$name$markdownPattern $textAfter"
+                .let { markdownParser.parse(it) }
+                .expect(expectedText = "$textBefore $plainTextPrefix$name$plainTextSuffix $textAfter",
+                        expectedFormattedText = "$textBefore <$htmlExpectedTag>$name</$htmlExpectedTag> $textAfter")
+    }
+
+    private fun TextContent.expect(expectedText: String, expectedFormattedText: String?) {
+        assertEquals("TextContent are not identical", TextContent(expectedText, expectedFormattedText), this)
+    }
+}
--- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt
+++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/extensions/Strings.kt
@ -1,24 +0,0 @@
-/*
- * Copyright (c) 2020 New Vector Ltd
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package im.vector.matrix.android.internal.extensions
-
-/**
- * Ex: "abcdef".subStringBetween("a", "f") -> "bcde"
- * Ex: "abcdefff".subStringBetween("a", "f") -> "bcdeff"
- * Ex: "aaabcdef".subStringBetween("a", "f") -> "aabcde"
- */
-internal fun String.subStringBetween(prefix: String, suffix: String) = substringAfter(prefix).substringBeforeLast(suffix)
--- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt
+++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/RoomModule.kt
@ -66,6 +66,9 @@ import im.vector.matrix.android.internal.session.room.typing.DefaultSendTypingTa
 import im.vector.matrix.android.internal.session.room.typing.SendTypingTask
 import im.vector.matrix.android.internal.session.room.uploads.DefaultGetUploadsTask
 import im.vector.matrix.android.internal.session.room.uploads.GetUploadsTask
+import org.commonmark.parser.Parser
+import org.commonmark.renderer.html.HtmlRenderer
+import org.commonmark.renderer.text.TextContentRenderer
 import retrofit2.Retrofit

@Module
@ -79,6 +82,28 @@ internal abstract class RoomModule {
        fun providesRoomAPI(retrofit: Retrofit): RoomAPI {
            return retrofit.create(RoomAPI::class.java)
        }
+
+        @Provides
+        @JvmStatic
+        fun providesParser(): Parser {
+            return Parser.builder().build()
+        }
+
+        @Provides
+        @JvmStatic
+        fun providesHtmlRenderer(): HtmlRenderer {
+            return HtmlRenderer
+                    .builder()
+                    .build()
+        }
+
+        @Provides
+        @JvmStatic
+        fun providesTextContentRenderer(): TextContentRenderer {
+            return TextContentRenderer
+                    .builder()
+                    .build()
+        }
    }

    @Binds
--- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt
+++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/LocalEchoEventFactory.kt
@ -58,14 +58,11 @@ import im.vector.matrix.android.api.session.room.model.relation.ReplyToContent
 import im.vector.matrix.android.api.session.room.timeline.TimelineEvent
 import im.vector.matrix.android.api.session.room.timeline.getLastMessageContent
 import im.vector.matrix.android.internal.di.UserId
-import im.vector.matrix.android.internal.extensions.subStringBetween
 import im.vector.matrix.android.internal.session.content.ThumbnailExtractor
 import im.vector.matrix.android.internal.session.room.send.pills.TextPillsUtils
 import im.vector.matrix.android.internal.task.TaskExecutor
 import im.vector.matrix.android.internal.util.StringProvider
 import kotlinx.coroutines.launch
-import org.commonmark.parser.Parser
-import org.commonmark.renderer.html.HtmlRenderer
 import javax.inject.Inject

 /**
@ -81,16 +78,11 @@ internal class LocalEchoEventFactory @Inject constructor(
        private val context: Context,
        @UserId private val userId: String,
        private val stringProvider: StringProvider,
+        private val markdownParser: MarkdownParser,
        private val textPillsUtils: TextPillsUtils,
        private val taskExecutor: TaskExecutor,
        private val localEchoRepository: LocalEchoRepository
 ) {
-    // TODO Inject
-    private val parser = Parser.builder().build()
-
-    // TODO Inject
-    private val renderer = HtmlRenderer.builder().build()
-
    fun createTextEvent(roomId: String, msgType: String, text: CharSequence, autoMarkdown: Boolean): Event {
        if (msgType == MessageType.MSGTYPE_TEXT || msgType == MessageType.MSGTYPE_EMOTE) {
            return createFormattedTextEvent(roomId, createTextContent(text, autoMarkdown), msgType)
@ -101,21 +93,8 @@ internal class LocalEchoEventFactory @Inject constructor(

    private fun createTextContent(text: CharSequence, autoMarkdown: Boolean): TextContent {
        if (autoMarkdown) {
-            val source = textPillsUtils.processSpecialSpansToMarkdown(text)
-                    ?: text.toString()
-            val document = parser.parse(source)
-            val htmlText = renderer.render(document)
-
-            // Cleanup extra paragraph
-            val cleanHtmlText = if (htmlText.startsWith("<p>") && htmlText.endsWith("</p>\n")) {
-                htmlText.subStringBetween("<p>", "</p>\n")
-            } else {
-                htmlText
-            }
-
-            if (isFormattedTextPertinent(source, cleanHtmlText)) {
-                return TextContent(text.toString(), cleanHtmlText)
-            }
+            val source = textPillsUtils.processSpecialSpansToMarkdown(text) ?: text.toString()
+            return markdownParser.parse(source)
        } else {
            // Try to detect pills
            textPillsUtils.processSpecialSpansToHtml(text)?.let {
@ -126,9 +105,6 @@ internal class LocalEchoEventFactory @Inject constructor(
        return TextContent(text.toString())
    }

-    private fun isFormattedTextPertinent(text: String, htmlText: String?) =
-            text != htmlText && htmlText != "<p>${text.trim()}</p>\n"
-
    fun createFormattedTextEvent(roomId: String, textContent: TextContent, msgType: String): Event {
        return createMessageEvent(roomId, textContent.toMessageTextContent(msgType))
    }
--- a/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt
+++ b/matrix-sdk-android/src/main/java/im/vector/matrix/android/internal/session/room/send/MarkdownParser.kt
@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2020 New Vector Ltd
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package im.vector.matrix.android.internal.session.room.send
+
+import org.commonmark.parser.Parser
+import org.commonmark.renderer.html.HtmlRenderer
+import org.commonmark.renderer.text.TextContentRenderer
+import javax.inject.Inject
+
+/**
+ * This class convert a text to an html text
+ * This class is tested by [MarkdownParserTest].
+ * If any change is required, please add a test covering the problem and make sure all the tests are still passing.
+ */
+internal class MarkdownParser @Inject constructor(
+        private val parser: Parser,
+        private val htmlRenderer: HtmlRenderer,
+        private val textContentRenderer: TextContentRenderer
+) {
+
+    private val mdSpecialChars = "[`_\\-\\*>\\.\\[\\]#~]".toRegex()
+
+    fun parse(text: String): TextContent {
+        // If no special char are detected, just return plain text
+        if (text.contains(mdSpecialChars).not()) {
+            return TextContent(text.toString())
+        }
+
+        val document = parser.parse(text)
+        val htmlText = htmlRenderer.render(document)
+
+        // Cleanup extra paragraph
+        val cleanHtmlText = if (htmlText.lastIndexOf("<p>") == 0) {
+            htmlText.removeSurrounding("<p>", "</p>\n")
+        } else {
+            htmlText
+        }
+
+        return if (isFormattedTextPertinent(text, cleanHtmlText)) {
+            // According to https://matrix.org/docs/spec/client_server/latest#m-room-message-msgtypes:
+            // The plain text version of the HTML should be provided in the body.
+            val plainText = textContentRenderer.render(document)
+            TextContent(plainText, cleanHtmlText.postTreatment())
+        } else {
+            TextContent(text.toString())
+        }
+    }
+
+    private fun isFormattedTextPertinent(text: String, htmlText: String?) =
+            text != htmlText && htmlText != "<p>${text.trim()}</p>\n"
+
+    /**
+     * The parser makes some mistakes, so deal with it here
+     */
+    private fun String.postTreatment(): String {
+        return this
+                // Remove extra space before and after the content
+                .trim()
+                // There is no need to include new line in an html-like source
+                .replace("\n", "")
+    }
+}