From db5f055f0c456bd852601bd0de61336c75bd9bd7 Mon Sep 17 00:00:00 2001
From: Robin Stocker <robin@nibor.org>
Date: Tue, 23 Jan 2024 22:43:20 +1100
Subject: [PATCH] Escape special characters at beginning of line

---
 .../internal/util/AsciiMatcher.java           |  7 +++
 .../markdown/CoreMarkdownNodeRenderer.java    | 58 ++++++++++++++++++-
 .../renderer/markdown/MarkdownWriter.java     |  6 +-
 .../markdown/MarkdownRendererTest.java        | 16 +++++
 .../markdown/SpecMarkdownRendererTest.java    |  2 +-
 5 files changed, 84 insertions(+), 5 deletions(-)

diff --git a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java
index 82d83ca4..35769f82 100644
--- a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java
+++ b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java
@@ -29,6 +29,13 @@ public class AsciiMatcher implements CharMatcher {
             this.set = set;
         }
 
+        public Builder anyOf(String s) { // passes every character of s to c(char), then returns this builder
+            for (char ch : s.toCharArray()) {
+                c(ch); // c(char) throws IllegalArgumentException for characters above 127
+            }
+            return this;
+        }
+
         public Builder c(char c) {
             if (c > 127) {
                 throw new IllegalArgumentException("Can only match ASCII characters");
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java
index 813efb16..fe5725ae 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java
@@ -10,6 +10,8 @@ import java.util.Arrays;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * The node renderer that renders all the core nodes (comes last in the order of node renderers).
@@ -20,8 +22,8 @@ import java.util.Set;
  */
 public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRenderer {
 
-    private final CharMatcher textEscape =
-            AsciiMatcher.builder().c('[').c(']').c('<').c('>').c('`').build();
+    private final AsciiMatcher textEscape =
+            AsciiMatcher.builder().anyOf("[]<>`*&").build();
     private final CharMatcher linkDestinationNeedsAngleBrackets =
             AsciiMatcher.builder().c(' ').c('(').c(')').c('<').c('>').c('\\').build();
     private final CharMatcher linkDestinationEscapeInAngleBrackets =
@@ -29,6 +31,8 @@ public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRen
     private final CharMatcher linkTitleEscapeInQuotes =
             AsciiMatcher.builder().c('"').build();
 
+    private final Pattern orderedListMarkerPattern = Pattern.compile("^([0-9]{1,9})([.)])"); // group 1: 1-9 digits, group 2: '.' or ')' delimiter, anchored at the start of the input
+
     protected final MarkdownNodeRendererContext context;
     private final MarkdownWriter writer;
     /**
@@ -319,7 +323,55 @@ public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRen
 
     @Override
     public void visit(Text text) {
-        writer.writeEscaped(text.getLiteral(), textEscape);
+        String literal = text.getLiteral();
+        if (writer.isAtLineStart() && !literal.isEmpty()) { // these characters only need escaping when they begin a line
+            char c = literal.charAt(0);
+            switch (c) {
+                case '-': case '+': {
+                    // Would be ambiguous with a bullet list marker, escape ('*' is already part of textEscape)
+                    writer.write("\\" + c);
+                    literal = literal.substring(1);
+                    break;
+                }
+                case '#': {
+                    // Would be ambiguous with an ATX heading, escape
+                    writer.write("\\#");
+                    literal = literal.substring(1);
+                    break;
+                }
+                case '=': {
+                    // Would be ambiguous with a Setext heading, escape
+                    writer.write("\\=");
+                    literal = literal.substring(1);
+                    break;
+                }
+                case '0':
+                case '1':
+                case '2':
+                case '3':
+                case '4':
+                case '5':
+                case '6':
+                case '7':
+                case '8':
+                case '9': {
+                    // Ordered list marker (digits followed by '.' or ')'): keep the digits, escape only the delimiter
+                    Matcher m = orderedListMarkerPattern.matcher(literal);
+                    if (m.find()) {
+                        writer.write(m.group(1));
+                        writer.write("\\" + m.group(2));
+                        literal = literal.substring(m.end());
+                    }
+                }
+            }
+        }
+
+        if (literal.endsWith("!") && text.getNext() instanceof Link) { // an unescaped '!' right before a link would read as image syntax
+            writer.writeEscaped(literal.substring(0, literal.length() - 1), textEscape);
+            writer.write("\\!");
+        } else {
+            writer.writeEscaped(literal, textEscape);
+        }
     }
 
     @Override
diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java
index ba682d46..bc10f020 100644
--- a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java
+++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java
@@ -11,7 +11,7 @@ public class MarkdownWriter {
 
     private int blockSeparator = 0;
     private boolean tight;
-    private char lastChar;
+    private char lastChar = '\n';
     private final LinkedList<String> prefixes = new LinkedList<>();
 
     public MarkdownWriter(Appendable out) {
@@ -22,6 +22,10 @@ public class MarkdownWriter {
         return lastChar;
     }
 
+    public boolean isAtLineStart() { // true when lastChar is '\n' (its initial value) or blockSeparator is positive (presumably a separator not yet flushed)
+        return blockSeparator > 0 || lastChar == '\n';
+    }
+
     public void write(String s) {
         flushBlockSeparator();
         append(s);
diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java
index 76a77dbb..59039799 100644
--- a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java
+++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java
@@ -127,6 +127,22 @@ public class MarkdownRendererTest {
         // otherwise result in a different parse result (e.g. a link):
         assertRoundTrip("\\[a\\](/uri)\n");
         assertRoundTrip("\\`abc\\`\n");
+
+        // Some characters only need to be escaped at the beginning of the line
+        assertRoundTrip("\\- Test\n");
+        assertRoundTrip("\\-\n");
+        assertRoundTrip("Test -\n");
+        assertRoundTrip("Abc\n\n\\- Test\n");
+        assertRoundTrip("\\# Test\n");
+        assertRoundTrip("\\## Test\n");
+        assertRoundTrip("\\#\n");
+        assertRoundTrip("Foo\n\\===\n");
+
+        // This is a bit more tricky as we need to check for a list start
+        assertRoundTrip("1\\. Foo\n");
+        assertRoundTrip("999\\. Foo\n");
+        assertRoundTrip("1\\.\n");
+        assertRoundTrip("1\\) Foo\n");
     }
 
     @Test
diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java
index 2a0c46c0..632f4acf 100644
--- a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java
+++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java
@@ -62,7 +62,7 @@ public class SpecMarkdownRendererTest {
             System.out.println();
         }
 
-        int expectedPassed = 630;
+        int expectedPassed = 646;
         assertTrue("Expected at least " + expectedPassed + " examples to pass but was " + passes.size(), passes.size() >= expectedPassed);
     }
 
-- 
GitLab