From db5f055f0c456bd852601bd0de61336c75bd9bd7 Mon Sep 17 00:00:00 2001 From: Robin Stocker <robin@nibor.org> Date: Tue, 23 Jan 2024 22:43:20 +1100 Subject: [PATCH] Escape special characters at beginning of line --- .../internal/util/AsciiMatcher.java | 7 +++ .../markdown/CoreMarkdownNodeRenderer.java | 58 ++++++++++++++++++- .../renderer/markdown/MarkdownWriter.java | 6 +- .../markdown/MarkdownRendererTest.java | 16 +++++ .../markdown/SpecMarkdownRendererTest.java | 2 +- 5 files changed, 84 insertions(+), 5 deletions(-) diff --git a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java index 82d83ca4..35769f82 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java @@ -29,6 +29,13 @@ public class AsciiMatcher implements CharMatcher { this.set = set; } + public Builder anyOf(String s) { + for (int i = 0; i < s.length(); i++) { + c(s.charAt(i)); + } + return this; + } + public Builder c(char c) { if (c > 127) { throw new IllegalArgumentException("Can only match ASCII characters"); diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java index 813efb16..fe5725ae 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java @@ -10,6 +10,8 @@ import java.util.Arrays; import java.util.HashSet; import java.util.List; import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * The node renderer that renders all the core nodes (comes last in the order of node renderers). @@ -20,8 +22,8 @@ import java.util.Set; */ public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRenderer { - private final CharMatcher textEscape = - AsciiMatcher.builder().c('[').c(']').c('<').c('>').c('`').build(); + private final AsciiMatcher textEscape = + AsciiMatcher.builder().anyOf("[]<>`*&").build(); private final CharMatcher linkDestinationNeedsAngleBrackets = AsciiMatcher.builder().c(' ').c('(').c(')').c('<').c('>').c('\\').build(); private final CharMatcher linkDestinationEscapeInAngleBrackets = @@ -29,6 +31,8 @@ public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRen private final CharMatcher linkTitleEscapeInQuotes = AsciiMatcher.builder().c('"').build(); + private final Pattern orderedListMarkerPattern = Pattern.compile("^([0-9]{1,9})([.)])"); + protected final MarkdownNodeRendererContext context; private final MarkdownWriter writer; /** @@ -319,7 +323,55 @@ public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRen @Override public void visit(Text text) { - writer.writeEscaped(text.getLiteral(), textEscape); + String literal = text.getLiteral(); + if (writer.isAtLineStart() && !literal.isEmpty()) { + char c = literal.charAt(0); + switch (c) { + case '-': { + // Would be ambiguous with a bullet list marker, escape + writer.write("\\-"); + literal = literal.substring(1); + break; + } + case '#': { + // Would be ambiguous with an ATX heading, escape + writer.write("\\#"); + literal = literal.substring(1); + break; + } + case '=': { + // Would be ambiguous with a Setext heading, escape + writer.write("\\="); + literal = literal.substring(1); + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // Check for ordered list marker + Matcher m = orderedListMarkerPattern.matcher(literal); + if (m.find()) { + writer.write(m.group(1)); + writer.write("\\" + m.group(2)); + literal = literal.substring(m.end()); + } + } + } + } + + if (literal.endsWith("!") && text.getNext() instanceof Link) { + writer.writeEscaped(literal.substring(0, literal.length() - 1), textEscape); + writer.write("\\!"); + } else { + writer.writeEscaped(literal, textEscape); + } } @Override diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java index ba682d46..bc10f020 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java @@ -11,7 +11,7 @@ public class MarkdownWriter { private int blockSeparator = 0; private boolean tight; - private char lastChar; + private char lastChar = '\n'; private final LinkedList<String> prefixes = new LinkedList<>(); public MarkdownWriter(Appendable out) { @@ -22,6 +22,10 @@ public class MarkdownWriter { return lastChar; } + public boolean isAtLineStart() { + return lastChar == '\n' || blockSeparator > 0; + } + public void write(String s) { flushBlockSeparator(); append(s); diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java index 76a77dbb..59039799 100644 --- a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java @@ -127,6 +127,22 @@ public class MarkdownRendererTest { // otherwise result in a different parse result (e.g. a link): assertRoundTrip("\\[a\\](/uri)\n"); assertRoundTrip("\\`abc\\`\n"); + + // Some characters only need to be escaped at the beginning of the line + assertRoundTrip("\\- Test\n"); + assertRoundTrip("\\-\n"); + assertRoundTrip("Test -\n"); + assertRoundTrip("Abc\n\n\\- Test\n"); + assertRoundTrip("\\# Test\n"); + assertRoundTrip("\\## Test\n"); + assertRoundTrip("\\#\n"); + assertRoundTrip("Foo\n\\===\n"); + + // This is a bit more tricky as we need to check for a list start + assertRoundTrip("1\\. Foo\n"); + assertRoundTrip("999\\. Foo\n"); + assertRoundTrip("1\\.\n"); + assertRoundTrip("1\\) Foo\n"); } @Test diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java index 2a0c46c0..632f4acf 100644 --- a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java @@ -62,7 +62,7 @@ public class SpecMarkdownRendererTest { System.out.println(); } - int expectedPassed = 630; + int expectedPassed = 646; assertTrue("Expected at least " + expectedPassed + " examples to pass but was " + passes.size(), passes.size() >= expectedPassed); } -- GitLab