diff --git a/README.md b/README.md index dc27c0987fed8efe694c5c91136ca4b3e0a5b442..da7a07dd22df550d0192e29875f60271f96a41c3 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,14 @@ Java library for parsing and rendering [Markdown] text according to the Introduction ------------ -Provides classes for parsing input to an abstract syntax tree of nodes -(AST), visiting and manipulating nodes, and rendering to HTML. It -started out as a port of [commonmark.js], but has since evolved into a -full library with a nice API and the following features: +Provides classes for parsing input to an abstract syntax tree (AST), +visiting and manipulating nodes, and rendering to HTML or back to Markdown. +It started out as a port of [commonmark.js], but has since evolved into an +extensible library with the following features: * Small (core has no dependencies, extensions in separate artifacts) -* Fast (10-20 times faster than pegdown, see benchmarks in repo) +* Fast (10-20 times faster than [pegdown] which used to be a popular Markdown + library, see benchmarks in repo) * Flexible (manipulate the AST after parsing, customize HTML rendering) * Extensible (tables, strikethrough, autolinking and more, see below) @@ -63,9 +64,9 @@ import org.commonmark.parser.Parser; import org.commonmark.renderer.html.HtmlRenderer; Parser parser = Parser.builder().build(); -Node document = parser.parse("This is *Sparta*"); +Node document = parser.parse("This is *Markdown*"); HtmlRenderer renderer = HtmlRenderer.builder().build(); -renderer.render(document); // "<p>This is <em>Sparta</em></p>\n" +renderer.render(document); // "<p>This is <em>Markdown</em></p>\n" ``` This uses the parser and renderer with default options. Both builders have @@ -81,8 +82,23 @@ to which tags are allowed, etc. That is the responsibility of the caller, and if you expose the resulting HTML, you probably want to run a sanitizer on it after this. -For rendering to plain text, there's also a `TextContentRenderer` with -a very similar API. +#### Render to Markdown + +```java +import org.commonmark.node.*; +import org.commonmark.renderer.markdown.MarkdownRenderer; + +MarkdownRenderer renderer = MarkdownRenderer.builder().build(); +Node document = new Document(); +Heading heading = new Heading(); +heading.setLevel(2); +heading.appendChild(new Text("My title")); +document.appendChild(heading); + +renderer.render(document); // "## My title\n" +``` + +For rendering to plain text with minimal markup, there's also `TextContentRenderer`. #### Use a visitor to process parsed nodes @@ -390,6 +406,7 @@ BSD (2-clause) licensed, see LICENSE.txt file. [CommonMark]: https://commonmark.org/ [Markdown]: https://daringfireball.net/projects/markdown/ [commonmark.js]: https://github.com/commonmark/commonmark.js +[pegdown]: https://github.com/sirthias/pegdown [CommonMark Dingus]: https://spec.commonmark.org/dingus/ [Maven Central]: https://search.maven.org/#search|ga|1|g%3A%22org.commonmark%22 [Semantic Versioning]: https://semver.org/ diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java index 115ae9ea48e97fb9ef9d9c792e3c0fda95afda37..0c24642bccefa581aea81a734d0a87122d28be1b 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/Strikethrough.java @@ -4,19 +4,23 @@ import org.commonmark.node.CustomNode; import org.commonmark.node.Delimited; /** - * A strikethrough node containing text and other inline nodes nodes as children. + * A strikethrough node containing text and other inline nodes as children. */ public class Strikethrough extends CustomNode implements Delimited { - private static final String DELIMITER = "~~"; + private String delimiter; + + public Strikethrough(String delimiter) { + this.delimiter = delimiter; + } @Override public String getOpeningDelimiter() { - return DELIMITER; + return delimiter; } @Override public String getClosingDelimiter() { - return DELIMITER; + return delimiter; } } diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java index 4f0228a1cb26c158b5ad310682f7cba12e8dbe62..f87f3e9c8860418984811b312e4d9f2fede31245 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/StrikethroughExtension.java @@ -1,17 +1,24 @@ package org.commonmark.ext.gfm.strikethrough; import org.commonmark.Extension; -import org.commonmark.renderer.text.TextContentRenderer; -import org.commonmark.renderer.text.TextContentNodeRendererContext; -import org.commonmark.renderer.text.TextContentNodeRendererFactory; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughDelimiterProcessor; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughHtmlNodeRenderer; +import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughMarkdownNodeRenderer; import org.commonmark.ext.gfm.strikethrough.internal.StrikethroughTextContentNodeRenderer; -import org.commonmark.renderer.html.HtmlRenderer; -import org.commonmark.renderer.html.HtmlNodeRendererContext; -import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.html.HtmlNodeRendererContext; +import org.commonmark.renderer.html.HtmlNodeRendererFactory; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.commonmark.renderer.text.TextContentNodeRendererContext; +import org.commonmark.renderer.text.TextContentNodeRendererFactory; +import org.commonmark.renderer.text.TextContentRenderer; + +import java.util.Collections; +import java.util.Set; /** * Extension for GFM strikethrough using {@code ~} or {@code ~~} (GitHub Flavored Markdown). @@ -42,7 +49,7 @@ import org.commonmark.renderer.NodeRenderer; * </p> */ public class StrikethroughExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, - TextContentRenderer.TextContentRendererExtension { + TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { private final boolean requireTwoTildes; @@ -89,13 +96,28 @@ public class StrikethroughExtension implements Parser.ParserExtension, HtmlRende }); } + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new StrikethroughMarkdownNodeRenderer(context); + } + + @Override + public Set<Character> getSpecialCharacters() { + return Collections.singleton('~'); + } + }); + } + public static class Builder { private boolean requireTwoTildes = false; /** * @param requireTwoTildes Whether two tilde characters ({@code ~~}) are required for strikethrough or whether - * one is also enough. Default is {@code false}; both a single tilde and two tildes can be used for strikethrough. + * one is also enough. Default is {@code false}; both a single tilde and two tildes can be used for strikethrough. * @return {@code this} */ public Builder requireTwoTildes(boolean requireTwoTildes) { diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java index 3dedff1b92613e2b421c5bc527e383d97e9272c0..4657106ab1c8317524e0cc813abffc347eb509df 100644 --- a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughDelimiterProcessor.java @@ -43,7 +43,8 @@ public class StrikethroughDelimiterProcessor implements DelimiterProcessor { Text opener = openingRun.getOpener(); // Wrap nodes between delimiters in strikethrough. - Node strikethrough = new Strikethrough(); + String delimiter = openingRun.length() == 1 ? opener.getLiteral() : opener.getLiteral() + opener.getLiteral(); + Node strikethrough = new Strikethrough(delimiter); SourceSpans sourceSpans = new SourceSpans(); sourceSpans.addAllFrom(openingRun.getOpeners(openingRun.length())); diff --git a/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java new file mode 100644 index 0000000000000000000000000000000000000000..1c91dd64fd1bf5ff5f1a34011ab849315e905841 --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/main/java/org/commonmark/ext/gfm/strikethrough/internal/StrikethroughMarkdownNodeRenderer.java @@ -0,0 +1,34 @@ +package org.commonmark.ext.gfm.strikethrough.internal; + +import org.commonmark.ext.gfm.strikethrough.Strikethrough; +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; + +public class StrikethroughMarkdownNodeRenderer extends StrikethroughNodeRenderer { + + private final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + + public StrikethroughMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + } + + @Override + public void render(Node node) { + Strikethrough strikethrough = (Strikethrough) node; + writer.raw(strikethrough.getOpeningDelimiter()); + renderChildren(node); + writer.raw(strikethrough.getClosingDelimiter()); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java new file mode 100644 index 0000000000000000000000000000000000000000..96df48cec006909806effdf837efd729e2764b10 --- /dev/null +++ b/commonmark-ext-gfm-strikethrough/src/test/java/org/commonmark/ext/gfm/strikethrough/StrikethroughMarkdownRendererTest.java @@ -0,0 +1,36 @@ +package org.commonmark.ext.gfm.strikethrough; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.Test; + +import java.util.Collections; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +public class StrikethroughMarkdownRendererTest { + + private static final Set<Extension> EXTENSIONS = Collections.singleton(StrikethroughExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testStrikethrough() { + assertRoundTrip("~foo~ ~bar~\n"); + assertRoundTrip("~~foo~~ ~~bar~~\n"); + assertRoundTrip("~~f\\~oo~~ ~~bar~~\n"); + + assertRoundTrip("\\~foo\\~\n"); + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertEquals(input, rendered); + } +} diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java index 61880c6c377ec2382fa337e6d464605ac33468b5..bd7b40e02154ec99d4efd8138facd7998cd487cf 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TableCell.java @@ -22,7 +22,7 @@ public class TableCell extends CustomNode { } /** - * @return the cell alignment + * @return the cell alignment or {@code null} if no specific alignment */ public Alignment getAlignment() { return alignment; diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java index 5707b0f1406f24b893aa8e08ca2ea754869b2260..d18c382835c2895670525a9408ea06dc16a155b0 100644 --- a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/TablesExtension.java @@ -3,16 +3,23 @@ package org.commonmark.ext.gfm.tables; import org.commonmark.Extension; import org.commonmark.ext.gfm.tables.internal.TableBlockParser; import org.commonmark.ext.gfm.tables.internal.TableHtmlNodeRenderer; +import org.commonmark.ext.gfm.tables.internal.TableMarkdownNodeRenderer; import org.commonmark.ext.gfm.tables.internal.TableTextContentNodeRenderer; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.renderer.text.TextContentNodeRendererContext; import org.commonmark.renderer.text.TextContentNodeRendererFactory; import org.commonmark.renderer.text.TextContentRenderer; +import java.util.Collections; +import java.util.Set; + /** * Extension for GFM tables using "|" pipes (GitHub Flavored Markdown). * <p> @@ -27,7 +34,7 @@ import org.commonmark.renderer.text.TextContentRenderer; * @see <a href="https://github.github.com/gfm/#tables-extension-">Tables (extension) in GitHub Flavored Markdown Spec</a> */ public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, - TextContentRenderer.TextContentRendererExtension { + TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { private TablesExtension() { } @@ -60,4 +67,19 @@ public class TablesExtension implements Parser.ParserExtension, HtmlRenderer.Htm } }); } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new TableMarkdownNodeRenderer(context); + } + + @Override + public Set<Character> getSpecialCharacters() { + return Collections.singleton('|'); + } + }); + } } diff --git a/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java new file mode 100644 index 0000000000000000000000000000000000000000..4871471644a75dde6e76cbf141852d5b3cd58d7d --- /dev/null +++ b/commonmark-ext-gfm-tables/src/main/java/org/commonmark/ext/gfm/tables/internal/TableMarkdownNodeRenderer.java @@ -0,0 +1,90 @@ +package org.commonmark.ext.gfm.tables.internal; + +import org.commonmark.ext.gfm.tables.*; +import org.commonmark.internal.util.AsciiMatcher; +import org.commonmark.internal.util.CharMatcher; +import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; + +import java.util.ArrayList; +import java.util.List; + +/** + * The Table node renderer that is needed for rendering GFM tables (GitHub Flavored Markdown) to text content. + */ +public class TableMarkdownNodeRenderer extends TableNodeRenderer implements NodeRenderer { + private final MarkdownWriter writer; + private final MarkdownNodeRendererContext context; + + private final CharMatcher pipe = AsciiMatcher.builder().c('|').build(); + + private final List<TableCell.Alignment> columns = new ArrayList<>(); + + public TableMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.writer = context.getWriter(); + this.context = context; + } + + @Override + protected void renderBlock(TableBlock node) { + columns.clear(); + writer.pushTight(true); + renderChildren(node); + writer.popTight(); + writer.block(); + } + + @Override + protected void renderHead(TableHead node) { + renderChildren(node); + for (TableCell.Alignment columnAlignment : columns) { + writer.raw('|'); + if (columnAlignment == TableCell.Alignment.LEFT) { + writer.raw(":---"); + } else if (columnAlignment == TableCell.Alignment.RIGHT) { + writer.raw("---:"); + } else if (columnAlignment == TableCell.Alignment.CENTER) { + writer.raw(":---:"); + } else { + writer.raw("---"); + } + } + writer.raw("|"); + writer.block(); + } + + @Override + protected void renderBody(TableBody node) { + renderChildren(node); + } + + @Override + protected void renderRow(TableRow node) { + renderChildren(node); + // Trailing | at the end of the line + writer.raw("|"); + writer.block(); + } + + @Override + protected void renderCell(TableCell node) { + if (node.getParent() != null && node.getParent().getParent() instanceof TableHead) { + columns.add(node.getAlignment()); + } + writer.raw("|"); + writer.pushRawEscape(pipe); + renderChildren(node); + writer.popRawEscape(); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java new file mode 100644 index 0000000000000000000000000000000000000000..1db917d0817f670efb6e9ced619856127af149cb --- /dev/null +++ b/commonmark-ext-gfm-tables/src/test/java/org/commonmark/ext/gfm/tables/TableMarkdownRendererTest.java @@ -0,0 +1,76 @@ +package org.commonmark.ext.gfm.tables; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.Test; + +import java.util.Collections; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +public class TableMarkdownRendererTest { + + private static final Set<Extension> EXTENSIONS = Collections.singleton(TablesExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testHeadNoBody() { + assertRoundTrip("|Abc|\n|---|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n"); + assertRoundTrip("|Abc||\n|---|---|\n"); + } + + @Test + public void testHeadAndBody() { + assertRoundTrip("|Abc|\n|---|\n|1|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|1|2|\n"); + } + + @Test + public void testBodyHasFewerColumns() { + // Could try not to write empty trailing cells but this is fine too + assertRoundTrip("|Abc|Def|\n|---|---|\n|1||\n"); + } + + @Test + public void testAlignment() { + assertRoundTrip("|Abc|Def|\n|:---|---|\n|1|2|\n"); + assertRoundTrip("|Abc|Def|\n|---|---:|\n|1|2|\n"); + assertRoundTrip("|Abc|Def|\n|:---:|:---:|\n|1|2|\n"); + } + + @Test + public void testInsideBlockQuote() { + assertRoundTrip("> |Abc|Def|\n> |---|---|\n> |1|2|\n"); + } + + @Test + public void testMultipleTables() { + assertRoundTrip("|Abc|Def|\n|---|---|\n\n|One|\n|---|\n|Only|\n"); + } + + @Test + public void testEscaping() { + assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|text \\||\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|Pipe in|code `\\|`|\n"); + assertRoundTrip("|Abc|Def|\n|---|---|\n|Inline HTML|<span>Foo\\|bar</span>|\n"); + } + + @Test + public void testEscaped() { + // `|` in Text nodes needs to be escaped, otherwise the generated Markdown does not get parsed back as a table + assertRoundTrip("\\|Abc\\|\n\\|---\\|\n"); + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertEquals(input, rendered); + } +} diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java index 2f980d93b0462dad2930fd0136c8467ac404ce6e..0657195580673056f5d146454a370aced8a1c970 100644 --- a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/InsExtension.java @@ -3,16 +3,23 @@ package org.commonmark.ext.ins; import org.commonmark.Extension; import org.commonmark.ext.ins.internal.InsDelimiterProcessor; import org.commonmark.ext.ins.internal.InsHtmlNodeRenderer; +import org.commonmark.ext.ins.internal.InsMarkdownNodeRenderer; import org.commonmark.ext.ins.internal.InsTextContentNodeRenderer; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.HtmlNodeRendererContext; import org.commonmark.renderer.html.HtmlNodeRendererFactory; import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownNodeRendererFactory; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.commonmark.renderer.text.TextContentNodeRendererContext; import org.commonmark.renderer.text.TextContentNodeRendererFactory; import org.commonmark.renderer.text.TextContentRenderer; +import java.util.Collections; +import java.util.Set; + /** * Extension for ins using ++ * <p> @@ -24,9 +31,7 @@ import org.commonmark.renderer.text.TextContentRenderer; * The parsed ins text regions are turned into {@link Ins} nodes. * </p> */ -public class InsExtension implements Parser.ParserExtension, - HtmlRenderer.HtmlRendererExtension, - TextContentRenderer.TextContentRendererExtension { +public class InsExtension implements Parser.ParserExtension, HtmlRenderer.HtmlRendererExtension, TextContentRenderer.TextContentRendererExtension, MarkdownRenderer.MarkdownRendererExtension { private InsExtension() { } @@ -59,4 +64,21 @@ public class InsExtension implements Parser.ParserExtension, } }); } + + @Override + public void extend(MarkdownRenderer.Builder rendererBuilder) { + rendererBuilder.nodeRendererFactory(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new InsMarkdownNodeRenderer(context); + } + + @Override + public Set<Character> getSpecialCharacters() { + // We technically don't need to escape single occurrences of +, but that's all the extension API + // exposes currently. + return Collections.singleton('+'); + } + }); + } } diff --git a/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java new file mode 100644 index 0000000000000000000000000000000000000000..851d472824019f1d4d3230716fc6afe1a658172c --- /dev/null +++ b/commonmark-ext-ins/src/main/java/org/commonmark/ext/ins/internal/InsMarkdownNodeRenderer.java @@ -0,0 +1,32 @@ +package org.commonmark.ext.ins.internal; + +import org.commonmark.node.Node; +import org.commonmark.renderer.markdown.MarkdownNodeRendererContext; +import org.commonmark.renderer.markdown.MarkdownWriter; + +public class InsMarkdownNodeRenderer extends InsNodeRenderer { + + private final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + + public InsMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + } + + @Override + public void render(Node node) { + writer.raw("++"); + renderChildren(node); + writer.raw("++"); + } + + private void renderChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } +} diff --git a/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java new file mode 100644 index 0000000000000000000000000000000000000000..16cefc7f124b18b5a6b134a92c0ca9012cb39b21 --- /dev/null +++ b/commonmark-ext-ins/src/test/java/org/commonmark/ext/ins/InsMarkdownRendererTest.java @@ -0,0 +1,34 @@ +package org.commonmark.ext.ins; + +import org.commonmark.Extension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.Test; + +import java.util.Collections; +import java.util.Set; + +import static org.junit.Assert.assertEquals; + +public class InsMarkdownRendererTest { + + private static final Set<Extension> EXTENSIONS = Collections.singleton(InsExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testStrikethrough() { + assertRoundTrip("++foo++\n"); + + assertRoundTrip("\\+\\+foo\\+\\+\n"); + } + + protected String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertEquals(input, rendered); + } +} diff --git a/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java b/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java new file mode 100644 index 0000000000000000000000000000000000000000..51f761cfd6aeefe57e1c8cd7126b583a76aa3b78 --- /dev/null +++ b/commonmark-integration-test/src/test/java/org/commonmark/integration/MarkdownRendererIntegrationTest.java @@ -0,0 +1,46 @@ +package org.commonmark.integration; + +import org.commonmark.Extension; +import org.commonmark.ext.autolink.AutolinkExtension; +import org.commonmark.ext.front.matter.YamlFrontMatterExtension; +import org.commonmark.ext.gfm.strikethrough.StrikethroughExtension; +import org.commonmark.ext.gfm.tables.TablesExtension; +import org.commonmark.ext.image.attributes.ImageAttributesExtension; +import org.commonmark.ext.ins.InsExtension; +import org.commonmark.ext.task.list.items.TaskListItemsExtension; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.markdown.MarkdownRenderer; +import org.junit.Test; + +import java.util.Arrays; +import java.util.List; + +import static org.junit.Assert.assertEquals; + +public class MarkdownRendererIntegrationTest { + + private static final List<Extension> EXTENSIONS = Arrays.asList( + AutolinkExtension.create(), + ImageAttributesExtension.create(), + InsExtension.create(), + StrikethroughExtension.create(), + TablesExtension.create(), + TaskListItemsExtension.create(), + YamlFrontMatterExtension.create()); + private static final Parser PARSER = Parser.builder().extensions(EXTENSIONS).build(); + private static final MarkdownRenderer RENDERER = MarkdownRenderer.builder().extensions(EXTENSIONS).build(); + + @Test + public void testStrikethroughInTable() { + assertRoundTrip("|Abc|\n|---|\n|~strikethrough~|\n|\\~escaped\\~|\n"); + } + + private String render(String source) { + return RENDERER.render(PARSER.parse(source)); + } + + private void assertRoundTrip(String input) { + String rendered = render(input); + assertEquals(input, rendered); + } +} diff --git a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java index 417a66097fbbaf3e3e97710d6de3b4e6b20a62fa..11e87d0aa655e53c7cba05f1b237888caee2fbfc 100644 --- a/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java +++ b/commonmark-test-util/src/main/java/org/commonmark/testutil/example/Example.java @@ -30,6 +30,10 @@ public class Example { return html; } + public String getSection() { + return section; + } + @Override public String toString() { return "File \"" + filename + "\" section \"" + section + "\" example " + exampleNumber; diff --git a/commonmark/pom.xml b/commonmark/pom.xml index 64c45619261cde44b797a98c86711ceb658140bc..9988370a8b0e214ed539486efc5accf37cc4edd1 100644 --- a/commonmark/pom.xml +++ b/commonmark/pom.xml @@ -9,7 +9,7 @@ <artifactId>commonmark</artifactId> <name>commonmark-java core</name> - <description>Core of commonmark-java (implementation of CommonMark for parsing markdown and rendering to HTML)</description> + <description>Core of commonmark-java (a library for parsing Markdown to an AST, modifying the AST and rendering it to HTML or Markdown)</description> <dependencies> <dependency> diff --git a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java index 82d83ca46f38341e817efd4dcc6f02d726cffa68..dd7e8d5eb7c5c3fcce03ebfa00dba0008e735565 100644 --- a/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java +++ b/commonmark/src/main/java/org/commonmark/internal/util/AsciiMatcher.java @@ -1,6 +1,7 @@ package org.commonmark.internal.util; import java.util.BitSet; +import java.util.Set; public class AsciiMatcher implements CharMatcher { private final BitSet set; @@ -22,6 +23,10 @@ public class AsciiMatcher implements CharMatcher { return new Builder(new BitSet()); } + public static Builder builder(AsciiMatcher matcher) { + return new Builder((BitSet) matcher.set.clone()); + } + public static class Builder { private final BitSet set; @@ -37,6 +42,20 @@ public class AsciiMatcher implements CharMatcher { return this; } + public Builder anyOf(String s) { + for (int i = 0; i < s.length(); i++) { + c(s.charAt(i)); + } + return this; + } + + public Builder anyOf(Set<Character> characters) { + for (Character c : characters) { + c(c); + } + return this; + } + public Builder range(char from, char toInclusive) { for (char c = from; c <= toInclusive; c++) { c(c); diff --git a/commonmark/src/main/java/org/commonmark/package-info.java b/commonmark/src/main/java/org/commonmark/package-info.java index e784703e9e2a8d90780a70d363024c952e5c828d..b683017f6e986b61817c905329031e933dcc5898 100644 --- a/commonmark/src/main/java/org/commonmark/package-info.java +++ b/commonmark/src/main/java/org/commonmark/package-info.java @@ -4,6 +4,7 @@ * <li>{@link org.commonmark.parser} for parsing input text to AST nodes</li> * <li>{@link org.commonmark.node} for AST node types and visitors</li> * <li>{@link org.commonmark.renderer.html} for HTML rendering</li> + * <li>{@link org.commonmark.renderer.markdown} for Markdown rendering</li> * </ul> */ package org.commonmark; diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java new file mode 100644 index 0000000000000000000000000000000000000000..30d6d1a5c09d44207fc4d80146b12eb57408002b --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/CoreMarkdownNodeRenderer.java @@ -0,0 +1,532 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.internal.util.AsciiMatcher; +import org.commonmark.internal.util.CharMatcher; +import org.commonmark.internal.util.Parsing; +import org.commonmark.node.*; +import org.commonmark.renderer.NodeRenderer; + +import java.util.Arrays; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +/** + * The node renderer that renders all the core nodes (comes last in the order of node renderers). + * <p> + * Note that while sometimes it would be easier to record what kind of syntax was used on parsing (e.g. ATX vs Setext + * heading), this renderer is intended to also work for documents that were created by directly creating + * {@link Node Nodes} instead. So in order to support that, it sometimes needs to do a bit more work. + */ +public class CoreMarkdownNodeRenderer extends AbstractVisitor implements NodeRenderer { + + private final AsciiMatcher textEscape; + private final CharMatcher textEscapeInHeading; + private final CharMatcher linkDestinationNeedsAngleBrackets = + AsciiMatcher.builder().c(' ').c('(').c(')').c('<').c('>').c('\n').c('\\').build(); + private final CharMatcher linkDestinationEscapeInAngleBrackets = + AsciiMatcher.builder().c('<').c('>').c('\n').c('\\').build(); + private final CharMatcher linkTitleEscapeInQuotes = + AsciiMatcher.builder().c('"').c('\n').c('\\').build(); + + private final Pattern orderedListMarkerPattern = Pattern.compile("^([0-9]{1,9})([.)])"); + + protected final MarkdownNodeRendererContext context; + private final MarkdownWriter writer; + /** + * If we're currently within a {@link BulletList} or {@link OrderedList}, this keeps the context of that list. + * It has a parent field so that it can represent a stack (for nested lists). + */ + private ListHolder listHolder; + + public CoreMarkdownNodeRenderer(MarkdownNodeRendererContext context) { + this.context = context; + this.writer = context.getWriter(); + + textEscape = AsciiMatcher.builder().anyOf("[]<>`*_&\n\\").anyOf(context.getSpecialCharacters()).build(); + textEscapeInHeading = AsciiMatcher.builder(textEscape).anyOf("#").build(); + } + + @Override + public Set<Class<? extends Node>> getNodeTypes() { + return new HashSet<>(Arrays.asList( + BlockQuote.class, + BulletList.class, + Code.class, + Document.class, + Emphasis.class, + FencedCodeBlock.class, + HardLineBreak.class, + Heading.class, + HtmlBlock.class, + HtmlInline.class, + Image.class, + IndentedCodeBlock.class, + Link.class, + ListItem.class, + OrderedList.class, + Paragraph.class, + SoftLineBreak.class, + StrongEmphasis.class, + Text.class, + ThematicBreak.class + )); + } + + @Override + public void render(Node node) { + node.accept(this); + } + + @Override + public void visit(Document document) { + // No rendering itself + visitChildren(document); + writer.line(); + } + + @Override + public void visit(ThematicBreak thematicBreak) { + // Let's use ___ as it doesn't introduce ambiguity with * or - list item markers + writer.raw("___"); + writer.block(); + } + + @Override + public void visit(Heading heading) { + if (heading.getLevel() <= 2) { + LineBreakVisitor lineBreakVisitor = new LineBreakVisitor(); + heading.accept(lineBreakVisitor); + boolean isMultipleLines = lineBreakVisitor.hasLineBreak(); + + if (isMultipleLines) { + // Setext headings: Can have multiple lines, but only level 1 or 2 + visitChildren(heading); + writer.line(); + if (heading.getLevel() == 1) { + // Note that it would be nice to match the length of the contents instead of just using 3, but that's + // not easy. + writer.raw("==="); + } else { + writer.raw("---"); + } + writer.block(); + return; + } + } + + // ATX headings: Can't have multiple lines, but up to level 6. + for (int i = 0; i < heading.getLevel(); i++) { + writer.raw('#'); + } + writer.raw(' '); + visitChildren(heading); + + writer.block(); + } + + @Override + public void visit(IndentedCodeBlock indentedCodeBlock) { + String literal = indentedCodeBlock.getLiteral(); + // We need to respect line prefixes which is why we need to write it line by line (e.g. an indented code block + // within a block quote) + writer.writePrefix(" "); + writer.pushPrefix(" "); + List<String> lines = getLines(literal); + for (int i = 0; i < lines.size(); i++) { + String line = lines.get(i); + writer.raw(line); + if (i != lines.size() - 1) { + writer.line(); + } + } + writer.popPrefix(); + writer.block(); + } + + @Override + public void visit(FencedCodeBlock fencedCodeBlock) { + String literal = fencedCodeBlock.getLiteral(); + String fence = repeat(String.valueOf(fencedCodeBlock.getFenceChar()), fencedCodeBlock.getFenceLength()); + int indent = fencedCodeBlock.getFenceIndent(); + + if (indent > 0) { + String indentPrefix = repeat(" ", indent); + writer.writePrefix(indentPrefix); + writer.pushPrefix(indentPrefix); + } + + writer.raw(fence); + if (fencedCodeBlock.getInfo() != null) { + writer.raw(fencedCodeBlock.getInfo()); + } + writer.line(); + if (!literal.isEmpty()) { + List<String> lines = getLines(literal); + for (String line : lines) { + writer.raw(line); + writer.line(); + } + } + writer.raw(fence); + if (indent > 0) { + writer.popPrefix(); + } + writer.block(); + } + + @Override + public void visit(HtmlBlock htmlBlock) { + List<String> lines = getLines(htmlBlock.getLiteral()); + for (int i = 0; i < lines.size(); i++) { + String line = lines.get(i); + writer.raw(line); + if (i != lines.size() - 1) { + writer.line(); + } + } + writer.block(); + } + + @Override + public void visit(Paragraph paragraph) { + visitChildren(paragraph); + writer.block(); + } + + @Override + public void visit(BlockQuote blockQuote) { + writer.writePrefix("> "); + writer.pushPrefix("> "); + visitChildren(blockQuote); + writer.popPrefix(); + writer.block(); + } + + @Override + public void visit(BulletList bulletList) { + writer.pushTight(bulletList.isTight()); + listHolder = new BulletListHolder(listHolder, bulletList); + visitChildren(bulletList); + listHolder = listHolder.parent; + writer.popTight(); + writer.block(); + } + + @Override + public void visit(OrderedList orderedList) { + writer.pushTight(orderedList.isTight()); + listHolder = new OrderedListHolder(listHolder, orderedList); + visitChildren(orderedList); + listHolder = listHolder.parent; + writer.popTight(); + writer.block(); + } + + @Override + public void visit(ListItem listItem) { + int contentIndent = listItem.getContentIndent(); + boolean pushedPrefix = false; + if (listHolder instanceof BulletListHolder) { + BulletListHolder bulletListHolder = (BulletListHolder) listHolder; + String marker = repeat(" ", listItem.getMarkerIndent()) + bulletListHolder.bulletMarker; + writer.writePrefix(marker); + writer.writePrefix(repeat(" ", contentIndent - marker.length())); + writer.pushPrefix(repeat(" ", contentIndent)); + pushedPrefix = true; + } else if (listHolder instanceof OrderedListHolder) { + OrderedListHolder orderedListHolder = (OrderedListHolder) listHolder; + String marker = repeat(" ", listItem.getMarkerIndent()) + orderedListHolder.number + orderedListHolder.delimiter; + orderedListHolder.number++; + writer.writePrefix(marker); + writer.writePrefix(repeat(" ", contentIndent - marker.length())); + writer.pushPrefix(repeat(" ", contentIndent)); + pushedPrefix = true; + } + if (listItem.getFirstChild() == null) { + // Empty list item + writer.block(); + } else { + visitChildren(listItem); + } + if (pushedPrefix) { + writer.popPrefix(); + } + } + + @Override + public void visit(Code code) { + String literal = code.getLiteral(); + // If the literal includes backticks, we can surround them by using one more backtick. + int backticks = findMaxRunLength('`', literal); + for (int i = 0; i < backticks + 1; i++) { + writer.raw('`'); + } + // If the literal starts or ends with a backtick, surround it with a single space. + // If it starts and ends with a space (but is not only spaces), add an additional space (otherwise they would + // get removed on parsing). + boolean addSpace = literal.startsWith("`") || literal.endsWith("`") || + (literal.startsWith(" ") && literal.endsWith(" ") && Parsing.hasNonSpace(literal)); + if (addSpace) { + writer.raw(' '); + } + writer.raw(literal); + if (addSpace) { + writer.raw(' '); + } + for (int i = 0; i < backticks + 1; i++) { + writer.raw('`'); + } + } + + @Override + public void visit(Emphasis emphasis) { + String delimiter = emphasis.getOpeningDelimiter(); + // Use delimiter that was parsed if available + if (delimiter == null) { + // When emphasis is nested, a different delimiter needs to be used + delimiter = writer.getLastChar() == '*' ? "_" : "*"; + } + writer.raw(delimiter); + super.visit(emphasis); + writer.raw(delimiter); + } + + @Override + public void visit(StrongEmphasis strongEmphasis) { + writer.raw("**"); + super.visit(strongEmphasis); + writer.raw("**"); + } + + @Override + public void visit(Link link) { + writeLinkLike(link.getTitle(), link.getDestination(), link, "["); + } + + @Override + public void visit(Image image) { + writeLinkLike(image.getTitle(), image.getDestination(), image, "!["); + } + + @Override + public void visit(HtmlInline htmlInline) { + writer.raw(htmlInline.getLiteral()); + } + + @Override + public void visit(HardLineBreak hardLineBreak) { + writer.raw(" "); + writer.line(); + } + + @Override + public void visit(SoftLineBreak softLineBreak) { + writer.line(); + } + + @Override + public void visit(Text text) { + // Text is tricky. In Markdown special characters (`-`, `#` etc.) can be escaped (`\-`, `\#` etc.) so that + // they're parsed as plain text. Currently, whether a character was escaped or not is not recorded in the Node, + // so here we don't know. If we just wrote out those characters unescaped, the resulting Markdown would change + // meaning (turn into a list item, heading, etc.). + // You might say "Why not store that in the Node when parsing", but that wouldn't work for the use case where + // nodes are constructed directly instead of via parsing. This renderer needs to work for that too. + // So currently, when in doubt, we escape. For special characters only occurring at the beginning of a line, + // we only escape them then (we wouldn't want to escape every `.` for example). + String literal = text.getLiteral(); + if (writer.isAtLineStart() && !literal.isEmpty()) { + char c = literal.charAt(0); + switch (c) { + case '-': { + // Would be ambiguous with a bullet list marker, escape + writer.raw("\\-"); + literal = literal.substring(1); + break; + } + case '#': { + // Would be ambiguous with an ATX heading, escape + writer.raw("\\#"); + literal = literal.substring(1); + break; + } + case '=': { + // Would be ambiguous with a Setext heading, escape + writer.raw("\\="); + literal = literal.substring(1); + break; + } + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': { + // Check for ordered list marker + Matcher m = orderedListMarkerPattern.matcher(literal); + if (m.find()) { + writer.raw(m.group(1)); + writer.raw("\\" + m.group(2)); + literal = literal.substring(m.end()); + } + break; + } + case '\t': { + writer.raw("	"); + literal = literal.substring(1); + break; + } + case ' ': { + writer.raw(" "); + literal = literal.substring(1); + break; + } + } + } + + CharMatcher escape = text.getParent() instanceof Heading ? textEscapeInHeading : textEscape; + + if (literal.endsWith("!") && text.getNext() instanceof Link) { + // If we wrote the `!` unescaped, it would turn the link into an image instead. + writer.text(literal.substring(0, literal.length() - 1), escape); + writer.raw("\\!"); + } else { + writer.text(literal, escape); + } + } + + @Override + protected void visitChildren(Node parent) { + Node node = parent.getFirstChild(); + while (node != null) { + Node next = node.getNext(); + context.render(node); + node = next; + } + } + + private static int findMaxRunLength(char c, CharSequence s) { + int backticks = 0; + int start = 0; + while (start < s.length()) { + int index = Parsing.find(c, s, start); + if (index != -1) { + start = Parsing.skip(c, s, index + 1, s.length()); + backticks = Math.max(backticks, start - index); + } else { + break; + } + } + return backticks; + } + + private static boolean contains(String s, CharMatcher charMatcher) { + for (int i = 0; i < s.length(); i++) { + if (charMatcher.matches(s.charAt(i))) { + return true; + } + } + return false; + } + + private static String repeat(String s, int count) { + StringBuilder sb = new StringBuilder(s.length() * count); + for (int i = 0; i < count; i++) { + sb.append(s); + } + return sb.toString(); + } + + private static List<String> getLines(String literal) { + // Without -1, split would discard all trailing empty strings, which is not what we want, e.g. it would + // return the same result for "abc", "abc\n" and "abc\n\n". + // With -1, it returns ["abc"], ["abc", ""] and ["abc", "", ""]. + String[] parts = literal.split("\n", -1); + if (parts[parts.length - 1].isEmpty()) { + // But we don't want the last empty string, as "\n" is used as a line terminator (not a separator), + // so return without the last element. + return Arrays.asList(parts).subList(0, parts.length - 1); + } else { + return Arrays.asList(parts); + } + } + + private void writeLinkLike(String title, String destination, Node node, String opener) { + writer.raw(opener); + visitChildren(node); + writer.raw(']'); + writer.raw('('); + if (contains(destination, linkDestinationNeedsAngleBrackets)) { + writer.raw('<'); + writer.text(destination, linkDestinationEscapeInAngleBrackets); + writer.raw('>'); + } else { + writer.raw(destination); + } + if (title != null) { + writer.raw(' '); + writer.raw('"'); + writer.text(title, linkTitleEscapeInQuotes); + writer.raw('"'); + } + writer.raw(')'); + } + + private static class ListHolder { + final ListHolder parent; + + protected ListHolder(ListHolder parent) { + this.parent = parent; + } + } + + private static class BulletListHolder extends ListHolder { + final char bulletMarker; + + public BulletListHolder(ListHolder parent, BulletList bulletList) { + super(parent); + this.bulletMarker = bulletList.getBulletMarker(); + } + } + + private static class OrderedListHolder extends ListHolder { + final char delimiter; + private int number; + + protected OrderedListHolder(ListHolder parent, OrderedList orderedList) { + super(parent); + delimiter = orderedList.getDelimiter(); + number = orderedList.getStartNumber(); + } + } + + /** + * Visits nodes to check if there are any soft or hard line breaks. + */ + private static class LineBreakVisitor extends AbstractVisitor { + private boolean lineBreak = false; + + public boolean hasLineBreak() { + return lineBreak; + } + + @Override + public void visit(SoftLineBreak softLineBreak) { + super.visit(softLineBreak); + lineBreak = true; + } + + @Override + public void visit(HardLineBreak hardLineBreak) { + super.visit(hardLineBreak); + lineBreak = true; + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java new file mode 100644 index 0000000000000000000000000000000000000000..40640d1b44024c69a465fafcc5f8c119295d87ec --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererContext.java @@ -0,0 +1,30 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.Node; + +import java.util.Set; + +/** + * Context that is passed to custom node renderers, see {@link MarkdownNodeRendererFactory#create}. + */ +public interface MarkdownNodeRendererContext { + + /** + * @return the writer to use + */ + MarkdownWriter getWriter(); + + /** + * Render the specified node and its children using the configured renderers. This should be used to render child + * nodes; be careful not to pass the node that is being rendered, that would result in an endless loop. + * + * @param node the node to render + */ + void render(Node node); + + /** + * @return additional special characters that need to be escaped if they occur in normal text; currently only ASCII + * characters are allowed + */ + Set<Character> getSpecialCharacters(); +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java new file mode 100644 index 0000000000000000000000000000000000000000..14221ea568faa37fe932b45c18821caf0989e445 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownNodeRendererFactory.java @@ -0,0 +1,25 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.renderer.NodeRenderer; + +import java.util.Set; + +/** + * Factory for instantiating new node renderers for rendering custom nodes. + */ +public interface MarkdownNodeRendererFactory { + + /** + * Create a new node renderer for the specified rendering context. + * + * @param context the context for rendering (normally passed on to the node renderer) + * @return a node renderer + */ + NodeRenderer create(MarkdownNodeRendererContext context); + + /** + * @return the additional special characters that this factory would like to have escaped in normal text; currently + * only ASCII characters are allowed + */ + Set<Character> getSpecialCharacters(); +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java new file mode 100644 index 0000000000000000000000000000000000000000..25fa9a142c6b62e9c56ba80f8f6409cb854d929d --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownRenderer.java @@ -0,0 +1,162 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.Extension; +import org.commonmark.internal.renderer.NodeRendererMap; +import org.commonmark.node.Node; +import org.commonmark.renderer.NodeRenderer; +import org.commonmark.renderer.Renderer; + +import java.util.*; + +/** + * Renders nodes to Markdown (CommonMark syntax); use {@link #builder()} to create a renderer. + * <p> + * Note that it doesn't currently preserve the exact syntax of the original input Markdown (if any): + * <ul> + * <li>Headings are output as ATX headings if possible (multi-line headings need Setext headings)</li> + * <li>Escaping might be over-eager, e.g. a plain {@code *} might be escaped + * even though it doesn't need to be in that particular context</li> + * <li>Leading whitespace in paragraphs is not preserved</li> + * </ul> + * However, it should produce Markdown that is semantically equivalent to the input, i.e. if the Markdown was parsed + * again and compared against the original AST, it should be the same (minus bugs). + */ +public class MarkdownRenderer implements Renderer { + + private final List<MarkdownNodeRendererFactory> nodeRendererFactories; + + private MarkdownRenderer(Builder builder) { + this.nodeRendererFactories = new ArrayList<>(builder.nodeRendererFactories.size() + 1); + this.nodeRendererFactories.addAll(builder.nodeRendererFactories); + // Add as last. This means clients can override the rendering of core nodes if they want. + this.nodeRendererFactories.add(new MarkdownNodeRendererFactory() { + @Override + public NodeRenderer create(MarkdownNodeRendererContext context) { + return new CoreMarkdownNodeRenderer(context); + } + + @Override + public Set<Character> getSpecialCharacters() { + return Collections.emptySet(); + } + }); + } + + /** + * Create a new builder for configuring a {@link MarkdownRenderer}. + * + * @return a builder + */ + public static Builder builder() { + return new Builder(); + } + + @Override + public void render(Node node, Appendable output) { + RendererContext context = new RendererContext(new MarkdownWriter(output)); + context.render(node); + } + + @Override + public String render(Node node) { + StringBuilder sb = new StringBuilder(); + render(node, sb); + return sb.toString(); + } + + /** + * Builder for configuring a {@link MarkdownRenderer}. See methods for default configuration. + */ + public static class Builder { + + private final List<MarkdownNodeRendererFactory> nodeRendererFactories = new ArrayList<>(); + + /** + * @return the configured {@link MarkdownRenderer} + */ + public MarkdownRenderer build() { + return new MarkdownRenderer(this); + } + + /** + * Add a factory for instantiating a node renderer (done when rendering). This allows to override the rendering + * of node types or define rendering for custom node types. + * <p> + * If multiple node renderers for the same node type are created, the one from the factory that was added first + * "wins". (This is how the rendering for core node types can be overridden; the default rendering comes last.) + * + * @param nodeRendererFactory the factory for creating a node renderer + * @return {@code this} + */ + public Builder nodeRendererFactory(MarkdownNodeRendererFactory nodeRendererFactory) { + this.nodeRendererFactories.add(nodeRendererFactory); + return this; + } + + /** + * @param extensions extensions to use on this renderer + * @return {@code this} + */ + public Builder extensions(Iterable<? extends Extension> extensions) { + for (Extension extension : extensions) { + if (extension instanceof MarkdownRendererExtension) { + MarkdownRendererExtension markdownRendererExtension = (MarkdownRendererExtension) extension; + markdownRendererExtension.extend(this); + } + } + return this; + } + } + + /** + * Extension for {@link MarkdownRenderer} for rendering custom nodes. + */ + public interface MarkdownRendererExtension extends Extension { + + /** + * Extend Markdown rendering, usually by registering custom node renderers using {@link Builder#nodeRendererFactory}. + * + * @param rendererBuilder the renderer builder to extend + */ + void extend(Builder rendererBuilder); + } + + private class RendererContext implements MarkdownNodeRendererContext { + private final MarkdownWriter writer; + private final NodeRendererMap nodeRendererMap = new NodeRendererMap(); + private final Set<Character> additionalTextEscapes; + + private RendererContext(MarkdownWriter writer) { + // Set fields that are used by interface + this.writer = writer; + Set<Character> escapes = new HashSet<Character>(); + for (MarkdownNodeRendererFactory factory : nodeRendererFactories) { + escapes.addAll(factory.getSpecialCharacters()); + } + additionalTextEscapes = Collections.unmodifiableSet(escapes); + + // The first node renderer for a node type "wins". + for (int i = nodeRendererFactories.size() - 1; i >= 0; i--) { + MarkdownNodeRendererFactory nodeRendererFactory = nodeRendererFactories.get(i); + // Pass in this as context here, which uses the fields set above + NodeRenderer nodeRenderer = nodeRendererFactory.create(this); + nodeRendererMap.add(nodeRenderer); + } + } + + @Override + public MarkdownWriter getWriter() { + return writer; + } + + @Override + public void render(Node node) { + nodeRendererMap.render(node); + } + + @Override + public Set<Character> getSpecialCharacters() { + return additionalTextEscapes; + } + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java new file mode 100644 index 0000000000000000000000000000000000000000..1231a4a7335b851f67d120a1a639f6c6d35fc6ba --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/MarkdownWriter.java @@ -0,0 +1,246 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.internal.util.CharMatcher; + +import java.io.IOException; +import java.util.LinkedList; + +/** + * Writer for Markdown (CommonMark) text. + */ +public class MarkdownWriter { + + private final Appendable buffer; + + private int blockSeparator = 0; + private char lastChar; + private boolean atLineStart = true; + + // Stacks of settings that affect various rendering behaviors. The common pattern here is that callers use "push" to + // change a setting, render some nodes, and then "pop" the setting off the stack again to restore previous state. + private final LinkedList<String> prefixes = new LinkedList<>(); + private final LinkedList<Boolean> tight = new LinkedList<>(); + private final LinkedList<CharMatcher> rawEscapes = new LinkedList<>(); + + public MarkdownWriter(Appendable out) { + buffer = out; + } + + /** + * Write the supplied string (raw/unescaped except if {@link #pushRawEscape} was used). + */ + public void raw(String s) { + flushBlockSeparator(); + write(s, null); + } + + /** + * Write the supplied character (raw/unescaped except if {@link #pushRawEscape} was used). + */ + public void raw(char c) { + flushBlockSeparator(); + write(c); + } + + /** + * Write the supplied string with escaping. + * + * @param s the string to write + * @param escape which characters to escape + */ + public void text(String s, CharMatcher escape) { + if (s.isEmpty()) { + return; + } + flushBlockSeparator(); + write(s, escape); + + lastChar = s.charAt(s.length() - 1); + atLineStart = false; + } + + /** + * Write a newline (line terminator). + */ + public void line() { + write('\n'); + writePrefixes(); + atLineStart = true; + } + + /** + * Enqueue a block separator to be written before the next text is written. Block separators are not written + * straight away because if there are no more blocks to write we don't want a separator (at the end of the document). + */ + public void block() { + // Remember whether this should be a tight or loose separator now because tight could get changed in between + // this and the next flush. + blockSeparator = isTight() ? 1 : 2; + atLineStart = true; + } + + /** + * Push a prefix onto the top of the stack. All prefixes are written at the beginning of each line, until the + * prefix is popped again. + * + * @param prefix the raw prefix string + */ + public void pushPrefix(String prefix) { + prefixes.addLast(prefix); + } + + /** + * Write a prefix. + * + * @param prefix the raw prefix string to write + */ + public void writePrefix(String prefix) { + boolean tmp = atLineStart; + raw(prefix); + atLineStart = tmp; + } + + /** + * Remove the last prefix from the top of the stack. + */ + public void popPrefix() { + prefixes.removeLast(); + } + + /** + * Change whether blocks are tight or loose. Loose is the default where blocks are separated by a blank line. Tight + * is where blocks are not separated by a blank line. Tight blocks are used in lists, if there are no blank lines + * within the list. + * <p> + * Note that changing this does not affect block separators that have already been enqueued with {@link #block()}, + * only future ones. + */ + public void pushTight(boolean tight) { + this.tight.addLast(tight); + } + + /** + * Remove the last "tight" setting from the top of the stack. + */ + public void popTight() { + this.tight.removeLast(); + } + + /** + * Escape the characters matching the supplied matcher, in all text (text and raw). This might be useful to + * extensions that add another layer of syntax, e.g. the tables extension that uses `|` to separate cells and needs + * all `|` characters to be escaped (even in code spans). + * + * @param rawEscape the characters to escape in raw text + */ + public void pushRawEscape(CharMatcher rawEscape) { + rawEscapes.add(rawEscape); + } + + /** + * Remove the last raw escape from the top of the stack. + */ + public void popRawEscape() { + rawEscapes.removeLast(); + } + + /** + * @return the last character that was written + */ + public char getLastChar() { + return lastChar; + } + + /** + * @return whether we're at the line start (not counting any prefixes), i.e. after a {@link #line} or {@link #block}. + */ + public boolean isAtLineStart() { + return atLineStart; + } + + private void write(String s, CharMatcher escape) { + try { + if (rawEscapes.isEmpty() && escape == null) { + // Normal fast path + buffer.append(s); + } else { + for (int i = 0; i < s.length(); i++) { + append(s.charAt(i), escape); + } + } + } catch (IOException e) { + throw new RuntimeException(e); + } + + int length = s.length(); + if (length != 0) { + lastChar = s.charAt(length - 1); + } + atLineStart = false; + } + + private void write(char c) { + try { + append(c, null); + } catch (IOException e) { + throw new RuntimeException(e); + } + + lastChar = c; + atLineStart = false; + } + + private void writePrefixes() { + if (!prefixes.isEmpty()) { + for (String prefix : prefixes) { + write(prefix, null); + } + } + } + + /** + * If a block separator has been enqueued with {@link #block()} but not yet written, write it now. + */ + private void flushBlockSeparator() { + if (blockSeparator != 0) { + write('\n'); + writePrefixes(); + if (blockSeparator > 1) { + write('\n'); + writePrefixes(); + } + blockSeparator = 0; + } + } + + private void append(char c, CharMatcher escape) throws IOException { + if (needsEscaping(c, escape)) { + if (c == '\n') { + // Can't escape this with \, use numeric character reference + buffer.append(" "); + } else { + buffer.append('\\'); + buffer.append(c); + } + } else { + buffer.append(c); + } + } + + private boolean isTight() { + return !tight.isEmpty() && tight.getLast(); + } + + private boolean needsEscaping(char c, CharMatcher escape) { + return (escape != null && escape.matches(c)) || rawNeedsEscaping(c); + } + + private boolean rawNeedsEscaping(char c) { + for (CharMatcher rawEscape : rawEscapes) { + if (rawEscape.matches(c)) { + return true; + } + } + return false; + } +} diff --git a/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java new file mode 100644 index 0000000000000000000000000000000000000000..f707671d502a69157db0ff7f97d7b2ded35fd699 --- /dev/null +++ b/commonmark/src/main/java/org/commonmark/renderer/markdown/package-info.java @@ -0,0 +1,4 @@ +/** + * Markdown rendering (see {@link org.commonmark.renderer.markdown.MarkdownRenderer}) + */ +package org.commonmark.renderer.markdown; diff --git a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java index 07a5580918e441e5115a98437d1e46d73624cdc9..8309f4bd618ff48eee5e8db47d095a8232627307 100644 --- a/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java +++ b/commonmark/src/main/java/org/commonmark/renderer/text/package-info.java @@ -1,4 +1,4 @@ /** - * Text content rendering (see {@link org.commonmark.renderer.text.TextContentRenderer}) + * Plain text rendering with minimal markup (see {@link org.commonmark.renderer.text.TextContentRenderer}) */ package org.commonmark.renderer.text; diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java new file mode 100644 index 0000000000000000000000000000000000000000..9bab92bcc613271cefed79fec9879fa351317b56 --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/MarkdownRendererTest.java @@ -0,0 +1,260 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.*; +import org.commonmark.parser.Parser; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; + +public class MarkdownRendererTest { + + // Leaf blocks + + @Test + public void testThematicBreaks() { + assertRoundTrip("___\n"); + assertRoundTrip("___\n\nfoo\n"); + // List item with hr -> hr needs to not use the same as the marker + assertRoundTrip("* ___\n"); + assertRoundTrip("- ___\n"); + } + + @Test + public void testHeadings() { + // Type of heading is currently not preserved + assertRoundTrip("# foo\n"); + assertRoundTrip("## foo\n"); + assertRoundTrip("### foo\n"); + assertRoundTrip("#### foo\n"); + assertRoundTrip("##### foo\n"); + assertRoundTrip("###### foo\n"); + + assertRoundTrip("Foo\nbar\n===\n"); + assertRoundTrip("Foo \nbar\n===\n"); + assertRoundTrip("[foo\nbar](/url)\n===\n"); + + assertRoundTrip("# foo\n\nbar\n"); + } + + @Test + public void testIndentedCodeBlocks() { + assertRoundTrip(" hi\n"); + assertRoundTrip(" hi\n code\n"); + assertRoundTrip("> hi\n> code\n"); + } + + @Test + public void testFencedCodeBlocks() { + assertRoundTrip("```\ntest\n```\n"); + assertRoundTrip("~~~~\ntest\n~~~~\n"); + assertRoundTrip("```info\ntest\n```\n"); + assertRoundTrip(" ```\n test\n ```\n"); + assertRoundTrip("```\n```\n"); + } + + @Test + public void testHtmlBlocks() { + assertRoundTrip("<div>test</div>\n"); + assertRoundTrip("> <div>\n> test\n> </div>\n"); + } + + @Test + public void testParagraphs() { + assertRoundTrip("foo\n"); + assertRoundTrip("foo\n\nbar\n"); + } + + // Container blocks + + @Test + public void testBlockQuotes() { + assertRoundTrip("> test\n"); + assertRoundTrip("> foo\n> bar\n"); + assertRoundTrip("> > foo\n> > bar\n"); + assertRoundTrip("> # Foo\n> \n> bar\n> baz\n"); + } + + @Test + public void testBulletListItems() { + assertRoundTrip("* foo\n"); + assertRoundTrip("- foo\n"); + assertRoundTrip("+ foo\n"); + assertRoundTrip("* foo\n bar\n"); + assertRoundTrip("* ```\n code\n ```\n"); + assertRoundTrip("* foo\n\n* bar\n"); + // Note that the " " in the second line is not necessary, but it's not wrong either. + // We could try to avoid it in a future change, but not sure if necessary. + assertRoundTrip("* foo\n \n bar\n"); + + // Tight list + assertRoundTrip("* foo\n* bar\n"); + // Tight list where the second item contains a loose list + assertRoundTrip("- Foo\n - Bar\n \n - Baz\n"); + + // List item indent. This is a tricky one, but here the amount of space between the list marker and "one" + // determines whether "two" is part of the list item or an indented code block. + // In this case, it's an indented code block because it's not indented enough to be part of the list item. + // If the renderer would just use "- one", then "two" would change from being an indented code block to being + // a paragraph in the list item! So it is important for the renderer to preserve the content indent of the list + // item. + assertRoundTrip(" - one\n\n two\n"); + + // Empty list + assertRoundTrip("- \n\nFoo\n"); + } + + @Test + public void testOrderedListItems() { + assertRoundTrip("1. foo\n"); + assertRoundTrip("2. foo\n\n3. bar\n"); + + // Tight list + assertRoundTrip("1. foo\n2. bar\n"); + // Tight list where the second item contains a loose list + assertRoundTrip("1. Foo\n 1. Bar\n \n 2. Baz\n"); + + assertRoundTrip(" 1. one\n\n two\n"); + } + + // Inlines + + @Test + public void testTabs() { + assertRoundTrip("a\tb\n"); + } + + @Test + public void testEscaping() { + // These are a bit tricky. We always escape some characters, even though they only need escaping if they would + // otherwise result in a different parse result (e.g. a link): + assertRoundTrip("\\[a\\](/uri)\n"); + assertRoundTrip("\\`abc\\`\n"); + + // Some characters only need to be escaped at the beginning of the line + assertRoundTrip("\\- Test\n"); + assertRoundTrip("\\-\n"); + assertRoundTrip("Test -\n"); + assertRoundTrip("Abc\n\n\\- Test\n"); + assertRoundTrip("\\# Test\n"); + assertRoundTrip("\\## Test\n"); + assertRoundTrip("\\#\n"); + assertRoundTrip("Foo\n\\===\n"); + // The beginning of the line within the block, so disregarding prefixes + assertRoundTrip("> \\- Test\n"); + assertRoundTrip("- \\- Test\n"); + // That's not the beginning of the line + assertRoundTrip("`a`- foo\n"); + + // This is a bit more tricky as we need to check for a list start + assertRoundTrip("1\\. Foo\n"); + assertRoundTrip("999\\. Foo\n"); + assertRoundTrip("1\\.\n"); + assertRoundTrip("1\\) Foo\n"); + + // Escaped whitespace, wow + assertRoundTrip("	foo\n"); + assertRoundTrip("  foo\n"); + assertRoundTrip("foo bar\n"); + } + + @Test + public void testCodeSpans() { + assertRoundTrip("`foo`\n"); + assertRoundTrip("``foo ` bar``\n"); + assertRoundTrip("```foo `` ` bar```\n"); + + assertRoundTrip("`` `foo ``\n"); + assertRoundTrip("`` ` ``\n"); + assertRoundTrip("` `\n"); + } + + @Test + public void testEmphasis() { + assertRoundTrip("*foo*\n"); + assertRoundTrip("foo*bar*\n"); + // When nesting, a different delimiter needs to be used + assertRoundTrip("*_foo_*\n"); + assertRoundTrip("*_*foo*_*\n"); + assertRoundTrip("_*foo*_\n"); + + // Not emphasis (needs * inside words) + assertRoundTrip("foo\\_bar\\_\n"); + + // Even when rendering a manually constructed tree, the emphasis delimiter needs to be chosen correctly. + Document doc = new Document(); + Paragraph p = new Paragraph(); + doc.appendChild(p); + Emphasis e1 = new Emphasis(); + p.appendChild(e1); + Emphasis e2 = new Emphasis(); + e1.appendChild(e2); + e2.appendChild(new Text("hi")); + assertEquals("*_hi_*\n", render(doc)); + } + + @Test + public void testStrongEmphasis() { + assertRoundTrip("**foo**\n"); + assertRoundTrip("foo**bar**\n"); + } + + @Test + public void testLinks() { + assertRoundTrip("[link](/uri)\n"); + assertRoundTrip("[link](/uri \"title\")\n"); + assertRoundTrip("[link](</my uri>)\n"); + assertRoundTrip("[a](<b)c>)\n"); + assertRoundTrip("[a](<b(c>)\n"); + assertRoundTrip("[a](<b\\>c>)\n"); + assertRoundTrip("[a](<b\\\\\\>c>)\n"); + assertRoundTrip("[a](/uri \"foo \\\" bar\")\n"); + assertRoundTrip("[link](/uri \"tes\\\\\")\n"); + assertRoundTrip("[link](/url \"test \")\n"); + assertRoundTrip("[link](</url >)\n"); + } + + @Test + public void testImages() { + assertRoundTrip("\n"); + assertRoundTrip("\n"); + assertRoundTrip("\n"); + assertRoundTrip("c>)\n"); + assertRoundTrip("\n"); + assertRoundTrip("\n"); + assertRoundTrip("\n"); + assertRoundTrip("\n"); + } + + @Test + public void testHtmlInline() { + assertRoundTrip("<del>*foo*</del>\n"); + } + + @Test + public void testHardLineBreaks() { + assertRoundTrip("foo \nbar\n"); + } + + @Test + public void testSoftLineBreaks() { + assertRoundTrip("foo\nbar\n"); + } + + private void assertRoundTrip(String input) { + String rendered = parseAndRender(input); + assertEquals(input, rendered); + } + + private String parseAndRender(String source) { + Node parsed = parse(source); + return render(parsed); + } + + private Node parse(String source) { + return Parser.builder().build().parse(source); + } + + private String render(Node node) { + return MarkdownRenderer.builder().build().render(node); + } +} diff --git a/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java new file mode 100644 index 0000000000000000000000000000000000000000..5df2e5c804e8ed84b1d46412142350932d11eb4d --- /dev/null +++ b/commonmark/src/test/java/org/commonmark/renderer/markdown/SpecMarkdownRendererTest.java @@ -0,0 +1,97 @@ +package org.commonmark.renderer.markdown; + +import org.commonmark.node.Node; +import org.commonmark.parser.Parser; +import org.commonmark.renderer.html.HtmlRenderer; +import org.commonmark.testutil.TestResources; +import org.commonmark.testutil.example.Example; +import org.commonmark.testutil.example.ExampleReader; +import org.junit.Test; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertTrue; + +/** + * Tests Markdown rendering using the examples in the spec like this: + * <ol> + * <li>Parses the source to an AST and then renders it back to Markdown</li> + * <li>Parses that to an AST and then renders it to HTML</li> + * <li>Compares that HTML to the expected HTML of the example: + * If it's the same, then the expected elements were preserved in the Markdown rendering</li> + * </ol> + */ +public class SpecMarkdownRendererTest { + + public static final MarkdownRenderer MARKDOWN_RENDERER = MarkdownRenderer.builder().build(); + // The spec says URL-escaping is optional, but the examples assume that it's enabled. + public static final HtmlRenderer HTML_RENDERER = HtmlRenderer.builder().percentEncodeUrls(true).build(); + + @Test + public void testCoverage() { + List<Example> examples = ExampleReader.readExamples(TestResources.getSpec()); + List<Example> passes = new ArrayList<>(); + List<Example> fails = new ArrayList<>(); + for (Example example : examples) { + String markdown = renderMarkdown(example.getSource()); + String rendered = renderHtml(markdown); + if (rendered.equals(example.getHtml())) { + passes.add(example); + } else { + fails.add(example); + } + } + + System.out.println("Passed examples by section (total " + passes.size() + "):"); + printCountsBySection(passes); + System.out.println(); + + System.out.println("Failed examples by section (total " + fails.size() + "):"); + printCountsBySection(fails); + System.out.println(); + + System.out.println("Failed examples:"); + for (Example fail : fails) { + System.out.println("Failed: " + fail); + System.out.println("````````````````````````````````"); + System.out.print(fail.getSource()); + System.out.println("````````````````````````````````"); + System.out.println(); + } + + int expectedPassed = 652; + assertTrue("Expected at least " + expectedPassed + " examples to pass but was " + passes.size(), passes.size() >= expectedPassed); + assertEquals(0, fails.size()); + } + + private static void printCountsBySection(List<Example> examples) { + Map<String, Integer> bySection = new LinkedHashMap<>(); + for (Example example : examples) { + Integer count = bySection.get(example.getSection()); + if (count == null) { + count = 0; + } + bySection.put(example.getSection(), count + 1); + } + for (Map.Entry<String, Integer> entry : bySection.entrySet()) { + System.out.println(entry.getValue() + ": " + entry.getKey()); + } + } + + private Node parse(String source) { + return Parser.builder().build().parse(source); + } + + private String renderMarkdown(String source) { + return MARKDOWN_RENDERER.render(parse(source)); + } + + private String renderHtml(String source) { + // The spec uses "rightwards arrow" to show tabs + return HTML_RENDERER.render(parse(source)).replace("\t", "\u2192"); + } +} diff --git a/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java b/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java index 9ff646630645d4566107108f9f936bce2447eaf4..08235965a4c86345cb27641ac48d6d34d6469cb3 100644 --- a/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java +++ b/commonmark/src/test/java/org/commonmark/test/UsageExampleTest.java @@ -4,6 +4,7 @@ import org.commonmark.node.*; import org.commonmark.parser.Parser; import org.commonmark.renderer.NodeRenderer; import org.commonmark.renderer.html.*; +import org.commonmark.renderer.markdown.MarkdownRenderer; import org.junit.Ignore; import org.junit.Test; @@ -22,9 +23,21 @@ public class UsageExampleTest { @Test public void parseAndRender() { Parser parser = Parser.builder().build(); - Node document = parser.parse("This is *Sparta*"); + Node document = parser.parse("This is *Markdown*"); HtmlRenderer renderer = HtmlRenderer.builder().escapeHtml(true).build(); - assertEquals("<p>This is <em>Sparta</em></p>\n", renderer.render(document)); + assertEquals("<p>This is <em>Markdown</em></p>\n", renderer.render(document)); + } + + @Test + public void renderToMarkdown() { + MarkdownRenderer renderer = MarkdownRenderer.builder().build(); + Node document = new Document(); + Heading heading = new Heading(); + heading.setLevel(2); + heading.appendChild(new Text("My title")); + document.appendChild(heading); + + assertEquals("## My title\n", renderer.render(document)); } @Test