From e647089554ad6fdd5673a02dbcb2342d76190d85 Mon Sep 17 00:00:00 2001 From: Emiya Date: Fri, 29 Jul 2016 00:26:13 +0300 Subject: [PATCH 1/4] Add support for github style tables --- CommonMark.Tests/CommonMark.Tests.csproj | 1 + CommonMark.Tests/TableTests.cs | 336 +++++++++++++++++ CommonMark/CommonMark.Base.csproj | 1 + CommonMark/CommonMarkAdditionalFeatures.cs | 14 + CommonMark/CommonMarkConverter.cs | 4 +- CommonMark/Formatters/HtmlFormatterSlim.cs | 99 +++++ CommonMark/Parser/BlockMethods.cs | 415 +++++++++++++++++++-- CommonMark/Parser/TabTextReader.cs | 1 + CommonMark/Syntax/Block.cs | 25 +- CommonMark/Syntax/BlockTag.cs | 17 +- CommonMark/Syntax/TableHeaderAlignment.cs | 28 ++ 11 files changed, 884 insertions(+), 57 deletions(-) create mode 100644 CommonMark.Tests/TableTests.cs create mode 100644 CommonMark/Syntax/TableHeaderAlignment.cs diff --git a/CommonMark.Tests/CommonMark.Tests.csproj b/CommonMark.Tests/CommonMark.Tests.csproj index 4d0bd9d..d867031 100644 --- a/CommonMark.Tests/CommonMark.Tests.csproj +++ b/CommonMark.Tests/CommonMark.Tests.csproj @@ -65,6 +65,7 @@ + diff --git a/CommonMark.Tests/TableTests.cs b/CommonMark.Tests/TableTests.cs new file mode 100644 index 0000000..2009145 --- /dev/null +++ b/CommonMark.Tests/TableTests.cs @@ -0,0 +1,336 @@ +using CommonMark.Syntax; +using Microsoft.VisualStudio.TestTools.UnitTesting; +using System.IO; + +namespace CommonMark.Tests +{ + [TestClass] + public class TableTests + { + static CommonMarkSettings ReadSettings; + static CommonMarkSettings WriteSettings; + + static TableTests() + { + ReadSettings = CommonMarkSettings.Default.Clone(); + ReadSettings.AdditionalFeatures = CommonMarkAdditionalFeatures.GithubStyleTables; + ReadSettings.TrackSourcePosition = true; + + WriteSettings = CommonMarkSettings.Default.Clone(); + WriteSettings.AdditionalFeatures = CommonMarkAdditionalFeatures.GithubStyleTables; + } + + [TestMethod] + public void SimpleTable() + { + var markdown = "First Header | Second Header\n------------- | -------------\nContent Cell | Content Cell\nContent Cell | Content Cell\n"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n", html); + + var firstChild = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); + Assert.IsNotNull(firstChild.TableHeaderAlignments); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("First Header | Second Header\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("Content Cell | Content Cell\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("Content Cell | Content Cell\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("Content Cell", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + } + + [TestMethod] + public void SplitTable() + { + var markdown = +@"First Header | Second Header +------------- | ------------- +Content Cell1 | Content Cell2 +Content Cell3 | Content Cell4 +Hello world +"; + markdown = markdown.Replace("\r\n", "\n"); + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
Content Cell1Content Cell2
Content Cell3Content Cell4
\r\n

Hello world

\r\n\r\n", html); + + var firstChild = ast.FirstChild; + var secondChild = firstChild.NextSibling; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + var firstMarkdown = markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength); + var shouldMatch = @"First Header | Second Header +------------- | ------------- +Content Cell1 | Content Cell2 +Content Cell3 | Content Cell4 +"; + shouldMatch = shouldMatch.Replace("\r\n", "\n"); + + Assert.AreEqual(shouldMatch, firstMarkdown); + Assert.IsNotNull(firstChild.TableHeaderAlignments); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("First Header | Second Header\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("Content Cell1 | Content Cell2\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("Content Cell1", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("Content Cell2", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("Content Cell3 | Content Cell4\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("Content Cell3", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("Content Cell4", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + + Assert.AreEqual(BlockTag.Paragraph, secondChild.Tag); + var secondMarkdown = markdown.Substring(secondChild.SourcePosition, secondChild.SourceLength); + Assert.AreEqual("Hello world\n", secondMarkdown); + } + + [TestMethod] + public void WrappedTable() + { + var markdown = +@"Nope nope. + +First Header | Second Header +------------- | ------------- +Content Cell | Content Cell +Content Cell | Content Cell +Hello world +"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n

Hello world

\r\n\r\n", html); + + Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.Tag); + Assert.AreEqual(BlockTag.Table, ast.FirstChild.NextSibling.Tag); + Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.NextSibling.NextSibling.Tag); + } + + [TestMethod] + public void TableWithInlines() + { + var markdown = +@" Name | Description + ------------- | ----------- + Help | **Display the** [help](/help) window. + Close | _Closes_ a window "; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
NameDescription
HelpDisplay the help window.
CloseCloses a window
\r\n", html); + } + + [TestMethod] + public void TableWithExtraPipes() + { + var markdown = "| First Header | Second Header |\n| ------------- | ------------- |\n| cell #11 | cell #12 |\n| cell #21 | cell #22 |\n"; + + var ast = + CommonMarkConverter.Parse( + markdown, + ReadSettings + ); + + var firstChild = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, firstChild.Tag); + Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); + + var headerRow = firstChild.FirstChild; + Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); + Assert.AreEqual("| First Header | Second Header |\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); + + var headerCell1 = headerRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); + Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); + + var headerCell2 = headerCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); + Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); + Assert.IsNull(headerCell2.NextSibling); + + var firstRow = headerRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); + Assert.AreEqual("| cell #11 | cell #12 |\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); + + var firstRowCell1 = firstRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); + Assert.AreEqual("cell #11", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); + + var firstRowCell2 = firstRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); + Assert.AreEqual("cell #12", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); + Assert.IsNull(firstRowCell2.NextSibling); + + var secondRow = firstRow.NextSibling; + Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); + Assert.AreEqual("| cell #21 | cell #22 |\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); + Assert.IsNull(secondRow.NextSibling); + + var secondRowCell1 = secondRow.FirstChild; + Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); + Assert.AreEqual("cell #21", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); + + var secondRowCell2 = secondRowCell1.NextSibling; + Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); + Assert.AreEqual("cell #22", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); + Assert.IsNull(secondRowCell2.NextSibling); + } + + [TestMethod] + public void TableCellMismatch() + { + var markdown = +@"| First Header | Second Header | +| ------------- | ------------- | +| 11 | +| 21 | 22 | 23 +"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
First HeaderSecond Header
11
2122
\r\n", html); + } + + [TestMethod] + public void TableAlignment() + { + var markdown = +@"| H1 | H2 | H3 | H4 + --- | :-- | ---:| :-: | +|1|2|3|4| +"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + var table = ast.FirstChild; + Assert.AreEqual(BlockTag.Table, table.Tag); + Assert.AreEqual(4, table.TableHeaderAlignments.Count); + Assert.AreEqual(TableHeaderAlignment.None, table.TableHeaderAlignments[0]); + Assert.AreEqual(TableHeaderAlignment.Left, table.TableHeaderAlignments[1]); + Assert.AreEqual(TableHeaderAlignment.Right, table.TableHeaderAlignments[2]); + Assert.AreEqual(TableHeaderAlignment.Center, table.TableHeaderAlignments[3]); + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
H1H2H3H4
1234
\r\n", html); + } + } +} \ No newline at end of file diff --git a/CommonMark/CommonMark.Base.csproj b/CommonMark/CommonMark.Base.csproj index c58644b..f05c409 100644 --- a/CommonMark/CommonMark.Base.csproj +++ b/CommonMark/CommonMark.Base.csproj @@ -86,6 +86,7 @@ + diff --git a/CommonMark/CommonMarkAdditionalFeatures.cs b/CommonMark/CommonMarkAdditionalFeatures.cs index f1e1814..07ae2ac 100644 --- a/CommonMark/CommonMarkAdditionalFeatures.cs +++ b/CommonMark/CommonMarkAdditionalFeatures.cs @@ -20,6 +20,20 @@ public enum CommonMarkAdditionalFeatures /// StrikethroughTilde = 1, + /// + /// The parser will recognize + /// + /// First Header | Second Header + /// ------------- | ------------- + /// Content Cell | Content Cell + /// Content Cell | Content Cell + /// + /// style tables. + /// + /// Refer to https://help.github.com/articles/organizing-information-with-tables/ for more examples + /// + GithubStyleTables = 2, + /// /// All additional features are enabled. /// diff --git a/CommonMark/CommonMarkConverter.cs b/CommonMark/CommonMarkConverter.cs index 69f6a3e..7f1d94c 100644 --- a/CommonMark/CommonMarkConverter.cs +++ b/CommonMark/CommonMarkConverter.cs @@ -111,7 +111,7 @@ public static Syntax.Block ProcessStage1(TextReader source, CommonMarkSettings s reader.ReadLine(line); while (line.Line != null) { - BlockMethods.IncorporateLine(line, ref cur); + BlockMethods.IncorporateLine(line, ref cur, settings); reader.ReadLine(line); } } @@ -132,7 +132,7 @@ public static Syntax.Block ProcessStage1(TextReader source, CommonMarkSettings s { do { - BlockMethods.Finalize(cur, line); + BlockMethods.Finalize(cur, line, settings); cur = cur.Parent; } while (cur != null); } diff --git a/CommonMark/Formatters/HtmlFormatterSlim.cs b/CommonMark/Formatters/HtmlFormatterSlim.cs index c9da0c6..b7ac2ad 100644 --- a/CommonMark/Formatters/HtmlFormatterSlim.cs +++ b/CommonMark/Formatters/HtmlFormatterSlim.cs @@ -212,6 +212,101 @@ internal static void PrintPosition(HtmlTextWriter writer, Inline inline) writer.WriteConstant("\""); } + static void WriteTable(Block table, HtmlTextWriter writer, CommonMarkSettings settings, Stack stack) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) + { + throw new CommonMarkException("Table encountered in AST, but GithubStyleTables are not enabled"); + } + + var header = table.FirstChild; + var firstRow = table.FirstChild.NextSibling; + + writer.WriteConstant(""); + writer.WriteConstant(""); + writer.WriteConstant(""); + + var numHeadings = 0; + + var curHeaderCell = header.FirstChild; + while (curHeaderCell != null) + { + var alignment = table.TableHeaderAlignments[numHeadings]; + + numHeadings++; + + if (alignment == TableHeaderAlignment.None) + { + writer.WriteConstant(""); + + curHeaderCell = curHeaderCell.NextSibling; + } + + writer.WriteConstant(""); + writer.WriteConstant(""); + + writer.WriteConstant(""); + var curRow = firstRow; + while (curRow != null) + { + writer.WriteConstant(""); + var curRowCell = curRow.FirstChild; + + var numCells = 0; + + while (curRowCell != null && numCells < numHeadings) + { + var alignment = table.TableHeaderAlignments[numCells]; + + numCells++; + + if (alignment == TableHeaderAlignment.None) + { + writer.WriteConstant(""); + + curRowCell = curRowCell.NextSibling; + } + + while (numCells < numHeadings) + { + numCells++; + writer.WriteConstant(""); + } + + writer.WriteConstant(""); + + curRow = curRow.NextSibling; + } + writer.WriteConstant(""); + writer.WriteConstant("
"); + } + else + { + switch (alignment) + { + case TableHeaderAlignment.Center: writer.WriteConstant(""); break; + case TableHeaderAlignment.Left: writer.WriteConstant(""); break; + case TableHeaderAlignment.Right: writer.WriteConstant(""); break; + default: throw new CommonMarkException("Unexpected TableHeaderAlignment [" + alignment + "]"); + } + } + InlinesToHtml(writer, curHeaderCell.InlineContent, settings, stack); + writer.WriteConstant("
"); + } + else + { + switch (alignment) + { + case TableHeaderAlignment.Center: writer.WriteConstant(""); break; + case TableHeaderAlignment.Left: writer.WriteConstant(""); break; + case TableHeaderAlignment.Right: writer.WriteConstant(""); break; + default: throw new CommonMarkException("Unexpected TableHeaderAlignment [" + alignment + "]"); + } + } + InlinesToHtml(writer, curRowCell.InlineContent, settings, stack); + writer.WriteConstant("
"); + } + private static void BlocksToHtmlInner(HtmlTextWriter writer, Block block, CommonMarkSettings settings) { var stack = new Stack(); @@ -357,6 +452,10 @@ private static void BlocksToHtmlInner(HtmlTextWriter writer, Block block, Common break; + case BlockTag.Table: + WriteTable(block, writer, settings, inlineStack); + break; + case BlockTag.ReferenceDefinition: break; diff --git a/CommonMark/Parser/BlockMethods.cs b/CommonMark/Parser/BlockMethods.cs index 5b1ce59..19e77e7 100644 --- a/CommonMark/Parser/BlockMethods.cs +++ b/CommonMark/Parser/BlockMethods.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Globalization; +using System.Text; using CommonMark.Syntax; namespace CommonMark.Parser @@ -79,7 +80,7 @@ private static bool EndsWithBlankLine(Block block) /// /// Break out of all containing lists /// - private static void BreakOutOfLists(ref Block blockRef, LineInfo line) + private static void BreakOutOfLists(ref Block blockRef, LineInfo line, CommonMarkSettings settings) { Block container = blockRef; Block b = container.Top; @@ -92,16 +93,355 @@ private static void BreakOutOfLists(ref Block blockRef, LineInfo line) { while (container != null && container != b) { - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } - Finalize(b, line); + Finalize(b, line, settings); blockRef = b.Parent; } } - public static void Finalize(Block b, LineInfo line) + static List ParseTableLine(string line, StringBuilder sb) + { + var ret = new List(); + + var i = 0; + + if (i < line.Length && line[i] == '|') i++; + + while (i < line.Length && char.IsWhiteSpace(line[i])) i++; + + for (; i < line.Length; i++) + { + var c = line[i]; + if (c == '\\') + { + i++; + continue; + } + + if (c == '|') + { + ret.Add(sb.ToString()); + sb.Clear(); + } + else + { + sb.Append(c); + } + } + + if (sb.Length != 0) + { + ret.Add(sb.ToString()); + sb.Clear(); + } + + return ret; + } + + static void MakeTableCells(Block row, StringBuilder sb) + { + var asStr = row.StringContent.ToString(); + + var offset = 0; + + for (var i = 0; i < asStr.Length; i++) + { + var c = asStr[i]; + + if (c == '|') + { + var text = sb.ToString(); + sb.Clear(); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + + offset += text.Length; + + // skip the | + offset++; + continue; + } + + if (c == '\\') + { + sb.Append(c); + if (i + 1 < asStr.Length) + { + sb.Append(asStr[i + 1]); + } + i++; + } + else + { + sb.Append(c); + } + } + + if (sb.Length > 0) + { + var text = sb.ToString(); + sb.Clear(); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) + { + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + } + } + } + + static void MakeTableRows(Block table, StringBuilder sb) + { + var asStr = table.StringContent.ToString(); + var lines = asStr.Split('\n'); + + var offset = 0; + + for (var i = 0; i < lines.Length; i++) + { + var line = lines[i]; + + var lineLength = line.Length; + var hasLineBreak = offset + lineLength < asStr.Length && asStr[offset + lineLength] == '\n'; + if (hasLineBreak) lineLength++; + + // skip the header row + if (i != 1 && !string.IsNullOrWhiteSpace(line)) + { + var rowStartsInDocument = table.SourcePosition + offset; + var row = new Block(BlockTag.TableRow, rowStartsInDocument); + row.SourceLastPosition = rowStartsInDocument + lineLength; + + row.StringContent = new StringContent(); + row.StringContent.Append(asStr, offset, row.SourceLength); + + if (table.LastChild == null) + { + table.FirstChild = row; + table.LastChild = row; + } + else + { + table.LastChild.NextSibling = row; + table.LastChild = row; + } + + MakeTableCells(row, sb); + row.IsOpen = false; + } + + offset += lineLength; + } + } + + static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; + + var asStr = b.StringContent.ToString(); + var lines = asStr.Split('\n'); + + if (lines.Length < 2) return false; + + var sb = new StringBuilder(); + + var columnsLine = ParseTableLine(lines[0], sb); + if (columnsLine.Count == 1) return false; + + var headerLine = ParseTableLine(lines[1], sb); + if (headerLine.Count == 1) return false; + + var headerAlignment = new List(); + + foreach (var headerPart in headerLine) + { + var trimmed = headerPart.Trim(); + if (trimmed.Length < 3) return false; + + var validateFrom = 0; + var startsWithColon = trimmed[validateFrom] == ':'; + if (startsWithColon) validateFrom++; + + var validateTo = trimmed.Length - 1; + var endsWithColon = trimmed[validateTo] == ':'; + if (endsWithColon) validateTo--; + + for (var i = validateFrom; i <= validateTo; i++) + { + // don't check for escapes, they don't count in header + if (trimmed[i] != '-') return false; + } + + if (!startsWithColon && !endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.None); + continue; + } + + if (startsWithColon && endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Center); + continue; + } + + if (startsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Left); + } + + if (endsWithColon) + { + headerAlignment.Add(TableHeaderAlignment.Right); + } + } + + while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[0])) columnsLine.RemoveAt(0); + while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[columnsLine.Count - 1])) columnsLine.RemoveAt(columnsLine.Count - 1); + while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[0])) headerLine.RemoveAt(0); + while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[headerLine.Count - 1])) headerLine.RemoveAt(headerLine.Count - 1); + + if (columnsLine.Count < 2) return false; + if (headerLine.Count < columnsLine.Count) return false; + + var lastTableLine = 1; + + // it's a table! + for (var i = 2; i < lines.Length; i++) + { + var hasPipe = false; + for (var j = 0; j < lines[i].Length; j++) + { + var c = lines[i][j]; + if (c == '\\') + { + j++; + continue; + } + + if (c == '|') + { + hasPipe = true; + break; + } + } + if (!hasPipe) break; + + lastTableLine = i; + } + + if (lastTableLine + 1 < lines.Length && string.IsNullOrWhiteSpace(lines[lastTableLine + 1])) + { + lastTableLine++; + } + + var wholeBlockIsTable = lastTableLine == (lines.Length - 1); + + // No need to break, the whole block is a table now + if (wholeBlockIsTable) + { + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + + // create table rows + MakeTableRows(b, sb); + return true; + } + + var takingCharsForTable = 0; + for (var i = 0; i <= lastTableLine; i++) + { + takingCharsForTable += lines[i].Length; + var hasFollowingLineBreak = takingCharsForTable < asStr.Length && asStr[takingCharsForTable] == '\n'; + if (hasFollowingLineBreak) + { + takingCharsForTable++; + } + } + + // get the text of the table separate + var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); + var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); + + // create the trailing paragraph, and set it's text and source positions + var newParagraph = b.Clone(); + newParagraph.StringContent = b.StringContent; + if (settings.TrackSourcePosition) + { + newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; + newParagraph.SourceLastPosition = newParagraph.SourcePosition + (asStr.Length - tableBlockString.Length); + } + + // update the text of the table block + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + b.StringContent = new StringContent(); + b.StringContent.Append(tableBlockString, 0, tableBlockString.Length); + if (settings.TrackSourcePosition) + { + b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; + } + + // create table rows + MakeTableRows(b, sb); + + // put the new paragraph after the table + newParagraph.NextSibling = b.NextSibling; + b.NextSibling = newParagraph; + + Finalize(newParagraph, line, settings); + + return true; + } + + public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) { // don't do anything if the block is already closed if (!b.IsOpen) @@ -124,22 +464,24 @@ public static void Finalize(Block b, LineInfo line) b.SourceLastPosition = line.CalculateOrigin(0, false); } -#pragma warning disable 0618 - b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber; -#pragma warning restore 0618 - switch (b.Tag) { case BlockTag.Paragraph: var sc = b.StringContent; + + if (TryMakeTable(b, line, settings)) + { + break; + } + if (!sc.StartsWith('[')) break; var subj = new Subject(b.Top.Document); sc.FillSubject(subj); var origPos = subj.Position; - while (subj.Position < subj.Buffer.Length - && subj.Buffer[subj.Position] == '[' + while (subj.Position < subj.Buffer.Length + && subj.Buffer[subj.Position] == '[' && 0 != InlineMethods.ParseReference(subj)) { } @@ -208,13 +550,13 @@ public static void Finalize(Block b, LineInfo line) /// Adds a new block as child of another. Return the child. /// /// Original: add_child - public static Block CreateChildBlock(Block parent, LineInfo line, BlockTag blockType, int startColumn) + public static Block CreateChildBlock(Block parent, LineInfo line, CommonMarkSettings settings, BlockTag blockType, int startColumn) { // if 'parent' isn't the kind of block that can accept this child, // then back up til we hit a block that can. while (!CanContain(parent.Tag, blockType)) { - Finalize(parent, line); + Finalize(parent, line, settings); parent = parent.Parent; } @@ -229,9 +571,6 @@ public static Block CreateChildBlock(Block parent, LineInfo line, BlockTag block if (lastChild != null) { lastChild.NextSibling = child; -#pragma warning disable 0618 - child.Previous = lastChild; -#pragma warning restore 0618 } else { @@ -295,7 +634,7 @@ public static void ProcessInlines(Block block, DocumentData data, CommonMarkSett while (block != null) { var tag = block.Tag; - if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading) + if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading || tag == BlockTag.TableCell) { sc = block.StringContent; if (sc != null) @@ -403,7 +742,7 @@ private static bool ListsMatch(ListData listData, ListData itemData) { return (listData.ListType == itemData.ListType && listData.Delimiter == itemData.Delimiter && - // list_data.marker_offset == item_data.marker_offset && + // list_data.marker_offset == item_data.marker_offset && listData.BulletChar == itemData.BulletChar); } @@ -481,12 +820,12 @@ private static void AdvanceOffset(string line, int count, bool columns, ref int // Process one line at a time, modifying a block. // Returns 0 if successful. curptr is changed to point to // the currently open block. - public static void IncorporateLine(LineInfo line, ref Block curptr) + public static void IncorporateLine(LineInfo line, ref Block curptr, CommonMarkSettings settings) { var ln = line.Line; Block last_matched_container; - + // offset is the char position in the line var offset = 0; @@ -646,7 +985,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) // check to see if we've hit 2nd blank line, break out of list: if (blank && container.IsLastLineBlank) - BreakOutOfLists(ref container, line); + BreakOutOfLists(ref container, line, settings); var maybeLazy = cur.Tag == BlockTag.Paragraph; @@ -669,21 +1008,21 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces); AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.BlockQuote, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.BlockQuote, first_nonspace); } else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i))) { AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.AtxHeading, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.AtxHeading, first_nonspace); container.Heading = new HeadingData(i); } else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length))) { - container = CreateChildBlock(container, line, BlockTag.FencedCode, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.FencedCode, first_nonspace); container.FencedCodeData = new FencedCodeData(); container.FencedCodeData.FenceChar = curChar; container.FencedCodeData.FenceLength = matched; @@ -692,13 +1031,13 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); } - else if (!indented && curChar == '<' && + else if (!indented && curChar == '<' && (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length)) || (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length))) )) { - container = CreateChildBlock(container, line, BlockTag.HtmlBlock, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.HtmlBlock, first_nonspace); container.HtmlBlockType = (HtmlBlockType)matched; // note, we don't adjust offset because the tag is part of the text @@ -712,19 +1051,19 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); } - else if (!indented - && !(container.Tag == BlockTag.Paragraph && !all_matched) + else if (!indented + && !(container.Tag == BlockTag.Paragraph && !all_matched) && 0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length))) { // it's only now that we know the line is not part of a setext heading: - container = CreateChildBlock(container, line, BlockTag.ThematicBreak, first_nonspace); - Finalize(container, line); + container = CreateChildBlock(container, line, settings, BlockTag.ThematicBreak, first_nonspace); + Finalize(container, line, settings); container = container.Parent; AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); } - else if ((!indented || container.Tag == BlockTag.List) + else if ((!indented || container.Tag == BlockTag.List) && 0 != (matched = ParseListMarker(ln, first_nonspace, out data))) { @@ -769,18 +1108,18 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data)) { - container = CreateChildBlock(container, line, BlockTag.List, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.List, first_nonspace); container.ListData = data; } // add the list item - container = CreateChildBlock(container, line, BlockTag.ListItem, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.ListItem, first_nonspace); container.ListData = data; } else if (indented && !maybeLazy && !blank) { AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, BlockTag.IndentedCode, offset); + container = CreateChildBlock(container, line, settings, BlockTag.IndentedCode, offset); } else { @@ -844,7 +1183,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) while (cur != last_matched_container) { - Finalize(cur, line); + Finalize(cur, line, settings); cur = cur.Parent; if (cur == null) @@ -880,7 +1219,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length)) { - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } @@ -915,7 +1254,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) p--; AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1); - Finalize(container, line); + Finalize(container, line, settings); container = container.Parent; } @@ -929,7 +1268,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) { // create paragraph container for line - container = CreateChildBlock(container, line, BlockTag.Paragraph, first_nonspace); + container = CreateChildBlock(container, line, settings, BlockTag.Paragraph, first_nonspace); AddLine(container, line, ln, first_nonspace, remainingSpaces); } @@ -944,7 +1283,7 @@ public static void IncorporateLine(LineInfo line, ref Block curptr) } } - private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, + private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, out int first_nonspace_column, out char curChar) { var chars_to_tab = TabSize - (column % TabSize); diff --git a/CommonMark/Parser/TabTextReader.cs b/CommonMark/Parser/TabTextReader.cs index f426ff9..f1865c3 100644 --- a/CommonMark/Parser/TabTextReader.cs +++ b/CommonMark/Parser/TabTextReader.cs @@ -57,6 +57,7 @@ public void ReadLine(LineInfo line) if (c == '\r' || c == '\n') goto IL_4A; + if (c == '\0') this._buffer[num] = '\uFFFD'; diff --git a/CommonMark/Syntax/Block.cs b/CommonMark/Syntax/Block.cs index 792f911..73d9232 100644 --- a/CommonMark/Syntax/Block.cs +++ b/CommonMark/Syntax/Block.cs @@ -32,7 +32,6 @@ public Block(BlockTag tag, int startLine, int startColumn, int sourcePosition) { this.Tag = tag; this.StartLine = startLine; - this.EndLine = startLine; this.StartColumn = startColumn; this.SourcePosition = sourcePosition; this.IsOpen = true; @@ -88,14 +87,6 @@ internal static Block CreateDocument() [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] public int StartColumn { get; set; } - /// - /// Gets or sets the number of the last line in the source text that contains this element. - /// - [Obsolete("This is deprecated in favor of SourcePosition/SourceLength and will be removed in future. If you have a use case where this property cannot be replaced with the new ones, please log an issue at https://github.com/Knagis/CommonMark.NET", false)] - [System.Diagnostics.DebuggerBrowsable(System.Diagnostics.DebuggerBrowsableState.Never)] - [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] - public int EndLine { get; set; } - /// /// Gets or sets the position of the block element within the source data. This position is before /// any opening characters. must be enabled @@ -174,6 +165,11 @@ public int SourceLength /// public FencedCodeData FencedCodeData { get; set; } + /// + /// Gets or sets the alignment specified as part of a table heading in a GithubStyleTables. + /// + public List TableHeaderAlignments { get; set; } + /// /// Gets or sets the additional properties that apply to heading elements. /// @@ -220,12 +216,9 @@ public Dictionary ReferenceMap /// public Block NextSibling { get; set; } - /// - /// Gets or sets the previous sibling of this block element. null if this is the first element. - /// - [Obsolete("This property will be removed in future. If you have a use case where this property is required, please log an issue at https://github.com/Knagis/CommonMark.NET", false)] - [System.Diagnostics.DebuggerBrowsable(System.Diagnostics.DebuggerBrowsableState.Never)] - [System.ComponentModel.EditorBrowsable(System.ComponentModel.EditorBrowsableState.Never)] - public Block Previous { get; set; } + internal Block Clone() + { + return (Block)this.MemberwiseClone(); + } } } diff --git a/CommonMark/Syntax/BlockTag.cs b/CommonMark/Syntax/BlockTag.cs index d3e034d..bc242a5 100644 --- a/CommonMark/Syntax/BlockTag.cs +++ b/CommonMark/Syntax/BlockTag.cs @@ -83,6 +83,21 @@ public enum BlockTag : byte /// /// A text block that contains only link reference definitions. /// - ReferenceDefinition + ReferenceDefinition, + + /// + /// A table block + /// + Table, + + /// + /// A table row block + /// + TableRow, + + /// + /// A table cell block + /// + TableCell, } } diff --git a/CommonMark/Syntax/TableHeaderAlignment.cs b/CommonMark/Syntax/TableHeaderAlignment.cs new file mode 100644 index 0000000..810245d --- /dev/null +++ b/CommonMark/Syntax/TableHeaderAlignment.cs @@ -0,0 +1,28 @@ +namespace CommonMark.Syntax +{ + /// + /// Defines the alignment specified in a header column for a GithubStyleTable + /// + public enum TableHeaderAlignment + { + /// + /// No alignment specified + /// + None = 0, + + /// + /// Left alignment + /// + Left = 1, + + /// + /// Right alignment + /// + Right = 2, + + /// + /// Center alignment + /// + Center = 3 + } +} \ No newline at end of file From 572c8178e3b77cf4aa6f62bd61065b24f223fce9 Mon Sep 17 00:00:00 2001 From: Viktor Ekblom Date: Tue, 21 Mar 2017 22:56:05 +0100 Subject: [PATCH 2/4] Fixed Knagis comments on pull request #90 --- CommonMark.Tests/TableTests.cs | 18 +- CommonMark/CommonMark.NET35.csproj | 1 + CommonMark/Parser/BlockMethods.cs | 2633 ++++++++++++++-------------- CommonMark/Syntax/Block.cs | 6 +- CommonMark/Utilities.cs | 207 ++- 5 files changed, 1456 insertions(+), 1409 deletions(-) diff --git a/CommonMark.Tests/TableTests.cs b/CommonMark.Tests/TableTests.cs index 2009145..1c17bf9 100644 --- a/CommonMark.Tests/TableTests.cs +++ b/CommonMark.Tests/TableTests.cs @@ -37,13 +37,13 @@ public void SimpleTable() CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n", html); + Assert.AreEqual("
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
", html); var firstChild = ast.FirstChild; Assert.AreEqual(BlockTag.Table, firstChild.Tag); Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); Assert.IsNotNull(firstChild.TableHeaderAlignments); - Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Length); Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); @@ -112,7 +112,7 @@ Hello world CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("
First HeaderSecond Header
Content Cell1Content Cell2
Content Cell3Content Cell4
\r\n

Hello world

\r\n\r\n", html); + Assert.AreEqual("
First HeaderSecond Header
Content Cell1Content Cell2
Content Cell3Content Cell4
\r\n

Hello world

\r\n", html); var firstChild = ast.FirstChild; var secondChild = firstChild.NextSibling; @@ -127,7 +127,7 @@ Content Cell3 | Content Cell4 Assert.AreEqual(shouldMatch, firstMarkdown); Assert.IsNotNull(firstChild.TableHeaderAlignments); - Assert.AreEqual(2, firstChild.TableHeaderAlignments.Count); + Assert.AreEqual(2, firstChild.TableHeaderAlignments.Length); Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); @@ -201,7 +201,7 @@ Hello world CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n

Hello world

\r\n\r\n", html); + Assert.AreEqual("

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n

Hello world

\r\n", html); Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.Tag); Assert.AreEqual(BlockTag.Table, ast.FirstChild.NextSibling.Tag); @@ -228,7 +228,7 @@ public void TableWithInlines() CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("
NameDescription
HelpDisplay the help window.
CloseCloses a window
\r\n", html); + Assert.AreEqual("
NameDescription
HelpDisplay the help window.
CloseCloses a window
", html); } [TestMethod] @@ -304,7 +304,7 @@ public void TableCellMismatch() CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("
First HeaderSecond Header
11
2122
\r\n", html); + Assert.AreEqual("
First HeaderSecond Header
11
2122
", html); } [TestMethod] @@ -319,7 +319,7 @@ public void TableAlignment() var ast = CommonMarkConverter.Parse(markdown, ReadSettings); var table = ast.FirstChild; Assert.AreEqual(BlockTag.Table, table.Tag); - Assert.AreEqual(4, table.TableHeaderAlignments.Count); + Assert.AreEqual(4, table.TableHeaderAlignments.Length); Assert.AreEqual(TableHeaderAlignment.None, table.TableHeaderAlignments[0]); Assert.AreEqual(TableHeaderAlignment.Left, table.TableHeaderAlignments[1]); Assert.AreEqual(TableHeaderAlignment.Right, table.TableHeaderAlignments[2]); @@ -330,7 +330,7 @@ public void TableAlignment() CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("
H1H2H3H4
1234
\r\n", html); + Assert.AreEqual("
H1H2H3H4
1234
", html); } } } \ No newline at end of file diff --git a/CommonMark/CommonMark.NET35.csproj b/CommonMark/CommonMark.NET35.csproj index 50dd0a1..e60090d 100644 --- a/CommonMark/CommonMark.NET35.csproj +++ b/CommonMark/CommonMark.NET35.csproj @@ -5,6 +5,7 @@ {61829669-5091-4F2A-A0B1-7348D7CED840} v3.5 Client + OptimizeFor35 diff --git a/CommonMark/Parser/BlockMethods.cs b/CommonMark/Parser/BlockMethods.cs index 0237da1..c58a7fa 100644 --- a/CommonMark/Parser/BlockMethods.cs +++ b/CommonMark/Parser/BlockMethods.cs @@ -5,1322 +5,1347 @@ namespace CommonMark.Parser { - internal static class BlockMethods - { - private const int CODE_INDENT = 4; - private const int TabSize = 4; - private const string Spaces = " "; + internal static class BlockMethods + { + private const int CODE_INDENT = 4; + private const int TabSize = 4; + private const string Spaces = " "; #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - private static bool CanContain(BlockTag parent_type, BlockTag child_type) - { - return (parent_type == BlockTag.Document || - parent_type == BlockTag.BlockQuote || - parent_type == BlockTag.ListItem || - (parent_type == BlockTag.List && child_type == BlockTag.ListItem)); - } + private static bool CanContain(BlockTag parent_type, BlockTag child_type) + { + return (parent_type == BlockTag.Document || + parent_type == BlockTag.BlockQuote || + parent_type == BlockTag.ListItem || + (parent_type == BlockTag.List && child_type == BlockTag.ListItem)); + } #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - private static bool AcceptsLines(BlockTag block_type) - { - return (block_type == BlockTag.Paragraph || - block_type == BlockTag.AtxHeading || - block_type == BlockTag.IndentedCode || - block_type == BlockTag.FencedCode); - } - - private static void AddLine(Block block, LineInfo lineInfo, string ln, int offset, int remainingSpaces, int length = -1) - { - if (!block.IsOpen) - throw new CommonMarkException(string.Format(CultureInfo.InvariantCulture, "Attempted to add line '{0}' to closed container ({1}).", ln, block.Tag)); - - var len = length == -1 ? ln.Length - offset : length; - if (len <= 0) - return; - - var curSC = block.StringContent; - if (curSC == null) - { - block.StringContent = curSC = new StringContent(); - if (lineInfo.IsTrackingPositions) - curSC.PositionTracker = new PositionTracker(lineInfo.LineOffset); - } - - if (lineInfo.IsTrackingPositions) - curSC.PositionTracker.AddOffset(lineInfo, offset, len); - - curSC.Append(Spaces, 0, remainingSpaces); - curSC.Append(ln, offset, len); - } - - /// - /// Check to see if a block ends with a blank line, descending if needed into lists and sublists. - /// - private static bool EndsWithBlankLine(Block block) - { - while (true) - { - if (block.IsLastLineBlank) - return true; - - if (block.Tag != BlockTag.List && block.Tag != BlockTag.ListItem) - return false; - - block = block.LastChild; - - if (block == null) - return false; - } - } - - /// - /// Break out of all containing lists - /// - private static void BreakOutOfLists(ref Block blockRef, LineInfo line, CommonMarkSettings settings) - { - Block container = blockRef; - Block b = container.Top; - - // find first containing list: - while (b != null && b.Tag != BlockTag.List) - b = b.LastChild; - - if (b != null) - { - while (container != null && container != b) - { - Finalize(container, line, settings); - container = container.Parent; - } - - Finalize(b, line, settings); - blockRef = b.Parent; - } - } - - static List ParseTableLine(string line, StringBuilder sb) - { - var ret = new List(); - - var i = 0; - - if (i < line.Length && line[i] == '|') i++; - - while (i < line.Length && char.IsWhiteSpace(line[i])) i++; - - for (; i < line.Length; i++) - { - var c = line[i]; - if (c == '\\') - { - i++; - continue; - } - - if (c == '|') - { - ret.Add(sb.ToString()); - sb.Clear(); - } - else - { - sb.Append(c); - } - } - - if (sb.Length != 0) - { - ret.Add(sb.ToString()); - sb.Clear(); - } - - return ret; - } - - static void MakeTableCells(Block row, StringBuilder sb) - { - var asStr = row.StringContent.ToString(); - - var offset = 0; - - for (var i = 0; i < asStr.Length; i++) - { - var c = asStr[i]; - - if (c == '|') - { - var text = sb.ToString(); - sb.Clear(); - - if (text.Length > 0) - { - var leadingWhiteSpace = 0; - while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; - var trailingWhiteSpace = 0; - while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; - - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; - cell.StringContent = new StringContent(); - cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } - - cell.IsOpen = false; - } - - offset += text.Length; - - // skip the | - offset++; - continue; - } - - if (c == '\\') - { - sb.Append(c); - if (i + 1 < asStr.Length) - { - sb.Append(asStr[i + 1]); - } - i++; - } - else - { - sb.Append(c); - } - } - - if (sb.Length > 0) - { - var text = sb.ToString(); - sb.Clear(); - - if (text.Length > 0) - { - var leadingWhiteSpace = 0; - while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; - var trailingWhiteSpace = 0; - while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; - - if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) - { - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; - cell.StringContent = new StringContent(); - cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } - - cell.IsOpen = false; - } - } - } - } - - static void MakeTableRows(Block table, StringBuilder sb) - { - var asStr = table.StringContent.ToString(); - var lines = asStr.Split('\n'); - - var offset = 0; - - for (var i = 0; i < lines.Length; i++) - { - var line = lines[i]; - - var lineLength = line.Length; - var hasLineBreak = offset + lineLength < asStr.Length && asStr[offset + lineLength] == '\n'; - if (hasLineBreak) lineLength++; - - // skip the header row - if (i != 1 && !string.IsNullOrWhiteSpace(line)) - { - var rowStartsInDocument = table.SourcePosition + offset; - var row = new Block(BlockTag.TableRow, rowStartsInDocument); - row.SourceLastPosition = rowStartsInDocument + lineLength; - - row.StringContent = new StringContent(); - row.StringContent.Append(asStr, offset, row.SourceLength); - - if (table.LastChild == null) - { - table.FirstChild = row; - table.LastChild = row; - } - else - { - table.LastChild.NextSibling = row; - table.LastChild = row; - } - - MakeTableCells(row, sb); - row.IsOpen = false; - } - - offset += lineLength; - } - } - - static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) - { - if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; - - var asStr = b.StringContent.ToString(); - var lines = asStr.Split('\n'); - - if (lines.Length < 2) return false; - - var sb = new StringBuilder(); - - var columnsLine = ParseTableLine(lines[0], sb); - if (columnsLine.Count == 1) return false; - - var headerLine = ParseTableLine(lines[1], sb); - if (headerLine.Count == 1) return false; - - var headerAlignment = new List(); - - foreach (var headerPart in headerLine) - { - var trimmed = headerPart.Trim(); - if (trimmed.Length < 3) return false; - - var validateFrom = 0; - var startsWithColon = trimmed[validateFrom] == ':'; - if (startsWithColon) validateFrom++; - - var validateTo = trimmed.Length - 1; - var endsWithColon = trimmed[validateTo] == ':'; - if (endsWithColon) validateTo--; - - for (var i = validateFrom; i <= validateTo; i++) - { - // don't check for escapes, they don't count in header - if (trimmed[i] != '-') return false; - } - - if (!startsWithColon && !endsWithColon) - { - headerAlignment.Add(TableHeaderAlignment.None); - continue; - } - - if (startsWithColon && endsWithColon) - { - headerAlignment.Add(TableHeaderAlignment.Center); - continue; - } - - if (startsWithColon) - { - headerAlignment.Add(TableHeaderAlignment.Left); - } - - if (endsWithColon) - { - headerAlignment.Add(TableHeaderAlignment.Right); - } - } - - while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[0])) columnsLine.RemoveAt(0); - while (columnsLine.Count > 0 && string.IsNullOrWhiteSpace(columnsLine[columnsLine.Count - 1])) columnsLine.RemoveAt(columnsLine.Count - 1); - while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[0])) headerLine.RemoveAt(0); - while (headerLine.Count > 0 && string.IsNullOrWhiteSpace(headerLine[headerLine.Count - 1])) headerLine.RemoveAt(headerLine.Count - 1); - - if (columnsLine.Count < 2) return false; - if (headerLine.Count < columnsLine.Count) return false; - - var lastTableLine = 1; - - // it's a table! - for (var i = 2; i < lines.Length; i++) - { - var hasPipe = false; - for (var j = 0; j < lines[i].Length; j++) - { - var c = lines[i][j]; - if (c == '\\') - { - j++; - continue; - } - - if (c == '|') - { - hasPipe = true; - break; - } - } - if (!hasPipe) break; - - lastTableLine = i; - } - - if (lastTableLine + 1 < lines.Length && string.IsNullOrWhiteSpace(lines[lastTableLine + 1])) - { - lastTableLine++; - } - - var wholeBlockIsTable = lastTableLine == (lines.Length - 1); - - // No need to break, the whole block is a table now - if (wholeBlockIsTable) - { - b.Tag = BlockTag.Table; - b.TableHeaderAlignments = headerAlignment; - - // create table rows - MakeTableRows(b, sb); - return true; - } - - var takingCharsForTable = 0; - for (var i = 0; i <= lastTableLine; i++) - { - takingCharsForTable += lines[i].Length; - var hasFollowingLineBreak = takingCharsForTable < asStr.Length && asStr[takingCharsForTable] == '\n'; - if (hasFollowingLineBreak) - { - takingCharsForTable++; - } - } - - // get the text of the table separate - var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); - var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); - - // create the trailing paragraph, and set it's text and source positions - var newParagraph = b.Clone(); - newParagraph.StringContent = b.StringContent; - if (settings.TrackSourcePosition) - { - newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; - newParagraph.SourceLastPosition = newParagraph.SourcePosition + (asStr.Length - tableBlockString.Length); - } - - // update the text of the table block - b.Tag = BlockTag.Table; - b.TableHeaderAlignments = headerAlignment; - b.StringContent = new StringContent(); - b.StringContent.Append(tableBlockString, 0, tableBlockString.Length); - if (settings.TrackSourcePosition) - { - b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; - } - - // create table rows - MakeTableRows(b, sb); - - // put the new paragraph after the table - newParagraph.NextSibling = b.NextSibling; - b.NextSibling = newParagraph; - - Finalize(newParagraph, line, settings); - - return true; - } - - public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) - { - // don't do anything if the block is already closed - if (!b.IsOpen) - return; - - b.IsOpen = false; - - if (line.IsTrackingPositions) - { - // HTML Blocks other than type 7 call Finalize when the last line is encountered. - // Block types 6 and 7 calls Finalize once it finds the next empty row but that empty row is no longer considered to be part of the block. - var includesThisLine = b.HtmlBlockType != HtmlBlockType.None && b.HtmlBlockType != HtmlBlockType.InterruptingBlock && b.HtmlBlockType != HtmlBlockType.NonInterruptingBlock; - - // (b.SourcePosition >= line.LineOffset) determines if the block started on this line. - includesThisLine = includesThisLine || b.SourcePosition >= line.LineOffset; - - if (includesThisLine && line.Line != null) - b.SourceLastPosition = line.CalculateOrigin(line.Line.Length, false); - else - b.SourceLastPosition = line.CalculateOrigin(0, false); - } + private static bool AcceptsLines(BlockTag block_type) + { + return (block_type == BlockTag.Paragraph || + block_type == BlockTag.AtxHeading || + block_type == BlockTag.IndentedCode || + block_type == BlockTag.FencedCode); + } + + private static void AddLine(Block block, LineInfo lineInfo, string ln, int offset, int remainingSpaces, int length = -1) + { + if (!block.IsOpen) + throw new CommonMarkException(string.Format(CultureInfo.InvariantCulture, "Attempted to add line '{0}' to closed container ({1}).", ln, block.Tag)); + + var len = length == -1 ? ln.Length - offset : length; + if (len <= 0) + return; + + var curSC = block.StringContent; + if (curSC == null) + { + block.StringContent = curSC = new StringContent(); + if (lineInfo.IsTrackingPositions) + curSC.PositionTracker = new PositionTracker(lineInfo.LineOffset); + } + + if (lineInfo.IsTrackingPositions) + curSC.PositionTracker.AddOffset(lineInfo, offset, len); + + curSC.Append(Spaces, 0, remainingSpaces); + curSC.Append(ln, offset, len); + } + + /// + /// Check to see if a block ends with a blank line, descending if needed into lists and sublists. + /// + private static bool EndsWithBlankLine(Block block) + { + while (true) + { + if (block.IsLastLineBlank) + return true; + + if (block.Tag != BlockTag.List && block.Tag != BlockTag.ListItem) + return false; + + block = block.LastChild; + + if (block == null) + return false; + } + } + + /// + /// Break out of all containing lists + /// + private static void BreakOutOfLists(ref Block blockRef, LineInfo line, CommonMarkSettings settings) + { + Block container = blockRef; + Block b = container.Top; + + // find first containing list: + while (b != null && b.Tag != BlockTag.List) + b = b.LastChild; + + if (b != null) + { + while (container != null && container != b) + { + Finalize(container, line, settings); + container = container.Parent; + } + + Finalize(b, line, settings); + blockRef = b.Parent; + } + } + + static List ParseTableLine(StringPart part, StringBuilder sb) + { + string line = part.Source.Substring(part.StartIndex, part.Length); + line = line.TrimEnd('\n'); + + var ret = new List(); + + var i = 0; + + if (i < line.Length && line[i] == '|') i++; + + while (i < line.Length && char.IsWhiteSpace(line[i])) i++; + + for (; i < line.Length; i++) + { + var c = line[i]; + if (c == '\\') + { + i++; + if (i < line.Length && line[i] == '|') + { + sb.Append(line[i]); + continue; + } + i--; + } + + if (c == '|') + { + ret.Add(sb.ToString()); + Utilities.ClearStringBuilder(sb); + } + else + { + sb.Append(c); + } + } + + if (sb.Length != 0) + { + ret.Add(sb.ToString()); + Utilities.ClearStringBuilder(sb); + } + + return ret; + } + + static void MakeTableCells(Block row, StringBuilder sb) + { + var offset = 0; + + var parts = row.StringContent.RetrieveParts(); + foreach (var part in parts.Array) + { + if (part.Length <= 0) + continue; + + string asStr = part.Source.Substring(part.StartIndex, part.Length); + + for (var i = 0; i < asStr.Length; i++) + { + var c = asStr[i]; + + if (c == '|') + { + var text = sb.ToString(); + Utilities.ClearStringBuilder(sb); + + if (text.Length > 0) + { + int length = text.Length; + string trimmedText = text.TrimStart(); + var leadingWhiteSpace = length - trimmedText.Length; + trimmedText = trimmedText.TrimEnd(); + var trailingWhiteSpace = length - leadingWhiteSpace - text.Length; + + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + trimmedText.Length; + cell.StringContent = new StringContent(); + cell.StringContent.Append(trimmedText, 0, trimmedText.Length); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + + offset += text.Length; + + // skip the | + offset++; + continue; + } + + if (c == '\\') + { + sb.Append(c); + if (i + 1 < asStr.Length) + { + if (Utilities.IsEscapableSymbol(asStr[i + 1])) + sb.Append(asStr[i + 1]); + } + i++; + } + else + { + sb.Append(c); + } + } + } + + if (sb.Length > 0) + { + var text = sb.ToString(); + Utilities.ClearStringBuilder(sb); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) + { + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + } + } + } + + static void MakeTableRows(Block table, StringBuilder sb) + { + var parts = table.StringContent.RetrieveParts(); + var offset = 0; + + for (var i = 0; i < parts.Array.Length; i++) + { + var line = parts.Array[i]; + if (line.Length <= 0) + continue; + + var lineLength = line.Length; + string actualLine = line.Source.Substring(line.StartIndex, line.Length); + + // skip the header row + if (i != 1 && !string.IsNullOrEmpty(actualLine) && actualLine != " ") + { + var rowStartsInDocument = table.SourcePosition + offset; + var row = new Block(BlockTag.TableRow, rowStartsInDocument); + row.SourceLastPosition = rowStartsInDocument + lineLength; + + row.StringContent = new StringContent(); + row.StringContent.Append(actualLine, 0, actualLine.Length); + + if (table.LastChild == null) + { + table.FirstChild = row; + table.LastChild = row; + } + else + { + table.LastChild.NextSibling = row; + table.LastChild = row; + } + + MakeTableCells(row, sb); + row.IsOpen = false; + } + + offset += lineLength; + } + } + + static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; + + var parts = b.StringContent.RetrieveParts().Array; + + if (parts.Length < 2) return false; + + var sb = new StringBuilder(); + + var columnsPart = parts[0]; + var columnsLine = ParseTableLine(columnsPart, sb); + if (columnsLine.Count == 1) return false; + + var headersPart = parts[1]; + var headerLine = ParseTableLine(headersPart, sb); + if (headerLine.Count == 1) return false; + + TableHeaderAlignment[] headerAlignment = new TableHeaderAlignment[headerLine.Count]; + + for (int hl = 0; hl < headerLine.Count; hl++) + { + var headerPart = headerLine[hl]; + var trimmed = headerPart.Trim(); + if (trimmed.Length < 3) return false; + + var validateFrom = 0; + var startsWithColon = trimmed[validateFrom] == ':'; + if (startsWithColon) validateFrom++; + + var validateTo = trimmed.Length - 1; + var endsWithColon = trimmed[validateTo] == ':'; + if (endsWithColon) validateTo--; + + for (var i = validateFrom; i <= validateTo; i++) + { + // don't check for escapes, they don't count in header + if (trimmed[i] != '-') return false; + } + + if (!startsWithColon && !endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.None; + continue; + } + + if (startsWithColon && endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Center; + continue; + } + + if (startsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Left; + } + + if (endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Right; + } + } + + if (columnsLine.Count < 2) return false; + if (headerLine.Count < columnsLine.Count) return false; + + var lastTableLine = 1; + + // it's a table! + List tableParts = new List { columnsPart, headersPart }; + var takingCharsForTable = columnsPart.Length + headersPart.Length; + for (var i = 2; i < parts.Length; i++) + { + var hasPipe = false; + var part = parts[i]; + + if (part.Length <= 0) + continue; + + string strLine = part.Source.Substring(part.StartIndex, part.Length); + + int indexOfPipe = strLine.IndexOf('|'); + hasPipe = indexOfPipe == 0; + + while (!hasPipe) + { + if (indexOfPipe > 0 && strLine[indexOfPipe - 1] == '\\') + { + indexOfPipe = strLine.IndexOf('|', indexOfPipe); + } + else if(indexOfPipe > 0) + { + hasPipe = true; + break; + } + else + { + break; + } + } + + if (!hasPipe) break; + + tableParts.Add(part); + takingCharsForTable += part.Length; + lastTableLine = i; + } + + bool hasTrailingParts = false; + for (var i = lastTableLine + 1; i < parts.Length; i++) + { + var part = parts[i]; + if (part.Length <= 0) + continue; + + hasTrailingParts = true; + break; + } + + // No need to break, the whole block is a table now + if (!hasTrailingParts) + { + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + + // create table rows + MakeTableRows(b, sb); + return true; + } + + // get the text of the table separate + var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); + var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); + + // create the trailing paragraph, and set it's text and source positions + var newParagraph = b.Clone(); + newParagraph.StringContent = b.StringContent; + if (settings.TrackSourcePosition) + { + newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; + newParagraph.SourceLastPosition = newParagraph.SourcePosition + (b.SourceLength - tableBlockString.Length); + } + + // update the text of the table block + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + b.StringContent = new StringContent(); + foreach (StringPart part in tableParts) + { + b.StringContent.Append(part.Source, part.StartIndex, part.Length); + } + if (settings.TrackSourcePosition) + { + b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; + } + + // create table rows + MakeTableRows(b, sb); + + // put the new paragraph after the table + newParagraph.NextSibling = b.NextSibling; + b.NextSibling = newParagraph; + + Finalize(newParagraph, line, settings); + + return true; + } + + public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) + { + // don't do anything if the block is already closed + if (!b.IsOpen) + return; + + b.IsOpen = false; + + if (line.IsTrackingPositions) + { + // HTML Blocks other than type 7 call Finalize when the last line is encountered. + // Block types 6 and 7 calls Finalize once it finds the next empty row but that empty row is no longer considered to be part of the block. + var includesThisLine = b.HtmlBlockType != HtmlBlockType.None && b.HtmlBlockType != HtmlBlockType.InterruptingBlock && b.HtmlBlockType != HtmlBlockType.NonInterruptingBlock; + + // (b.SourcePosition >= line.LineOffset) determines if the block started on this line. + includesThisLine = includesThisLine || b.SourcePosition >= line.LineOffset; + + if (includesThisLine && line.Line != null) + b.SourceLastPosition = line.CalculateOrigin(line.Line.Length, false); + else + b.SourceLastPosition = line.CalculateOrigin(0, false); + } #pragma warning disable 0618 - b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber; + b.EndLine = (line.LineNumber > b.StartLine) ? line.LineNumber - 1 : line.LineNumber; #pragma warning restore 0618 - switch (b.Tag) - { - case BlockTag.Paragraph: - var sc = b.StringContent; - - if (TryMakeTable(b, line, settings)) - { - break; - } - - if (!sc.StartsWith('[')) - break; - - var subj = new Subject(b.Top.Document); - sc.FillSubject(subj); - var origPos = subj.Position; - while (subj.Position < subj.Buffer.Length - && subj.Buffer[subj.Position] == '[' - && 0 != InlineMethods.ParseReference(subj)) - { - } - - if (subj.Position != origPos) - { - sc.Replace(subj.Buffer, subj.Position, subj.Buffer.Length - subj.Position); - - if (sc.PositionTracker != null) - sc.PositionTracker.AddBlockOffset(subj.Position - origPos); - - if (Utilities.IsFirstLineBlank(subj.Buffer, subj.Position)) - b.Tag = BlockTag.ReferenceDefinition; - } - - break; - - case BlockTag.IndentedCode: - b.StringContent.RemoveTrailingBlankLines(); - break; - - case BlockTag.FencedCode: - // first line of contents becomes info - var firstlinelen = b.StringContent.IndexOf('\n') + 1; - b.FencedCodeData.Info = InlineMethods.Unescape(b.StringContent.TakeFromStart(firstlinelen, true).Trim()); - break; - - case BlockTag.List: // determine tight/loose status - b.ListData.IsTight = true; // tight by default - var item = b.FirstChild; - Block subitem; - - while (item != null) - { - // check for non-final non-empty list item ending with blank line: - if (item.IsLastLineBlank && item.NextSibling != null) - { - b.ListData.IsTight = false; - break; - } - - // recurse into children of list item, to see if there are spaces between them: - subitem = item.FirstChild; - while (subitem != null) - { - if (EndsWithBlankLine(subitem) && (item.NextSibling != null || subitem.NextSibling != null)) - { - b.ListData.IsTight = false; - break; - } - - subitem = subitem.NextSibling; - } - - if (!b.ListData.IsTight) - break; - - item = item.NextSibling; - } - - break; - } - } - - /// - /// Adds a new block as child of another. Return the child. - /// - /// Original: add_child - public static Block CreateChildBlock(Block parent, LineInfo line, CommonMarkSettings settings, BlockTag blockType, int startColumn) - { - // if 'parent' isn't the kind of block that can accept this child, - // then back up til we hit a block that can. - while (!CanContain(parent.Tag, blockType)) - { - Finalize(parent, line, settings); - parent = parent.Parent; - } - - var startPosition = line.IsTrackingPositions ? line.CalculateOrigin(startColumn, true) : line.LineOffset; + switch (b.Tag) + { + case BlockTag.Paragraph: + var sc = b.StringContent; + + if (TryMakeTable(b, line, settings)) + { + break; + } + + if (!sc.StartsWith('[')) + break; + + var subj = new Subject(b.Top.Document); + sc.FillSubject(subj); + var origPos = subj.Position; + while (subj.Position < subj.Buffer.Length + && subj.Buffer[subj.Position] == '[' + && 0 != InlineMethods.ParseReference(subj)) + { + } + + if (subj.Position != origPos) + { + sc.Replace(subj.Buffer, subj.Position, subj.Buffer.Length - subj.Position); + + if (sc.PositionTracker != null) + sc.PositionTracker.AddBlockOffset(subj.Position - origPos); + + if (Utilities.IsFirstLineBlank(subj.Buffer, subj.Position)) + b.Tag = BlockTag.ReferenceDefinition; + } + + break; + + case BlockTag.IndentedCode: + b.StringContent.RemoveTrailingBlankLines(); + break; + + case BlockTag.FencedCode: + // first line of contents becomes info + var firstlinelen = b.StringContent.IndexOf('\n') + 1; + b.FencedCodeData.Info = InlineMethods.Unescape(b.StringContent.TakeFromStart(firstlinelen, true).Trim()); + break; + + case BlockTag.List: // determine tight/loose status + b.ListData.IsTight = true; // tight by default + var item = b.FirstChild; + Block subitem; + + while (item != null) + { + // check for non-final non-empty list item ending with blank line: + if (item.IsLastLineBlank && item.NextSibling != null) + { + b.ListData.IsTight = false; + break; + } + + // recurse into children of list item, to see if there are spaces between them: + subitem = item.FirstChild; + while (subitem != null) + { + if (EndsWithBlankLine(subitem) && (item.NextSibling != null || subitem.NextSibling != null)) + { + b.ListData.IsTight = false; + break; + } + + subitem = subitem.NextSibling; + } + + if (!b.ListData.IsTight) + break; + + item = item.NextSibling; + } + + break; + } + } + + /// + /// Adds a new block as child of another. Return the child. + /// + /// Original: add_child + public static Block CreateChildBlock(Block parent, LineInfo line, CommonMarkSettings settings, BlockTag blockType, int startColumn) + { + // if 'parent' isn't the kind of block that can accept this child, + // then back up til we hit a block that can. + while (!CanContain(parent.Tag, blockType)) + { + Finalize(parent, line, settings); + parent = parent.Parent; + } + + var startPosition = line.IsTrackingPositions ? line.CalculateOrigin(startColumn, true) : line.LineOffset; #pragma warning disable 0618 - Block child = new Block(blockType, line.LineNumber, startColumn + 1, startPosition); + Block child = new Block(blockType, line.LineNumber, startColumn + 1, startPosition); #pragma warning restore 0618 - child.Parent = parent; - child.Top = parent.Top; - - var lastChild = parent.LastChild; - if (lastChild != null) - { - lastChild.NextSibling = child; - } - else - { - parent.FirstChild = child; - } - - parent.LastChild = child; - return child; - } - - private static void AdjustInlineSourcePosition(Inline inline, PositionTracker tracker, ref Stack stack) - { - if (stack == null) - stack = new Stack(); - - while (inline != null) - { - inline.SourcePosition = tracker.CalculateInlineOrigin(inline.SourcePosition, true); - inline.SourceLastPosition = tracker.CalculateInlineOrigin(inline.SourceLastPosition, false); - - if (inline.FirstChild != null) - { - if (inline.NextSibling != null) - stack.Push(inline.NextSibling); - - inline = inline.FirstChild; - } - else if (inline.NextSibling != null) - { - inline = inline.NextSibling; - } - else if (stack.Count > 0) - { - inline = stack.Pop(); - } - else - { - inline = null; - } - } - - } - - /// - /// Walk through the block, its children and siblings, parsing string content into inline content where appropriate. - /// - /// The document level block from which to start the processing. - /// Document data. - /// The settings that influence how the inline parsing is performed. - public static void ProcessInlines(Block block, DocumentData data, CommonMarkSettings settings) - { - Stack inlineStack = null; - var stack = new Stack(); - var parsers = settings.InlineParsers; - var specialCharacters = settings.InlineParserSpecialCharacters; - var subj = new Subject(data); - - StringContent sc; - int delta; - - while (block != null) - { - var tag = block.Tag; - if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading || tag == BlockTag.TableCell) - { - sc = block.StringContent; - if (sc != null) - { - sc.FillSubject(subj); - delta = subj.Position; - - block.InlineContent = InlineMethods.parse_inlines(subj, parsers, specialCharacters); - block.StringContent = null; - - if (sc.PositionTracker != null) - { - sc.PositionTracker.AddBlockOffset(-delta); - AdjustInlineSourcePosition(block.InlineContent, sc.PositionTracker, ref inlineStack); - } - } - } - - if (block.FirstChild != null) - { - if (block.NextSibling != null) - stack.Push(block.NextSibling); - - block = block.FirstChild; - } - else if (block.NextSibling != null) - { - block = block.NextSibling; - } - else if (stack.Count > 0) - { - block = stack.Pop(); - } - else - { - block = null; - } - } - } - - /// - /// Attempts to parse a list item marker (bullet or enumerated). - /// On success, returns length of the marker, and populates - /// data with the details. On failure, returns 0. - /// - /// Original: int parse_list_marker(string ln, int pos, ref ListData dataptr) - private static int ParseListMarker(string ln, int pos, bool interruptsParagraph, out ListData data) - { - char c; - int startpos; - data = null; - var len = ln.Length; - - startpos = pos; - c = ln[pos]; - - if (c == '+' || c == '•' || ((c == '*' || c == '-') && 0 == Scanner.scan_thematic_break(ln, pos, len))) - { - pos++; - - if (pos == len || !Utilities.IsWhitespace(ln[pos])) - return 0; - - if (interruptsParagraph && Scanner.scan_spacechars(ln, pos + 1, ln.Length) == ln.Length - pos - 1) - return 0; - - data = new ListData(); - data.BulletChar = c; - data.Start = 1; - } - else if (c >= '0' && c <= '9') - { - - int start = c - '0'; - - while (pos < len - 1) - { - c = ln[++pos]; - // We limit to 9 digits to avoid overflow, This also seems to be the limit for 'start' in some browsers. - if (c >= '0' && c <= '9' && start < 100000000) - start = start * 10 + (c - '0'); - else - break; - } - - if (pos >= len - 1 || (c != '.' && c != ')')) - return 0; - - pos++; - if (pos == len || !Utilities.IsWhitespace(ln[pos])) - return 0; - - if (interruptsParagraph && - (start != 1 || Scanner.scan_spacechars(ln, pos + 1, ln.Length) == ln.Length - pos - 1)) - return 0; - - data = new ListData(); - data.ListType = ListType.Ordered; - data.BulletChar = '\0'; - data.Start = start; - data.Delimiter = (c == '.' ? ListDelimiter.Period : ListDelimiter.Parenthesis); - - } - else - { - return 0; - } - - return (pos - startpos); - } - - private static bool ListsMatch(ListData listData, ListData itemData) - { - return (listData.ListType == itemData.ListType && - listData.Delimiter == itemData.Delimiter && - // list_data.marker_offset == item_data.marker_offset && - listData.BulletChar == itemData.BulletChar); - } - - private static bool AdvanceOptionalSpace(string line, ref int offset, ref int column, ref int remainingSpaces) - { - if (remainingSpaces > 0) - { - remainingSpaces--; - return true; - } - - var c = line[offset]; - if (c == ' ') - { - offset++; - column++; - return true; - } - else if (c == '\t') - { - offset++; - var chars_to_tab = 4 - (column % TabSize); - column += chars_to_tab; - remainingSpaces = chars_to_tab - 1; - return true; - } - - return false; - } - - private static void AdvanceOffset(string line, int count, bool columns, ref int offset, ref int column, ref int remainingSpaces) - { - if (columns) - { - if (remainingSpaces > count) - { - remainingSpaces -= count; - count = 0; - } - else - { - count -= remainingSpaces; - remainingSpaces = 0; - } - } - else - { - remainingSpaces = 0; - } - - char c; - while (count > 0 && (c = line[offset]) != '\n') - { - if (c == '\t') - { - var chars_to_tab = 4 - (column % TabSize); - column += chars_to_tab; - offset += 1; - count -= columns ? chars_to_tab : 1; - - if (count < 0) - { - remainingSpaces = 0 - count; - } - } - else - { - offset += 1; - column += 1; // assume ascii; block starts are ascii - count -= 1; - } - } - } - - // Process one line at a time, modifying a block. - // Returns 0 if successful. curptr is changed to point to - // the currently open block. - public static void IncorporateLine(LineInfo line, ref Block curptr, CommonMarkSettings settings) - { - var ln = line.Line; - - Block last_matched_container; - - // offset is the char position in the line - var offset = 0; - - // column is the virtual position in the line that takes TAB expansion into account - var column = 0; - - // the adjustment to the virtual position `column` that points to the number of spaces from the TAB that have not been included in any indent. - var remainingSpaces = 0; - - // the char position of the first non-space char - int first_nonspace; - - // the virtual position of the first non-space chart, that includes TAB expansion - int first_nonspace_column; - - int matched; - int i; - ListData data; - bool all_matched = true; - Block cur = curptr; - var blank = false; - char curChar; - int indent; - - // container starts at the document root. - var container = cur.Top; - - // for each containing block, try to parse the associated line start. - // bail out on failure: container will point to the last matching block. - - while (container.LastChild != null && container.LastChild.IsOpen) - { - container = container.LastChild; - - FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); - - indent = first_nonspace_column - column + remainingSpaces; - blank = curChar == '\n'; - - switch (container.Tag) - { - case BlockTag.BlockQuote: - { - if (indent <= 3 && curChar == '>') - { - AdvanceOffset(ln, indent + 1, true, ref offset, ref column, ref remainingSpaces); - AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); - } - else - { - all_matched = false; - } - - break; - } - - case BlockTag.ListItem: - { - if (indent >= container.ListData.MarkerOffset + container.ListData.Padding) - { - AdvanceOffset(ln, container.ListData.MarkerOffset + container.ListData.Padding, true, ref offset, ref column, ref remainingSpaces); - } - else if (blank && container.FirstChild != null) - { - // if container->first_child is NULL, then the opening line - // of the list item was blank after the list marker; in this - // case, we are done with the list item. - AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces); - } - else - { - all_matched = false; - } - - break; - } - - case BlockTag.IndentedCode: - { - if (indent >= CODE_INDENT) - AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); - else if (blank) - AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces); - else - all_matched = false; - - break; - } - - case BlockTag.AtxHeading: - case BlockTag.SetextHeading: - { - // a heading can never contain more than one line - all_matched = false; - if (blank) - container.IsLastLineBlank = true; - - break; - } - - case BlockTag.FencedCode: - { - // -1 means we've seen closer - if (container.FencedCodeData.FenceLength == -1) - { - all_matched = false; - if (blank) - container.IsLastLineBlank = true; - } - else - { - // skip optional spaces of fence offset - i = container.FencedCodeData.FenceOffset; - while (i > 0 && ln[offset] == ' ') - { - offset++; - column++; - i--; - } - } - - break; - } - - case BlockTag.HtmlBlock: - { - // all other block types can accept blanks - if (blank && container.HtmlBlockType >= HtmlBlockType.InterruptingBlock) - { - container.IsLastLineBlank = true; - all_matched = false; - } - - break; - } - - case BlockTag.Paragraph: - { - if (blank) - { - container.IsLastLineBlank = true; - all_matched = false; - } - - break; - } - } - - if (!all_matched) - { - container = container.Parent; // back up to last matching block - break; - } - } - - last_matched_container = container; - - // check to see if we've hit 2nd blank line, break out of list: - if (blank && container.IsLastLineBlank) - BreakOutOfLists(ref container, line, settings); - - var maybeLazy = cur.Tag == BlockTag.Paragraph; - - // unless last matched container is code block, try new container starts: - while (container.Tag != BlockTag.FencedCode && - container.Tag != BlockTag.IndentedCode && - container.Tag != BlockTag.HtmlBlock) - { - - FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); - - indent = first_nonspace_column - column + remainingSpaces; - blank = curChar == '\n'; - - var indented = indent >= CODE_INDENT; - - if (!indented && curChar == '>') - { - - AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces); - AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); - - container = CreateChildBlock(container, line, settings, BlockTag.BlockQuote, first_nonspace); - - } - else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i))) - { - - AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, settings, BlockTag.AtxHeading, first_nonspace); - container.Heading = new HeadingData(i); - - } - else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length))) - { - - container = CreateChildBlock(container, line, settings, BlockTag.FencedCode, first_nonspace); - container.FencedCodeData = new FencedCodeData(); - container.FencedCodeData.FenceChar = curChar; - container.FencedCodeData.FenceLength = matched; - container.FencedCodeData.FenceOffset = first_nonspace - offset; - - AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); - - } - else if (!indented && curChar == '<' && - (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length)) - || (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length))) - )) - { - - container = CreateChildBlock(container, line, settings, BlockTag.HtmlBlock, first_nonspace); - container.HtmlBlockType = (HtmlBlockType)matched; - // note, we don't adjust offset because the tag is part of the text - - } - else if (!indented && container.Tag == BlockTag.Paragraph && (curChar == '=' || curChar == '-') - && 0 != (matched = Scanner.scan_setext_heading_line(ln, first_nonspace, ln.Length))) - { - - container.Tag = BlockTag.SetextHeading; - container.Heading = new HeadingData(matched); - AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); - - } - else if (!indented - && !(container.Tag == BlockTag.Paragraph && !all_matched) - && 0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length))) - { - - // it's only now that we know the line is not part of a setext heading: - container = CreateChildBlock(container, line, settings, BlockTag.ThematicBreak, first_nonspace); - Finalize(container, line, settings); - container = container.Parent; - AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); - - } - else if ((!indented || container.Tag == BlockTag.List) - && 0 != (matched = ParseListMarker(ln, first_nonspace, container.Tag == BlockTag.Paragraph, out data))) - { - - // compute padding: - AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); - - var prevOffset = offset; - var prevColumn = column; - var prevRemainingSpaces = remainingSpaces; - - while (column - prevColumn <= CODE_INDENT) - { - if (!AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces)) - break; - } - - // i = number of spaces after marker, up to 5 - if (column == prevColumn) - { - // no spaces at all - data.Padding = matched + 1; - } - else if (column - prevColumn > CODE_INDENT || ln[offset] == '\n') - { - data.Padding = matched + 1; - - // too many (or none) spaces, ignoring everything but the first one - offset = prevOffset; - column = prevColumn; - remainingSpaces = prevRemainingSpaces; - AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); - } - else - { - data.Padding = matched + column - prevColumn; - } - - // check container; if it's a list, see if this list item - // can continue the list; otherwise, create a list container. - - data.MarkerOffset = indent; - - if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data)) - { - container = CreateChildBlock(container, line, settings, BlockTag.List, first_nonspace); - container.ListData = data; - } - - // add the list item - container = CreateChildBlock(container, line, settings, BlockTag.ListItem, first_nonspace); - container.ListData = data; - } - else if (indented && !maybeLazy && !blank) - { - AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); - container = CreateChildBlock(container, line, settings, BlockTag.IndentedCode, offset); - } - else - { - break; - } - - if (AcceptsLines(container.Tag)) - { - // if it's a line container, it can't contain other containers - break; - } - - maybeLazy = false; - } - - // what remains at offset is a text line. add the text to the - // appropriate container. - - FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); - indent = first_nonspace_column - column; - blank = curChar == '\n'; - - if (blank && container.LastChild != null) - { - container.LastChild.IsLastLineBlank = true; - } - - // block quote lines are never blank as they start with > - // and we don't count blanks in fenced code for purposes of tight/loose - // lists or breaking out of lists. we also don't set last_line_blank - // on an empty list item. - container.IsLastLineBlank = (blank && - container.Tag != BlockTag.BlockQuote && - container.Tag != BlockTag.SetextHeading && - container.Tag != BlockTag.FencedCode && - !(container.Tag == BlockTag.ListItem && - container.FirstChild == null && - container.SourcePosition >= line.LineOffset)); - - Block cont = container; - while (cont.Parent != null) - { - cont.Parent.IsLastLineBlank = false; - cont = cont.Parent; - } - - if (cur != last_matched_container && - container == last_matched_container && - !blank && - cur.Tag == BlockTag.Paragraph && - cur.StringContent.Length > 0) - { - - AddLine(cur, line, ln, offset, remainingSpaces); - - } - else - { // not a lazy continuation - - // finalize any blocks that were not matched and set cur to container: - while (cur != last_matched_container) - { - - Finalize(cur, line, settings); - cur = cur.Parent; - - if (cur == null) - throw new CommonMarkException("Cannot finalize container block. Last matched container tag = " + last_matched_container.Tag); - - } - - if (container.Tag == BlockTag.IndentedCode) - { - AddLine(container, line, ln, offset, remainingSpaces); - - } - else if (container.Tag == BlockTag.FencedCode) - { - - if ((indent <= 3 - && curChar == container.FencedCodeData.FenceChar) - && (0 != Scanner.scan_close_code_fence(ln, first_nonspace, container.FencedCodeData.FenceLength, ln.Length))) - { - // if closing fence, set fence length to -1. it will be closed when the next line is processed. - container.FencedCodeData.FenceLength = -1; - } - else - { - AddLine(container, line, ln, offset, remainingSpaces); - } - - } - else if (container.Tag == BlockTag.HtmlBlock) - { - - AddLine(container, line, ln, offset, remainingSpaces); - - if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length)) - { - Finalize(container, line, settings); - container = container.Parent; - } - - } - else if (blank) - { - - // ??? do nothing - - } - else if (container.Tag == BlockTag.AtxHeading) - { - - int p = ln.Length - 1; - - // trim trailing spaces - while (p >= 0 && (ln[p] == ' ' || ln[p] == '\t' || ln[p] == '\n')) - p--; - - int px = p; - - // if string ends in #s, remove these: - while (p >= 0 && ln[p] == '#') - p--; - - // there must be a space before the last hashtag - if (p < 0 || (ln[p] != ' ' && ln[p] != '\t')) - p = px; - - // trim trailing spaces that are before the closing #s - while (p >= first_nonspace && (ln[p] == ' ' || ln[p] == '\t')) - p--; - - AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1); - Finalize(container, line, settings); - container = container.Parent; - - } - else if (AcceptsLines(container.Tag)) - { - - AddLine(container, line, ln, first_nonspace, remainingSpaces); - - } - else if (container.Tag != BlockTag.ThematicBreak && container.Tag != BlockTag.SetextHeading) - { - - // create paragraph container for line - container = CreateChildBlock(container, line, settings, BlockTag.Paragraph, first_nonspace); - AddLine(container, line, ln, first_nonspace, remainingSpaces); - - } - else - { - - Utilities.Warning("Line {0} with container type {1} did not match any condition:\n\"{2}\"", line.LineNumber, container.Tag, ln); - - } - - curptr = container; - } - } - - private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, - out int first_nonspace_column, out char curChar) - { - var chars_to_tab = TabSize - (column % TabSize); - first_nonspace = offset; - first_nonspace_column = column; - while ((curChar = ln[first_nonspace]) != '\n') - { - if (curChar == ' ') - { - first_nonspace++; - first_nonspace_column++; - chars_to_tab--; - if (chars_to_tab == 0) chars_to_tab = TabSize; - } - else if (curChar == '\t') - { - first_nonspace++; - first_nonspace_column += chars_to_tab; - chars_to_tab = TabSize; - } - else - { - break; - } - } - } - } + child.Parent = parent; + child.Top = parent.Top; + + var lastChild = parent.LastChild; + if (lastChild != null) + { + lastChild.NextSibling = child; + } + else + { + parent.FirstChild = child; + } + + parent.LastChild = child; + return child; + } + + private static void AdjustInlineSourcePosition(Inline inline, PositionTracker tracker, ref Stack stack) + { + if (stack == null) + stack = new Stack(); + + while (inline != null) + { + inline.SourcePosition = tracker.CalculateInlineOrigin(inline.SourcePosition, true); + inline.SourceLastPosition = tracker.CalculateInlineOrigin(inline.SourceLastPosition, false); + + if (inline.FirstChild != null) + { + if (inline.NextSibling != null) + stack.Push(inline.NextSibling); + + inline = inline.FirstChild; + } + else if (inline.NextSibling != null) + { + inline = inline.NextSibling; + } + else if (stack.Count > 0) + { + inline = stack.Pop(); + } + else + { + inline = null; + } + } + + } + + /// + /// Walk through the block, its children and siblings, parsing string content into inline content where appropriate. + /// + /// The document level block from which to start the processing. + /// Document data. + /// The settings that influence how the inline parsing is performed. + public static void ProcessInlines(Block block, DocumentData data, CommonMarkSettings settings) + { + Stack inlineStack = null; + var stack = new Stack(); + var parsers = settings.InlineParsers; + var specialCharacters = settings.InlineParserSpecialCharacters; + var subj = new Subject(data); + + StringContent sc; + int delta; + + while (block != null) + { + var tag = block.Tag; + if (tag == BlockTag.Paragraph || tag == BlockTag.AtxHeading || tag == BlockTag.SetextHeading || tag == BlockTag.TableCell) + { + sc = block.StringContent; + if (sc != null) + { + sc.FillSubject(subj); + delta = subj.Position; + + block.InlineContent = InlineMethods.parse_inlines(subj, parsers, specialCharacters); + block.StringContent = null; + + if (sc.PositionTracker != null) + { + sc.PositionTracker.AddBlockOffset(-delta); + AdjustInlineSourcePosition(block.InlineContent, sc.PositionTracker, ref inlineStack); + } + } + } + + if (block.FirstChild != null) + { + if (block.NextSibling != null) + stack.Push(block.NextSibling); + + block = block.FirstChild; + } + else if (block.NextSibling != null) + { + block = block.NextSibling; + } + else if (stack.Count > 0) + { + block = stack.Pop(); + } + else + { + block = null; + } + } + } + + /// + /// Attempts to parse a list item marker (bullet or enumerated). + /// On success, returns length of the marker, and populates + /// data with the details. On failure, returns 0. + /// + /// Original: int parse_list_marker(string ln, int pos, ref ListData dataptr) + private static int ParseListMarker(string ln, int pos, bool interruptsParagraph, out ListData data) + { + char c; + int startpos; + data = null; + var len = ln.Length; + + startpos = pos; + c = ln[pos]; + + if (c == '+' || c == '•' || ((c == '*' || c == '-') && 0 == Scanner.scan_thematic_break(ln, pos, len))) + { + pos++; + + if (pos == len || !Utilities.IsWhitespace(ln[pos])) + return 0; + + if (interruptsParagraph && Scanner.scan_spacechars(ln, pos + 1, ln.Length) == ln.Length - pos - 1) + return 0; + + data = new ListData(); + data.BulletChar = c; + data.Start = 1; + } + else if (c >= '0' && c <= '9') + { + + int start = c - '0'; + + while (pos < len - 1) + { + c = ln[++pos]; + // We limit to 9 digits to avoid overflow, This also seems to be the limit for 'start' in some browsers. + if (c >= '0' && c <= '9' && start < 100000000) + start = start * 10 + (c - '0'); + else + break; + } + + if (pos >= len - 1 || (c != '.' && c != ')')) + return 0; + + pos++; + if (pos == len || !Utilities.IsWhitespace(ln[pos])) + return 0; + + if (interruptsParagraph && + (start != 1 || Scanner.scan_spacechars(ln, pos + 1, ln.Length) == ln.Length - pos - 1)) + return 0; + + data = new ListData(); + data.ListType = ListType.Ordered; + data.BulletChar = '\0'; + data.Start = start; + data.Delimiter = (c == '.' ? ListDelimiter.Period : ListDelimiter.Parenthesis); + + } + else + { + return 0; + } + + return (pos - startpos); + } + + private static bool ListsMatch(ListData listData, ListData itemData) + { + return (listData.ListType == itemData.ListType && + listData.Delimiter == itemData.Delimiter && + // list_data.marker_offset == item_data.marker_offset && + listData.BulletChar == itemData.BulletChar); + } + + private static bool AdvanceOptionalSpace(string line, ref int offset, ref int column, ref int remainingSpaces) + { + if (remainingSpaces > 0) + { + remainingSpaces--; + return true; + } + + var c = line[offset]; + if (c == ' ') + { + offset++; + column++; + return true; + } + else if (c == '\t') + { + offset++; + var chars_to_tab = 4 - (column % TabSize); + column += chars_to_tab; + remainingSpaces = chars_to_tab - 1; + return true; + } + + return false; + } + + private static void AdvanceOffset(string line, int count, bool columns, ref int offset, ref int column, ref int remainingSpaces) + { + if (columns) + { + if (remainingSpaces > count) + { + remainingSpaces -= count; + count = 0; + } + else + { + count -= remainingSpaces; + remainingSpaces = 0; + } + } + else + { + remainingSpaces = 0; + } + + char c; + while (count > 0 && (c = line[offset]) != '\n') + { + if (c == '\t') + { + var chars_to_tab = 4 - (column % TabSize); + column += chars_to_tab; + offset += 1; + count -= columns ? chars_to_tab : 1; + + if (count < 0) + { + remainingSpaces = 0 - count; + } + } + else + { + offset += 1; + column += 1; // assume ascii; block starts are ascii + count -= 1; + } + } + } + + // Process one line at a time, modifying a block. + // Returns 0 if successful. curptr is changed to point to + // the currently open block. + public static void IncorporateLine(LineInfo line, ref Block curptr, CommonMarkSettings settings) + { + var ln = line.Line; + + Block last_matched_container; + + // offset is the char position in the line + var offset = 0; + + // column is the virtual position in the line that takes TAB expansion into account + var column = 0; + + // the adjustment to the virtual position `column` that points to the number of spaces from the TAB that have not been included in any indent. + var remainingSpaces = 0; + + // the char position of the first non-space char + int first_nonspace; + + // the virtual position of the first non-space chart, that includes TAB expansion + int first_nonspace_column; + + int matched; + int i; + ListData data; + bool all_matched = true; + Block cur = curptr; + var blank = false; + char curChar; + int indent; + + // container starts at the document root. + var container = cur.Top; + + // for each containing block, try to parse the associated line start. + // bail out on failure: container will point to the last matching block. + + while (container.LastChild != null && container.LastChild.IsOpen) + { + container = container.LastChild; + + FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); + + indent = first_nonspace_column - column + remainingSpaces; + blank = curChar == '\n'; + + switch (container.Tag) + { + case BlockTag.BlockQuote: + { + if (indent <= 3 && curChar == '>') + { + AdvanceOffset(ln, indent + 1, true, ref offset, ref column, ref remainingSpaces); + AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); + } + else + { + all_matched = false; + } + + break; + } + + case BlockTag.ListItem: + { + if (indent >= container.ListData.MarkerOffset + container.ListData.Padding) + { + AdvanceOffset(ln, container.ListData.MarkerOffset + container.ListData.Padding, true, ref offset, ref column, ref remainingSpaces); + } + else if (blank && container.FirstChild != null) + { + // if container->first_child is NULL, then the opening line + // of the list item was blank after the list marker; in this + // case, we are done with the list item. + AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces); + } + else + { + all_matched = false; + } + + break; + } + + case BlockTag.IndentedCode: + { + if (indent >= CODE_INDENT) + AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); + else if (blank) + AdvanceOffset(ln, first_nonspace - offset, false, ref offset, ref column, ref remainingSpaces); + else + all_matched = false; + + break; + } + + case BlockTag.AtxHeading: + case BlockTag.SetextHeading: + { + // a heading can never contain more than one line + all_matched = false; + if (blank) + container.IsLastLineBlank = true; + + break; + } + + case BlockTag.FencedCode: + { + // -1 means we've seen closer + if (container.FencedCodeData.FenceLength == -1) + { + all_matched = false; + if (blank) + container.IsLastLineBlank = true; + } + else + { + // skip optional spaces of fence offset + i = container.FencedCodeData.FenceOffset; + while (i > 0 && ln[offset] == ' ') + { + offset++; + column++; + i--; + } + } + + break; + } + + case BlockTag.HtmlBlock: + { + // all other block types can accept blanks + if (blank && container.HtmlBlockType >= HtmlBlockType.InterruptingBlock) + { + container.IsLastLineBlank = true; + all_matched = false; + } + + break; + } + + case BlockTag.Paragraph: + { + if (blank) + { + container.IsLastLineBlank = true; + all_matched = false; + } + + break; + } + } + + if (!all_matched) + { + container = container.Parent; // back up to last matching block + break; + } + } + + last_matched_container = container; + + // check to see if we've hit 2nd blank line, break out of list: + if (blank && container.IsLastLineBlank) + BreakOutOfLists(ref container, line, settings); + + var maybeLazy = cur.Tag == BlockTag.Paragraph; + + // unless last matched container is code block, try new container starts: + while (container.Tag != BlockTag.FencedCode && + container.Tag != BlockTag.IndentedCode && + container.Tag != BlockTag.HtmlBlock) + { + + FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); + + indent = first_nonspace_column - column + remainingSpaces; + blank = curChar == '\n'; + + var indented = indent >= CODE_INDENT; + + if (!indented && curChar == '>') + { + + AdvanceOffset(ln, first_nonspace + 1 - offset, false, ref offset, ref column, ref remainingSpaces); + AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); + + container = CreateChildBlock(container, line, settings, BlockTag.BlockQuote, first_nonspace); + + } + else if (!indented && curChar == '#' && 0 != (matched = Scanner.scan_atx_heading_start(ln, first_nonspace, ln.Length, out i))) + { + + AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); + container = CreateChildBlock(container, line, settings, BlockTag.AtxHeading, first_nonspace); + container.Heading = new HeadingData(i); + + } + else if (!indented && (curChar == '`' || curChar == '~') && 0 != (matched = Scanner.scan_open_code_fence(ln, first_nonspace, ln.Length))) + { + + container = CreateChildBlock(container, line, settings, BlockTag.FencedCode, first_nonspace); + container.FencedCodeData = new FencedCodeData(); + container.FencedCodeData.FenceChar = curChar; + container.FencedCodeData.FenceLength = matched; + container.FencedCodeData.FenceOffset = first_nonspace - offset; + + AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); + + } + else if (!indented && curChar == '<' && + (0 != (matched = (int)Scanner.scan_html_block_start(ln, first_nonspace, ln.Length)) + || (container.Tag != BlockTag.Paragraph && 0 != (matched = (int)Scanner.scan_html_block_start_7(ln, first_nonspace, ln.Length))) + )) + { + + container = CreateChildBlock(container, line, settings, BlockTag.HtmlBlock, first_nonspace); + container.HtmlBlockType = (HtmlBlockType)matched; + // note, we don't adjust offset because the tag is part of the text + + } + else if (!indented && container.Tag == BlockTag.Paragraph && (curChar == '=' || curChar == '-') + && 0 != (matched = Scanner.scan_setext_heading_line(ln, first_nonspace, ln.Length))) + { + + container.Tag = BlockTag.SetextHeading; + container.Heading = new HeadingData(matched); + AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); + + } + else if (!indented + && !(container.Tag == BlockTag.Paragraph && !all_matched) + && 0 != (Scanner.scan_thematic_break(ln, first_nonspace, ln.Length))) + { + + // it's only now that we know the line is not part of a setext heading: + container = CreateChildBlock(container, line, settings, BlockTag.ThematicBreak, first_nonspace); + Finalize(container, line, settings); + container = container.Parent; + AdvanceOffset(ln, ln.Length - 1 - offset, false, ref offset, ref column, ref remainingSpaces); + + } + else if ((!indented || container.Tag == BlockTag.List) + && 0 != (matched = ParseListMarker(ln, first_nonspace, container.Tag == BlockTag.Paragraph, out data))) + { + + // compute padding: + AdvanceOffset(ln, first_nonspace + matched - offset, false, ref offset, ref column, ref remainingSpaces); + + var prevOffset = offset; + var prevColumn = column; + var prevRemainingSpaces = remainingSpaces; + + while (column - prevColumn <= CODE_INDENT) + { + if (!AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces)) + break; + } + + // i = number of spaces after marker, up to 5 + if (column == prevColumn) + { + // no spaces at all + data.Padding = matched + 1; + } + else if (column - prevColumn > CODE_INDENT || ln[offset] == '\n') + { + data.Padding = matched + 1; + + // too many (or none) spaces, ignoring everything but the first one + offset = prevOffset; + column = prevColumn; + remainingSpaces = prevRemainingSpaces; + AdvanceOptionalSpace(ln, ref offset, ref column, ref remainingSpaces); + } + else + { + data.Padding = matched + column - prevColumn; + } + + // check container; if it's a list, see if this list item + // can continue the list; otherwise, create a list container. + + data.MarkerOffset = indent; + + if (container.Tag != BlockTag.List || !ListsMatch(container.ListData, data)) + { + container = CreateChildBlock(container, line, settings, BlockTag.List, first_nonspace); + container.ListData = data; + } + + // add the list item + container = CreateChildBlock(container, line, settings, BlockTag.ListItem, first_nonspace); + container.ListData = data; + } + else if (indented && !maybeLazy && !blank) + { + AdvanceOffset(ln, CODE_INDENT, true, ref offset, ref column, ref remainingSpaces); + container = CreateChildBlock(container, line, settings, BlockTag.IndentedCode, offset); + } + else + { + break; + } + + if (AcceptsLines(container.Tag)) + { + // if it's a line container, it can't contain other containers + break; + } + + maybeLazy = false; + } + + // what remains at offset is a text line. add the text to the + // appropriate container. + + FindFirstNonspace(ln, offset, column, out first_nonspace, out first_nonspace_column, out curChar); + indent = first_nonspace_column - column; + blank = curChar == '\n'; + + if (blank && container.LastChild != null) + { + container.LastChild.IsLastLineBlank = true; + } + + // block quote lines are never blank as they start with > + // and we don't count blanks in fenced code for purposes of tight/loose + // lists or breaking out of lists. we also don't set last_line_blank + // on an empty list item. + container.IsLastLineBlank = (blank && + container.Tag != BlockTag.BlockQuote && + container.Tag != BlockTag.SetextHeading && + container.Tag != BlockTag.FencedCode && + !(container.Tag == BlockTag.ListItem && + container.FirstChild == null && + container.SourcePosition >= line.LineOffset)); + + Block cont = container; + while (cont.Parent != null) + { + cont.Parent.IsLastLineBlank = false; + cont = cont.Parent; + } + + if (cur != last_matched_container && + container == last_matched_container && + !blank && + cur.Tag == BlockTag.Paragraph && + cur.StringContent.Length > 0) + { + + AddLine(cur, line, ln, offset, remainingSpaces); + + } + else + { // not a lazy continuation + + // finalize any blocks that were not matched and set cur to container: + while (cur != last_matched_container) + { + + Finalize(cur, line, settings); + cur = cur.Parent; + + if (cur == null) + throw new CommonMarkException("Cannot finalize container block. Last matched container tag = " + last_matched_container.Tag); + + } + + if (container.Tag == BlockTag.IndentedCode) + { + AddLine(container, line, ln, offset, remainingSpaces); + + } + else if (container.Tag == BlockTag.FencedCode) + { + + if ((indent <= 3 + && curChar == container.FencedCodeData.FenceChar) + && (0 != Scanner.scan_close_code_fence(ln, first_nonspace, container.FencedCodeData.FenceLength, ln.Length))) + { + // if closing fence, set fence length to -1. it will be closed when the next line is processed. + container.FencedCodeData.FenceLength = -1; + } + else + { + AddLine(container, line, ln, offset, remainingSpaces); + } + + } + else if (container.Tag == BlockTag.HtmlBlock) + { + + AddLine(container, line, ln, offset, remainingSpaces); + + if (Scanner.scan_html_block_end(container.HtmlBlockType, ln, first_nonspace, ln.Length)) + { + Finalize(container, line, settings); + container = container.Parent; + } + + } + else if (blank) + { + + // ??? do nothing + + } + else if (container.Tag == BlockTag.AtxHeading) + { + + int p = ln.Length - 1; + + // trim trailing spaces + while (p >= 0 && (ln[p] == ' ' || ln[p] == '\t' || ln[p] == '\n')) + p--; + + int px = p; + + // if string ends in #s, remove these: + while (p >= 0 && ln[p] == '#') + p--; + + // there must be a space before the last hashtag + if (p < 0 || (ln[p] != ' ' && ln[p] != '\t')) + p = px; + + // trim trailing spaces that are before the closing #s + while (p >= first_nonspace && (ln[p] == ' ' || ln[p] == '\t')) + p--; + + AddLine(container, line, ln, first_nonspace, remainingSpaces, p - first_nonspace + 1); + Finalize(container, line, settings); + container = container.Parent; + + } + else if (AcceptsLines(container.Tag)) + { + + AddLine(container, line, ln, first_nonspace, remainingSpaces); + + } + else if (container.Tag != BlockTag.ThematicBreak && container.Tag != BlockTag.SetextHeading) + { + + // create paragraph container for line + container = CreateChildBlock(container, line, settings, BlockTag.Paragraph, first_nonspace); + AddLine(container, line, ln, first_nonspace, remainingSpaces); + + } + else + { + + Utilities.Warning("Line {0} with container type {1} did not match any condition:\n\"{2}\"", line.LineNumber, container.Tag, ln); + + } + + curptr = container; + } + } + + private static void FindFirstNonspace(string ln, int offset, int column, out int first_nonspace, + out int first_nonspace_column, out char curChar) + { + var chars_to_tab = TabSize - (column % TabSize); + first_nonspace = offset; + first_nonspace_column = column; + while ((curChar = ln[first_nonspace]) != '\n') + { + if (curChar == ' ') + { + first_nonspace++; + first_nonspace_column++; + chars_to_tab--; + if (chars_to_tab == 0) chars_to_tab = TabSize; + } + else if (curChar == '\t') + { + first_nonspace++; + first_nonspace_column += chars_to_tab; + chars_to_tab = TabSize; + } + else + { + break; + } + } + } + } } diff --git a/CommonMark/Syntax/Block.cs b/CommonMark/Syntax/Block.cs index 177f83c..7a0488a 100644 --- a/CommonMark/Syntax/Block.cs +++ b/CommonMark/Syntax/Block.cs @@ -97,7 +97,7 @@ internal static Block CreateDocument() public int EndLine { get; set; } /// - /// Gets or sets the position of the block element within the source data. This position is before + /// Gets or sets the position of the block element within the source data. This position is before /// any opening characters. must be enabled /// for this value to be defined. /// @@ -136,7 +136,7 @@ public int SourceLength public Block FirstChild { get; set; } /// - /// Gets or sets the last child element (the last sibling of ) of this instance. + /// Gets or sets the last child element (the last sibling of ) of this instance. /// if there are no children. /// public Block LastChild { get; set; } @@ -177,7 +177,7 @@ public int SourceLength /// /// Gets or sets the alignment specified as part of a table heading in a GithubStyleTables. /// - public List TableHeaderAlignments { get; set; } + public TableHeaderAlignment[] TableHeaderAlignments { get; set; } /// /// Gets or sets the additional properties that apply to heading elements. diff --git a/CommonMark/Utilities.cs b/CommonMark/Utilities.cs index 57a8c03..a7f7b18 100644 --- a/CommonMark/Utilities.cs +++ b/CommonMark/Utilities.cs @@ -4,116 +4,137 @@ namespace CommonMark { - /// - /// Reusable utility functions, not directly related to parsing or formatting data. - /// - internal static class Utilities - { - /// - /// Writes a warning to the Debug window. - /// - /// The message with optional formatting placeholders. - /// The arguments for the formatting placeholders. - [System.Diagnostics.Conditional("DEBUG")] - public static void Warning(string message, params object[] args) - { - if (args != null && args.Length > 0) - message = string.Format(System.Globalization.CultureInfo.InvariantCulture, message, args); - - System.Diagnostics.Debug.WriteLine(message, "Warning"); - } + /// + /// Reusable utility functions, not directly related to parsing or formatting data. + /// + internal static class Utilities + { + /// + /// Writes a warning to the Debug window. + /// + /// The message with optional formatting placeholders. + /// The arguments for the formatting placeholders. + [System.Diagnostics.Conditional("DEBUG")] + public static void Warning(string message, params object[] args) + { + if (args != null && args.Length > 0) + message = string.Format(System.Globalization.CultureInfo.InvariantCulture, message, args); + + System.Diagnostics.Debug.WriteLine(message, "Warning"); + } #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - public static bool IsEscapableSymbol(char c) - { - // char.IsSymbol also works with Unicode symbols that cannot be escaped based on the specification. - return (c > ' ' && c < '0') || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z' && c < 127) || c == '•'; - } + public static bool IsEscapableSymbol(char c) + { + // char.IsSymbol also works with Unicode symbols that cannot be escaped based on the specification. + return (c > ' ' && c < '0') || (c > '9' && c < 'A') || (c > 'Z' && c < 'a') || (c > 'z' && c < 127) || c == '•'; + } #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - public static bool IsAsciiLetter(char c) - { - return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); - } + public static bool IsAsciiLetter(char c) + { + return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z'); + } #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - public static bool IsAsciiLetterOrDigit(char c) - { - return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z'); - } + public static bool IsAsciiLetterOrDigit(char c) + { + return (c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || (c >= 'A' && c <= 'Z'); + } #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - public static bool IsWhitespace(char c) - { - return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'; - } - - /// - /// Checks if the given character is an Unicode space or punctuation character. - /// + public static bool IsWhitespace(char c) + { + return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'; + } + + /// + /// Checks if the given character is an Unicode space or punctuation character. + /// #if OptimizeFor45 [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] #endif - public static void CheckUnicodeCategory(char c, out bool space, out bool punctuation) - { - // This method does the same as would calling the two built-in methods: - // // space = char.IsWhiteSpace(c); - // // punctuation = char.IsPunctuation(c); - // - // The performance benefit for using this method is ~50% when calling only on ASCII characters - // and ~12% when calling only on Unicode characters. - - if (c <= 'ÿ') - { - space = c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085'; - punctuation = (c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126); - } - else - { - var category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c); - space = category == System.Globalization.UnicodeCategory.SpaceSeparator - || category == System.Globalization.UnicodeCategory.LineSeparator - || category == System.Globalization.UnicodeCategory.ParagraphSeparator; - punctuation = !space && - (category == System.Globalization.UnicodeCategory.ConnectorPunctuation - || category == System.Globalization.UnicodeCategory.DashPunctuation - || category == System.Globalization.UnicodeCategory.OpenPunctuation - || category == System.Globalization.UnicodeCategory.ClosePunctuation - || category == System.Globalization.UnicodeCategory.InitialQuotePunctuation - || category == System.Globalization.UnicodeCategory.FinalQuotePunctuation - || category == System.Globalization.UnicodeCategory.OtherPunctuation); - } - } - - /// - /// Determines if the first line (ignoring the first ) of a string contains only spaces. - /// - public static bool IsFirstLineBlank(string source, int startIndex) - { - char c; - var lastIndex = source.Length; - - while (startIndex < lastIndex) - { - c = source[startIndex]; - if (c == '\n') - return true; - - if (c != ' ') - return false; - - startIndex++; - } - - return true; - } - } + public static void CheckUnicodeCategory(char c, out bool space, out bool punctuation) + { + // This method does the same as would calling the two built-in methods: + // // space = char.IsWhiteSpace(c); + // // punctuation = char.IsPunctuation(c); + // + // The performance benefit for using this method is ~50% when calling only on ASCII characters + // and ~12% when calling only on Unicode characters. + + if (c <= 'ÿ') + { + space = c == ' ' || (c >= '\t' && c <= '\r') || c == '\u00a0' || c == '\u0085'; + punctuation = (c >= 33 && c <= 47) || (c >= 58 && c <= 64) || (c >= 91 && c <= 96) || (c >= 123 && c <= 126); + } + else + { + var category = System.Globalization.CharUnicodeInfo.GetUnicodeCategory(c); + space = category == System.Globalization.UnicodeCategory.SpaceSeparator + || category == System.Globalization.UnicodeCategory.LineSeparator + || category == System.Globalization.UnicodeCategory.ParagraphSeparator; + punctuation = !space && + (category == System.Globalization.UnicodeCategory.ConnectorPunctuation + || category == System.Globalization.UnicodeCategory.DashPunctuation + || category == System.Globalization.UnicodeCategory.OpenPunctuation + || category == System.Globalization.UnicodeCategory.ClosePunctuation + || category == System.Globalization.UnicodeCategory.InitialQuotePunctuation + || category == System.Globalization.UnicodeCategory.FinalQuotePunctuation + || category == System.Globalization.UnicodeCategory.OtherPunctuation); + } + } + + /// + /// Determines if the first line (ignoring the first ) of a string contains only spaces. + /// + public static bool IsFirstLineBlank(string source, int startIndex) + { + char c; + var lastIndex = source.Length; + + while (startIndex < lastIndex) + { + c = source[startIndex]; + if (c == '\n') + return true; + + if (c != ' ') + return false; + + startIndex++; + } + + return true; + } + + + /// + /// Clears the contents of the string builder. + /// + /// + /// The to clear. + /// + public static void ClearStringBuilder(StringBuilder value) + { +#if OptimizeFor20 + value.Length = 0; + value.Capacity = 0; +#elif OptimizeFor35 + value.Length = 0; + value.Capacity = 0; +#else + value.Clear(); +#endif + } + + } } From 28e867f008d8b6dc59b002e78efaed2ff6b6c99d Mon Sep 17 00:00:00 2001 From: Viktor Ekblom Date: Wed, 22 Mar 2017 15:40:29 +0100 Subject: [PATCH 3/4] Moved all table methods to separet file --- CommonMark/CommonMark.Base.csproj | 1 + CommonMark/Parser/BlockMethods.cs | 368 +---------------------------- CommonMark/Parser/TableMethods.cs | 373 ++++++++++++++++++++++++++++++ 3 files changed, 375 insertions(+), 367 deletions(-) create mode 100644 CommonMark/Parser/TableMethods.cs diff --git a/CommonMark/CommonMark.Base.csproj b/CommonMark/CommonMark.Base.csproj index f05c409..116982e 100644 --- a/CommonMark/CommonMark.Base.csproj +++ b/CommonMark/CommonMark.Base.csproj @@ -59,6 +59,7 @@ + diff --git a/CommonMark/Parser/BlockMethods.cs b/CommonMark/Parser/BlockMethods.cs index c58a7fa..4bc1c8f 100644 --- a/CommonMark/Parser/BlockMethods.cs +++ b/CommonMark/Parser/BlockMethods.cs @@ -102,370 +102,6 @@ private static void BreakOutOfLists(ref Block blockRef, LineInfo line, CommonMar } } - static List ParseTableLine(StringPart part, StringBuilder sb) - { - string line = part.Source.Substring(part.StartIndex, part.Length); - line = line.TrimEnd('\n'); - - var ret = new List(); - - var i = 0; - - if (i < line.Length && line[i] == '|') i++; - - while (i < line.Length && char.IsWhiteSpace(line[i])) i++; - - for (; i < line.Length; i++) - { - var c = line[i]; - if (c == '\\') - { - i++; - if (i < line.Length && line[i] == '|') - { - sb.Append(line[i]); - continue; - } - i--; - } - - if (c == '|') - { - ret.Add(sb.ToString()); - Utilities.ClearStringBuilder(sb); - } - else - { - sb.Append(c); - } - } - - if (sb.Length != 0) - { - ret.Add(sb.ToString()); - Utilities.ClearStringBuilder(sb); - } - - return ret; - } - - static void MakeTableCells(Block row, StringBuilder sb) - { - var offset = 0; - - var parts = row.StringContent.RetrieveParts(); - foreach (var part in parts.Array) - { - if (part.Length <= 0) - continue; - - string asStr = part.Source.Substring(part.StartIndex, part.Length); - - for (var i = 0; i < asStr.Length; i++) - { - var c = asStr[i]; - - if (c == '|') - { - var text = sb.ToString(); - Utilities.ClearStringBuilder(sb); - - if (text.Length > 0) - { - int length = text.Length; - string trimmedText = text.TrimStart(); - var leadingWhiteSpace = length - trimmedText.Length; - trimmedText = trimmedText.TrimEnd(); - var trailingWhiteSpace = length - leadingWhiteSpace - text.Length; - - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + trimmedText.Length; - cell.StringContent = new StringContent(); - cell.StringContent.Append(trimmedText, 0, trimmedText.Length); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } - - cell.IsOpen = false; - } - - offset += text.Length; - - // skip the | - offset++; - continue; - } - - if (c == '\\') - { - sb.Append(c); - if (i + 1 < asStr.Length) - { - if (Utilities.IsEscapableSymbol(asStr[i + 1])) - sb.Append(asStr[i + 1]); - } - i++; - } - else - { - sb.Append(c); - } - } - } - - if (sb.Length > 0) - { - var text = sb.ToString(); - Utilities.ClearStringBuilder(sb); - - if (text.Length > 0) - { - var leadingWhiteSpace = 0; - while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; - var trailingWhiteSpace = 0; - while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; - - if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) - { - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; - cell.StringContent = new StringContent(); - cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } - - cell.IsOpen = false; - } - } - } - } - - static void MakeTableRows(Block table, StringBuilder sb) - { - var parts = table.StringContent.RetrieveParts(); - var offset = 0; - - for (var i = 0; i < parts.Array.Length; i++) - { - var line = parts.Array[i]; - if (line.Length <= 0) - continue; - - var lineLength = line.Length; - string actualLine = line.Source.Substring(line.StartIndex, line.Length); - - // skip the header row - if (i != 1 && !string.IsNullOrEmpty(actualLine) && actualLine != " ") - { - var rowStartsInDocument = table.SourcePosition + offset; - var row = new Block(BlockTag.TableRow, rowStartsInDocument); - row.SourceLastPosition = rowStartsInDocument + lineLength; - - row.StringContent = new StringContent(); - row.StringContent.Append(actualLine, 0, actualLine.Length); - - if (table.LastChild == null) - { - table.FirstChild = row; - table.LastChild = row; - } - else - { - table.LastChild.NextSibling = row; - table.LastChild = row; - } - - MakeTableCells(row, sb); - row.IsOpen = false; - } - - offset += lineLength; - } - } - - static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) - { - if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; - - var parts = b.StringContent.RetrieveParts().Array; - - if (parts.Length < 2) return false; - - var sb = new StringBuilder(); - - var columnsPart = parts[0]; - var columnsLine = ParseTableLine(columnsPart, sb); - if (columnsLine.Count == 1) return false; - - var headersPart = parts[1]; - var headerLine = ParseTableLine(headersPart, sb); - if (headerLine.Count == 1) return false; - - TableHeaderAlignment[] headerAlignment = new TableHeaderAlignment[headerLine.Count]; - - for (int hl = 0; hl < headerLine.Count; hl++) - { - var headerPart = headerLine[hl]; - var trimmed = headerPart.Trim(); - if (trimmed.Length < 3) return false; - - var validateFrom = 0; - var startsWithColon = trimmed[validateFrom] == ':'; - if (startsWithColon) validateFrom++; - - var validateTo = trimmed.Length - 1; - var endsWithColon = trimmed[validateTo] == ':'; - if (endsWithColon) validateTo--; - - for (var i = validateFrom; i <= validateTo; i++) - { - // don't check for escapes, they don't count in header - if (trimmed[i] != '-') return false; - } - - if (!startsWithColon && !endsWithColon) - { - headerAlignment[hl] = TableHeaderAlignment.None; - continue; - } - - if (startsWithColon && endsWithColon) - { - headerAlignment[hl] = TableHeaderAlignment.Center; - continue; - } - - if (startsWithColon) - { - headerAlignment[hl] = TableHeaderAlignment.Left; - } - - if (endsWithColon) - { - headerAlignment[hl] = TableHeaderAlignment.Right; - } - } - - if (columnsLine.Count < 2) return false; - if (headerLine.Count < columnsLine.Count) return false; - - var lastTableLine = 1; - - // it's a table! - List tableParts = new List { columnsPart, headersPart }; - var takingCharsForTable = columnsPart.Length + headersPart.Length; - for (var i = 2; i < parts.Length; i++) - { - var hasPipe = false; - var part = parts[i]; - - if (part.Length <= 0) - continue; - - string strLine = part.Source.Substring(part.StartIndex, part.Length); - - int indexOfPipe = strLine.IndexOf('|'); - hasPipe = indexOfPipe == 0; - - while (!hasPipe) - { - if (indexOfPipe > 0 && strLine[indexOfPipe - 1] == '\\') - { - indexOfPipe = strLine.IndexOf('|', indexOfPipe); - } - else if(indexOfPipe > 0) - { - hasPipe = true; - break; - } - else - { - break; - } - } - - if (!hasPipe) break; - - tableParts.Add(part); - takingCharsForTable += part.Length; - lastTableLine = i; - } - - bool hasTrailingParts = false; - for (var i = lastTableLine + 1; i < parts.Length; i++) - { - var part = parts[i]; - if (part.Length <= 0) - continue; - - hasTrailingParts = true; - break; - } - - // No need to break, the whole block is a table now - if (!hasTrailingParts) - { - b.Tag = BlockTag.Table; - b.TableHeaderAlignments = headerAlignment; - - // create table rows - MakeTableRows(b, sb); - return true; - } - - // get the text of the table separate - var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); - var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); - - // create the trailing paragraph, and set it's text and source positions - var newParagraph = b.Clone(); - newParagraph.StringContent = b.StringContent; - if (settings.TrackSourcePosition) - { - newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; - newParagraph.SourceLastPosition = newParagraph.SourcePosition + (b.SourceLength - tableBlockString.Length); - } - - // update the text of the table block - b.Tag = BlockTag.Table; - b.TableHeaderAlignments = headerAlignment; - b.StringContent = new StringContent(); - foreach (StringPart part in tableParts) - { - b.StringContent.Append(part.Source, part.StartIndex, part.Length); - } - if (settings.TrackSourcePosition) - { - b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; - } - - // create table rows - MakeTableRows(b, sb); - - // put the new paragraph after the table - newParagraph.NextSibling = b.NextSibling; - b.NextSibling = newParagraph; - - Finalize(newParagraph, line, settings); - - return true; - } - public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) { // don't do anything if the block is already closed @@ -498,10 +134,8 @@ public static void Finalize(Block b, LineInfo line, CommonMarkSettings settings) case BlockTag.Paragraph: var sc = b.StringContent; - if (TryMakeTable(b, line, settings)) - { + if (TableMethods.TryMakeTable(b, line, settings)) break; - } if (!sc.StartsWith('[')) break; diff --git a/CommonMark/Parser/TableMethods.cs b/CommonMark/Parser/TableMethods.cs new file mode 100644 index 0000000..e898e5b --- /dev/null +++ b/CommonMark/Parser/TableMethods.cs @@ -0,0 +1,373 @@ +using System.Collections.Generic; +using System.Text; +using CommonMark.Syntax; + +namespace CommonMark.Parser +{ + internal static class TableMethods + { + static List ParseTableLine(StringPart part, StringBuilder sb) + { + string line = part.Source.Substring(part.StartIndex, part.Length); + line = line.TrimEnd('\n'); + + var ret = new List(); + + var i = 0; + + if (i < line.Length && line[i] == '|') i++; + + while (i < line.Length && char.IsWhiteSpace(line[i])) i++; + + for (; i < line.Length; i++) + { + var c = line[i]; + if (c == '\\') + { + i++; + if (i < line.Length && line[i] == '|') + { + sb.Append(line[i]); + continue; + } + i--; + } + + if (c == '|') + { + ret.Add(sb.ToString()); + Utilities.ClearStringBuilder(sb); + } + else + { + sb.Append(c); + } + } + + if (sb.Length != 0) + { + ret.Add(sb.ToString()); + Utilities.ClearStringBuilder(sb); + } + + return ret; + } + + static void MakeTableCells(Block row, StringBuilder sb) + { + var offset = 0; + + var parts = row.StringContent.RetrieveParts(); + foreach (var part in parts.Array) + { + if (part.Length <= 0) + continue; + + string asStr = part.Source.Substring(part.StartIndex, part.Length); + + for (var i = 0; i < asStr.Length; i++) + { + var c = asStr[i]; + + if (c == '|') + { + var text = sb.ToString(); + Utilities.ClearStringBuilder(sb); + + if (text.Length > 0) + { + int length = text.Length; + string trimmedText = text.TrimStart(); + var leadingWhiteSpace = length - trimmedText.Length; + trimmedText = trimmedText.TrimEnd(); + var trailingWhiteSpace = length - leadingWhiteSpace - text.Length; + + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + trimmedText.Length; + cell.StringContent = new StringContent(); + cell.StringContent.Append(trimmedText, 0, trimmedText.Length); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + + offset += text.Length; + + // skip the | + offset++; + continue; + } + + if (c == '\\') + { + sb.Append(c); + if (i + 1 < asStr.Length) + { + if (Utilities.IsEscapableSymbol(asStr[i + 1])) + sb.Append(asStr[i + 1]); + } + i++; + } + else + { + sb.Append(c); + } + } + } + + if (sb.Length > 0) + { + var text = sb.ToString(); + Utilities.ClearStringBuilder(sb); + + if (text.Length > 0) + { + var leadingWhiteSpace = 0; + while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; + var trailingWhiteSpace = 0; + while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; + + if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) + { + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; + cell.StringContent = new StringContent(); + cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; + } + + cell.IsOpen = false; + } + } + } + } + + static void MakeTableRows(Block table, StringBuilder sb) + { + var parts = table.StringContent.RetrieveParts(); + var offset = 0; + + for (var i = 0; i < parts.Array.Length; i++) + { + var line = parts.Array[i]; + if (line.Length <= 0) + continue; + + var lineLength = line.Length; + string actualLine = line.Source.Substring(line.StartIndex, line.Length); + + // skip the header row + if (i != 1 && !string.IsNullOrEmpty(actualLine) && actualLine != " ") + { + var rowStartsInDocument = table.SourcePosition + offset; + var row = new Block(BlockTag.TableRow, rowStartsInDocument); + row.SourceLastPosition = rowStartsInDocument + lineLength; + + row.StringContent = new StringContent(); + row.StringContent.Append(actualLine, 0, actualLine.Length); + + if (table.LastChild == null) + { + table.FirstChild = row; + table.LastChild = row; + } + else + { + table.LastChild.NextSibling = row; + table.LastChild = row; + } + + MakeTableCells(row, sb); + row.IsOpen = false; + } + + offset += lineLength; + } + } + + internal static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings settings) + { + if ((settings.AdditionalFeatures & CommonMarkAdditionalFeatures.GithubStyleTables) == 0) return false; + + var parts = b.StringContent.RetrieveParts().Array; + + if (parts.Length < 2) return false; + + var sb = new StringBuilder(); + + var columnsPart = parts[0]; + var columnsLine = ParseTableLine(columnsPart, sb); + if (columnsLine.Count == 1) return false; + + var headersPart = parts[1]; + var headerLine = ParseTableLine(headersPart, sb); + if (headerLine.Count == 1) return false; + + TableHeaderAlignment[] headerAlignment = new TableHeaderAlignment[headerLine.Count]; + + for (int hl = 0; hl < headerLine.Count; hl++) + { + var headerPart = headerLine[hl]; + var trimmed = headerPart.Trim(); + if (trimmed.Length < 3) return false; + + var validateFrom = 0; + var startsWithColon = trimmed[validateFrom] == ':'; + if (startsWithColon) validateFrom++; + + var validateTo = trimmed.Length - 1; + var endsWithColon = trimmed[validateTo] == ':'; + if (endsWithColon) validateTo--; + + for (var i = validateFrom; i <= validateTo; i++) + { + // don't check for escapes, they don't count in header + if (trimmed[i] != '-') return false; + } + + if (!startsWithColon && !endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.None; + continue; + } + + if (startsWithColon && endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Center; + continue; + } + + if (startsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Left; + } + + if (endsWithColon) + { + headerAlignment[hl] = TableHeaderAlignment.Right; + } + } + + if (columnsLine.Count < 2) return false; + if (headerLine.Count < columnsLine.Count) return false; + + var lastTableLine = 1; + + // it's a table! + List tableParts = new List { columnsPart, headersPart }; + var takingCharsForTable = columnsPart.Length + headersPart.Length; + for (var i = 2; i < parts.Length; i++) + { + var hasPipe = false; + var part = parts[i]; + + if (part.Length <= 0) + continue; + + string strLine = part.Source.Substring(part.StartIndex, part.Length); + + int indexOfPipe = strLine.IndexOf('|'); + hasPipe = indexOfPipe == 0; + + while (!hasPipe) + { + if (indexOfPipe > 0 && strLine[indexOfPipe - 1] == '\\') + { + indexOfPipe = strLine.IndexOf('|', indexOfPipe); + } + else if (indexOfPipe > 0) + { + hasPipe = true; + break; + } + else + { + break; + } + } + + if (!hasPipe) break; + + tableParts.Add(part); + takingCharsForTable += part.Length; + lastTableLine = i; + } + + bool hasTrailingParts = false; + for (var i = lastTableLine + 1; i < parts.Length; i++) + { + var part = parts[i]; + if (part.Length <= 0) + continue; + + hasTrailingParts = true; + break; + } + + // No need to break, the whole block is a table now + if (!hasTrailingParts) + { + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + + // create table rows + MakeTableRows(b, sb); + return true; + } + + // get the text of the table separate + var tableBlockString = b.StringContent.TakeFromStart(takingCharsForTable, trim: true); + var newBlock = new Block(BlockTag.Paragraph, b.SourcePosition + tableBlockString.Length); + + // create the trailing paragraph, and set it's text and source positions + var newParagraph = b.Clone(); + newParagraph.StringContent = b.StringContent; + if (settings.TrackSourcePosition) + { + newParagraph.SourcePosition = b.SourcePosition + tableBlockString.Length; + newParagraph.SourceLastPosition = newParagraph.SourcePosition + (b.SourceLength - tableBlockString.Length); + } + + // update the text of the table block + b.Tag = BlockTag.Table; + b.TableHeaderAlignments = headerAlignment; + b.StringContent = new StringContent(); + foreach (StringPart part in tableParts) + { + b.StringContent.Append(part.Source, part.StartIndex, part.Length); + } + if (settings.TrackSourcePosition) + { + b.SourceLastPosition = b.SourcePosition + tableBlockString.Length; + } + + // create table rows + MakeTableRows(b, sb); + + // put the new paragraph after the table + newParagraph.NextSibling = b.NextSibling; + b.NextSibling = newParagraph; + + BlockMethods.Finalize(newParagraph, line, settings); + + return true; + } + } +} From b2b9f578edc5179648ddcbd75b40bc23b1c0b779 Mon Sep 17 00:00:00 2001 From: Viktor Ekblom Date: Wed, 22 Mar 2017 21:41:21 +0100 Subject: [PATCH 4/4] Fixed parser to comply with official GFM specification --- CommonMark.Tests/TableTests.cs | 298 +++++++++++---------- CommonMark/Formatters/HtmlFormatterSlim.cs | 3 + CommonMark/Parser/TableMethods.cs | 106 ++++---- CommonMark/Utilities.cs | 8 + 4 files changed, 212 insertions(+), 203 deletions(-) diff --git a/CommonMark.Tests/TableTests.cs b/CommonMark.Tests/TableTests.cs index 1c17bf9..8e2e792 100644 --- a/CommonMark.Tests/TableTests.cs +++ b/CommonMark.Tests/TableTests.cs @@ -88,94 +88,6 @@ public void SimpleTable() Assert.IsNull(secondRowCell2.NextSibling); } - [TestMethod] - public void SplitTable() - { - var markdown = -@"First Header | Second Header -------------- | ------------- -Content Cell1 | Content Cell2 -Content Cell3 | Content Cell4 -Hello world -"; - markdown = markdown.Replace("\r\n", "\n"); - - var ast = - CommonMarkConverter.Parse( - markdown, - ReadSettings - ); - - string html; - using (var str = new StringWriter()) - { - CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); - html = str.ToString(); - } - Assert.AreEqual("
First HeaderSecond Header
Content Cell1Content Cell2
Content Cell3Content Cell4
\r\n

Hello world

\r\n", html); - - var firstChild = ast.FirstChild; - var secondChild = firstChild.NextSibling; - Assert.AreEqual(BlockTag.Table, firstChild.Tag); - var firstMarkdown = markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength); - var shouldMatch = @"First Header | Second Header -------------- | ------------- -Content Cell1 | Content Cell2 -Content Cell3 | Content Cell4 -"; - shouldMatch = shouldMatch.Replace("\r\n", "\n"); - - Assert.AreEqual(shouldMatch, firstMarkdown); - Assert.IsNotNull(firstChild.TableHeaderAlignments); - Assert.AreEqual(2, firstChild.TableHeaderAlignments.Length); - Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[0]); - Assert.AreEqual(TableHeaderAlignment.None, firstChild.TableHeaderAlignments[1]); - - var headerRow = firstChild.FirstChild; - Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); - Assert.AreEqual("First Header | Second Header\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); - - var headerCell1 = headerRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); - Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); - - var headerCell2 = headerCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); - Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); - Assert.IsNull(headerCell2.NextSibling); - - var firstRow = headerRow.NextSibling; - Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); - Assert.AreEqual("Content Cell1 | Content Cell2\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); - - var firstRowCell1 = firstRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); - Assert.AreEqual("Content Cell1", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); - - var firstRowCell2 = firstRowCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); - Assert.AreEqual("Content Cell2", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); - Assert.IsNull(firstRowCell2.NextSibling); - - var secondRow = firstRow.NextSibling; - Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); - Assert.AreEqual("Content Cell3 | Content Cell4\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); - Assert.IsNull(secondRow.NextSibling); - - var secondRowCell1 = secondRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); - Assert.AreEqual("Content Cell3", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); - - var secondRowCell2 = secondRowCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); - Assert.AreEqual("Content Cell4", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); - Assert.IsNull(secondRowCell2.NextSibling); - - Assert.AreEqual(BlockTag.Paragraph, secondChild.Tag); - var secondMarkdown = markdown.Substring(secondChild.SourcePosition, secondChild.SourceLength); - Assert.AreEqual("Hello world\n", secondMarkdown); - } - [TestMethod] public void WrappedTable() { @@ -201,11 +113,12 @@ Hello world CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); html = str.ToString(); } - Assert.AreEqual("

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
\r\n

Hello world

\r\n", html); + string expected = "

Nope nope.

\r\n
First HeaderSecond Header
Content CellContent Cell
Content CellContent Cell
Hello world
"; + + Assert.AreEqual(expected, html); Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.Tag); Assert.AreEqual(BlockTag.Table, ast.FirstChild.NextSibling.Tag); - Assert.AreEqual(BlockTag.Paragraph, ast.FirstChild.NextSibling.NextSibling.Tag); } [TestMethod] @@ -231,62 +144,6 @@ public void TableWithInlines() Assert.AreEqual("
NameDescription
HelpDisplay the help window.
CloseCloses a window
", html); } - [TestMethod] - public void TableWithExtraPipes() - { - var markdown = "| First Header | Second Header |\n| ------------- | ------------- |\n| cell #11 | cell #12 |\n| cell #21 | cell #22 |\n"; - - var ast = - CommonMarkConverter.Parse( - markdown, - ReadSettings - ); - - var firstChild = ast.FirstChild; - Assert.AreEqual(BlockTag.Table, firstChild.Tag); - Assert.AreEqual(markdown, markdown.Substring(firstChild.SourcePosition, firstChild.SourceLength)); - - var headerRow = firstChild.FirstChild; - Assert.AreEqual(BlockTag.TableRow, headerRow.Tag); - Assert.AreEqual("| First Header | Second Header |\n", markdown.Substring(headerRow.SourcePosition, headerRow.SourceLength)); - - var headerCell1 = headerRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, headerCell1.Tag); - Assert.AreEqual("First Header", markdown.Substring(headerCell1.SourcePosition, headerCell1.SourceLength)); - - var headerCell2 = headerCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, headerCell2.Tag); - Assert.AreEqual("Second Header", markdown.Substring(headerCell2.SourcePosition, headerCell2.SourceLength)); - Assert.IsNull(headerCell2.NextSibling); - - var firstRow = headerRow.NextSibling; - Assert.AreEqual(BlockTag.TableRow, firstRow.Tag); - Assert.AreEqual("| cell #11 | cell #12 |\n", markdown.Substring(firstRow.SourcePosition, firstRow.SourceLength)); - - var firstRowCell1 = firstRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, firstRowCell1.Tag); - Assert.AreEqual("cell #11", markdown.Substring(firstRowCell1.SourcePosition, firstRowCell1.SourceLength)); - - var firstRowCell2 = firstRowCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, firstRowCell2.Tag); - Assert.AreEqual("cell #12", markdown.Substring(firstRowCell2.SourcePosition, firstRowCell2.SourceLength)); - Assert.IsNull(firstRowCell2.NextSibling); - - var secondRow = firstRow.NextSibling; - Assert.AreEqual(BlockTag.TableRow, secondRow.Tag); - Assert.AreEqual("| cell #21 | cell #22 |\n", markdown.Substring(secondRow.SourcePosition, secondRow.SourceLength)); - Assert.IsNull(secondRow.NextSibling); - - var secondRowCell1 = secondRow.FirstChild; - Assert.AreEqual(BlockTag.TableCell, secondRowCell1.Tag); - Assert.AreEqual("cell #21", markdown.Substring(secondRowCell1.SourcePosition, secondRowCell1.SourceLength)); - - var secondRowCell2 = secondRowCell1.NextSibling; - Assert.AreEqual(BlockTag.TableCell, secondRowCell2.Tag); - Assert.AreEqual("cell #22", markdown.Substring(secondRowCell2.SourcePosition, secondRowCell2.SourceLength)); - Assert.IsNull(secondRowCell2.NextSibling); - } - [TestMethod] public void TableCellMismatch() { @@ -332,5 +189,154 @@ public void TableAlignment() } Assert.AreEqual("
H1H2H3H4
1234
", html); } + + + [TestMethod] + public void Example189() + { + var markdown = @"| foo | bar | +| --- | --- | +| baz | bim | "; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
foobar
bazbim
", html); + } + + [TestMethod] + public void Example190() + { + var markdown = @"| abc | defghi | +:-: | -----------: +bar | baz"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
abcdefghi
barbaz
", html); + } + + [TestMethod] + public void Example191() + { + var markdown = @"| f\|oo | +| ------ | +| b `|` az | +| b **|** im |"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
f|oo
b | az
b | im
", html); + } + + [TestMethod] + public void Example192() + { + var markdown = @"| abc | def | +| --- | --- | +| bar | baz | +> bar"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + + string expected = @"
abcdef
barbaz
+
+

bar

+
+"; + + Assert.AreEqual(expected, html); + } + + [TestMethod] + public void Example193() + { + var markdown = @"| abc | def | +| --- | --- | +| bar | baz | +bar + +bar"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + + string expected = @"
abcdef
barbaz
bar
+

bar

+"; + + + Assert.AreEqual(expected, html); + } + + [TestMethod] + public void Example194() + { + var markdown = @"| abc | def | +| --- | +| bar |"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual(@"

| abc | def | +| --- | +| bar |

+", html); + } + + [TestMethod] + public void Example195() + { + var markdown = @"| abc | def | +| --- | --- | +| bar | +| bar | baz | boo |"; + + var ast = CommonMarkConverter.Parse(markdown, ReadSettings); + + string html; + using (var str = new StringWriter()) + { + CommonMarkConverter.ProcessStage3(ast, str, WriteSettings); + html = str.ToString(); + } + Assert.AreEqual("
abcdef
bar
barbaz
", html); + } } } \ No newline at end of file diff --git a/CommonMark/Formatters/HtmlFormatterSlim.cs b/CommonMark/Formatters/HtmlFormatterSlim.cs index 4238b2b..a90dbc9 100644 --- a/CommonMark/Formatters/HtmlFormatterSlim.cs +++ b/CommonMark/Formatters/HtmlFormatterSlim.cs @@ -232,6 +232,9 @@ static void WriteTable(Block table, HtmlTextWriter writer, CommonMarkSettings se var curHeaderCell = header.FirstChild; while (curHeaderCell != null) { + if (numHeadings >= table.TableHeaderAlignments.Length) + break; + var alignment = table.TableHeaderAlignments[numHeadings]; numHeadings++; diff --git a/CommonMark/Parser/TableMethods.cs b/CommonMark/Parser/TableMethods.cs index e898e5b..11ee985 100644 --- a/CommonMark/Parser/TableMethods.cs +++ b/CommonMark/Parser/TableMethods.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Text; using CommonMark.Syntax; @@ -8,6 +9,9 @@ internal static class TableMethods { static List ParseTableLine(StringPart part, StringBuilder sb) { + if (part.Source == null) + return null; + string line = part.Source.Substring(part.StartIndex, part.Length); line = line.TrimEnd('\n'); @@ -69,36 +73,13 @@ static void MakeTableCells(Block row, StringBuilder sb) { var c = asStr[i]; - if (c == '|') + if (c == '|' && (i == 0 || !Utilities.IsInlineSpanSymbol(asStr[i - 1]))) { var text = sb.ToString(); Utilities.ClearStringBuilder(sb); if (text.Length > 0) - { - int length = text.Length; - string trimmedText = text.TrimStart(); - var leadingWhiteSpace = length - trimmedText.Length; - trimmedText = trimmedText.TrimEnd(); - var trailingWhiteSpace = length - leadingWhiteSpace - text.Length; - - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + trimmedText.Length; - cell.StringContent = new StringContent(); - cell.StringContent.Append(trimmedText, 0, trimmedText.Length); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } - - cell.IsOpen = false; - } + MakeCell(text, row, ref offset); offset += text.Length; @@ -130,33 +111,34 @@ static void MakeTableCells(Block row, StringBuilder sb) Utilities.ClearStringBuilder(sb); if (text.Length > 0) - { - var leadingWhiteSpace = 0; - while (leadingWhiteSpace < text.Length && char.IsWhiteSpace(text[leadingWhiteSpace])) leadingWhiteSpace++; - var trailingWhiteSpace = 0; - while (trailingWhiteSpace < text.Length && char.IsWhiteSpace(text[text.Length - trailingWhiteSpace - 1])) trailingWhiteSpace++; - - if (text.Length - leadingWhiteSpace - trailingWhiteSpace > 0) - { - var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); - cell.SourceLastPosition = cell.SourcePosition + text.Length - trailingWhiteSpace - leadingWhiteSpace; - cell.StringContent = new StringContent(); - cell.StringContent.Append(text, leadingWhiteSpace, text.Length - leadingWhiteSpace - trailingWhiteSpace); - - if (row.LastChild == null) - { - row.FirstChild = row.LastChild = cell; - } - else - { - row.LastChild.NextSibling = cell; - row.LastChild = cell; - } + MakeCell(text, row, ref offset); + } + } - cell.IsOpen = false; - } - } + private static void MakeCell(string text, Block row, ref int offset) + { + int length = text.Length; + string trimmedText = text.TrimStart(); + var leadingWhiteSpace = length - trimmedText.Length; + trimmedText = trimmedText.TrimEnd(); + var trailingWhiteSpace = length - leadingWhiteSpace - text.Length; + + var cell = new Block(BlockTag.TableCell, row.SourcePosition + offset + leadingWhiteSpace); + cell.SourceLastPosition = cell.SourcePosition + trimmedText.Length; + cell.StringContent = new StringContent(); + cell.StringContent.Append(trimmedText, 0, trimmedText.Length); + + if (row.LastChild == null) + { + row.FirstChild = row.LastChild = cell; + } + else + { + row.LastChild.NextSibling = cell; + row.LastChild = cell; } + + cell.IsOpen = false; } static void MakeTableRows(Block table, StringBuilder sb) @@ -214,11 +196,13 @@ internal static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings set var columnsPart = parts[0]; var columnsLine = ParseTableLine(columnsPart, sb); - if (columnsLine.Count == 1) return false; + if (columnsLine == null || columnsLine.Count == 0) return false; var headersPart = parts[1]; var headerLine = ParseTableLine(headersPart, sb); - if (headerLine.Count == 1) return false; + if (headerLine == null || headerLine.Count == 0) return false; + + if (headerLine.Count != columnsLine.Count) return false; TableHeaderAlignment[] headerAlignment = new TableHeaderAlignment[headerLine.Count]; @@ -265,14 +249,12 @@ internal static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings set } } - if (columnsLine.Count < 2) return false; - if (headerLine.Count < columnsLine.Count) return false; - var lastTableLine = 1; // it's a table! List tableParts = new List { columnsPart, headersPart }; var takingCharsForTable = columnsPart.Length + headersPart.Length; + bool prevWasTableRow = false; for (var i = 2; i < parts.Length; i++) { var hasPipe = false; @@ -303,7 +285,17 @@ internal static bool TryMakeTable(Block b, LineInfo line, CommonMarkSettings set } } - if (!hasPipe) break; + if (!hasPipe) + { + if(!prevWasTableRow) + break; + + prevWasTableRow = false; + } + else + { + prevWasTableRow = true; + } tableParts.Add(part); takingCharsForTable += part.Length; diff --git a/CommonMark/Utilities.cs b/CommonMark/Utilities.cs index a7f7b18..80855d6 100644 --- a/CommonMark/Utilities.cs +++ b/CommonMark/Utilities.cs @@ -56,6 +56,14 @@ public static bool IsWhitespace(char c) return c == ' ' || c == '\t' || c == '\r' || c == '\n' || c == '\f'; } +#if OptimizeFor45 + [System.Runtime.CompilerServices.MethodImpl(System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)] +#endif + public static bool IsInlineSpanSymbol(char c) + { + return c == '*' || c == '`' || c == '_' || c == '~' || c == '<' || c == '['; + } + /// /// Checks if the given character is an Unicode space or punctuation character. ///