Skip to content

Commit

Permalink
Parse markdown files
Browse files Browse the repository at this point in the history
  • Loading branch information
josefpihrt committed Jul 24, 2023
1 parent e773b63 commit acd1509
Show file tree
Hide file tree
Showing 10 changed files with 172 additions and 23 deletions.
5 changes: 5 additions & 0 deletions src/CommandLine/CaptureSlim.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
// Copyright (c) Josef Pihrt. All rights reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

namespace Orang.CommandLine;

/// <summary>
/// Small immutable value that carries a piece of text together with an index and a length.
/// </summary>
/// <param name="Value">Text of the capture.</param>
/// <param name="Index">Position associated with <paramref name="Value"/>; presumably its offset within the text it was extracted from (TODO confirm against callers).</param>
/// <param name="Length">Length associated with the capture. NOTE(review): not every caller passes <c>Value.Length</c> here; confirm the intended meaning.</param>
internal readonly record struct CaptureSlim(string Value, int Index, int Length);
3 changes: 2 additions & 1 deletion src/CommandLine/CommandLine.csproj
Original file line number Diff line number Diff line change
Expand Up @@ -31,12 +31,13 @@
</PropertyGroup>

<ItemGroup Condition="'$(CI)' == 'true'">
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="All"/>
<PackageReference Include="Microsoft.SourceLink.GitHub" Version="1.1.1" PrivateAssets="All" />
</ItemGroup>

<ItemGroup>
<PackageReference Include="CommandLineParser" Version="2.8.0" />
<PackageReference Include="Microsoft.CodeAnalysis.CSharp" Version="4.0.1" />
<PackageReference Include="Markdig.Signed" Version="0.31.0" />
</ItemGroup>

<ItemGroup>
Expand Down
5 changes: 3 additions & 2 deletions src/CommandLine/Commands/CommonReplaceCommand`1.cs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ private void ExecuteInput(SearchContext context, string input)
predicate: contentFilter.Predicate,
captures: groups);

IEnumerable<ICapture> captures = GetCaptures(groups, context.CancellationToken)
IEnumerable<ICapture> captures = GetCaptures(groups, null, context.CancellationToken)
?? groups.Select(f => (ICapture)new RegexCapture(f));

using (IEnumerator<ICapture> en = captures.GetEnumerator())
Expand Down Expand Up @@ -151,7 +151,7 @@ protected override void ExecuteMatchWithContentCore(
predicate: Options.ContentFilter!.Predicate,
captures: groups);

List<ICapture>? captures = GetCaptures(groups, context.CancellationToken);
List<ICapture>? captures = GetCaptures(groups, fileMatch, context.CancellationToken);

using (IEnumerator<ICapture> en = (captures ?? groups.Select(f => (ICapture)new RegexCapture(f))).GetEnumerator())
{
Expand Down Expand Up @@ -447,6 +447,7 @@ private void WriteMatches(ContentWriter writer, IEnumerator<ICapture> en, Search

protected virtual List<ICapture>? GetCaptures(
List<Capture> groups,
FileMatch? fileMatch,
CancellationToken cancellationToken)
{
return null;
Expand Down
51 changes: 35 additions & 16 deletions src/CommandLine/Commands/SpellcheckCommand.cs
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
using System.Text.RegularExpressions;
using System.Threading;
using Microsoft.CodeAnalysis.Text;
using Orang.FileSystem;
using Orang.Spelling;

namespace Orang.CommandLine;
Expand Down Expand Up @@ -36,33 +37,51 @@ protected override void ExecuteCore(SearchContext context)
return;
}

protected override List<ICapture>? GetCaptures(List<Capture> groups, CancellationToken cancellationToken)
protected override List<ICapture>? GetCaptures(
List<Capture> groups,
FileMatch? fileMatch,
CancellationToken cancellationToken)
{
var captures = new List<ICapture>();
List<TextSpan>? filteredSpans = null;

foreach (Capture capture in groups)
{
foreach (SpellingMatch spellingMatch in SpellcheckState.Spellchecker.AnalyzeText(capture.Value))
IEnumerable<CaptureSlim>? subcaptures = null;

if (fileMatch is not null
&& FileSystemHelpers.HasExtension(fileMatch.Path, "md"))
{
subcaptures = MarkdownProcessor.ProcessText(capture.Value);
}
else
{
var captureInfo = new SpellingCapture(
spellingMatch.Value,
capture.Index + spellingMatch.Index,
containingValue: spellingMatch.Parent,
containingValueIndex: spellingMatch.ParentIndex);
subcaptures = new[] { new CaptureSlim(capture.Value, capture.Index, capture.Length) };
}

if (filteredSpans is null)
filteredSpans = GetFilteredSpans(groups, cancellationToken);
foreach (CaptureSlim subcapture in subcaptures)
{
foreach (SpellingMatch spellingMatch in SpellcheckState.Spellchecker.AnalyzeText(subcapture.Value))
{
var captureInfo = new SpellingCapture(
spellingMatch.Value,
capture.Index + subcapture.Index + spellingMatch.Index,
containingValue: spellingMatch.Parent,
containingValueIndex: spellingMatch.ParentIndex);

var captureSpan = new TextSpan(captureInfo.Index, captureInfo.Length);
if (filteredSpans is null)
filteredSpans = GetFilteredSpans(groups, cancellationToken);

foreach (TextSpan filteredSpan in filteredSpans)
{
if (filteredSpan.IntersectsWith(captureSpan))
continue;
}
var captureSpan = new TextSpan(captureInfo.Index, captureInfo.Length);

foreach (TextSpan filteredSpan in filteredSpans)
{
if (filteredSpan.IntersectsWith(captureSpan))
continue;
}

captures.Add(captureInfo);
captures.Add(captureInfo);
}
}
}

Expand Down
103 changes: 103 additions & 0 deletions src/CommandLine/Markdown/MarkdownProcessor.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
// Copyright (c) Josef Pihrt. All rights reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

using System.Collections.Generic;
using System.Diagnostics;
using Markdig;
using Markdig.Extensions.CustomContainers;
using Markdig.Extensions.Tables;
using Markdig.Helpers;
using Markdig.Syntax;
using Markdig.Syntax.Inlines;

namespace Orang.CommandLine;

/// <summary>
/// Parses markdown text and extracts the textual pieces of selected syntax elements
/// together with their positions.
/// </summary>
internal static class MarkdownProcessor
{
    // Pipeline is built once and reused for every call.
    private static readonly MarkdownPipeline _pipeline = new MarkdownPipelineBuilder().UseAdvancedExtensions().Build();

    /// <summary>
    /// Parses <paramref name="text"/> as markdown and lazily yields one <see cref="CaptureSlim"/> for each
    /// inline code span, literal text run, link label/title, link reference definition label/title
    /// and code block line. Other known element kinds produce nothing.
    /// </summary>
    /// <param name="text">Markdown text to parse.</param>
    /// <returns>A deferred sequence; parsing happens when enumeration starts.</returns>
    public static IEnumerable<CaptureSlim> ProcessText(string text)
    {
        MarkdownDocument document = Markdown.Parse(text, _pipeline);

        foreach (MarkdownObject item in document.Descendants())
        {
            switch (item)
            {
                case CodeInline code:
                    {
                        string content = code.Content;

                        // The index skips the opening delimiter, so the length must describe the content only;
                        // using the full span length (which includes both delimiter runs) would overrun the span.
                        yield return new CaptureSlim(content, code.Span.Start + code.DelimiterCount, content.Length);
                        break;
                    }
                case LiteralInline literal:
                    {
                        string value = literal.Content.ToString();
                        SourceSpan span = literal.Span;

                        // NOTE(review): the offset is added to both index and length when the first character
                        // is escaped - confirm this matches how the span accounts for the escape character.
                        int offset = (literal.IsFirstCharacterEscaped) ? 1 : 0;

                        yield return new CaptureSlim(value, span.Start + offset, span.Length + offset);
                        break;
                    }
                case LinkInline link:
                    {
                        string? label = link.Label;
                        string? title = link.Title;

                        if (!string.IsNullOrEmpty(label))
                            yield return new CaptureSlim(label, link.LabelSpan.Start, link.LabelSpan.Length);

                        if (!string.IsNullOrEmpty(title))
                            yield return new CaptureSlim(title, link.TitleSpan.Start, link.TitleSpan.Length);

                        break;
                    }
                case LinkReferenceDefinition linkReferenceDef:
                    {
                        string? label = linkReferenceDef.Label;
                        string? title = linkReferenceDef.Title;

                        if (!string.IsNullOrEmpty(label))
                            yield return new CaptureSlim(label, linkReferenceDef.LabelSpan.Start, linkReferenceDef.LabelSpan.Length);

                        if (!string.IsNullOrEmpty(title))
                            yield return new CaptureSlim(title, linkReferenceDef.TitleSpan.Start, linkReferenceDef.TitleSpan.Length);

                        break;
                    }
                case CodeBlock codeBlock:
                    {
                        // StringLineGroup.Lines is the raw backing array: it is null when the block has no lines
                        // and may contain unused slots past Count, so only the first Count entries are read.
                        StringLineGroup lines = codeBlock.Lines;

                        for (int i = 0; i < lines.Count; i++)
                        {
                            StringSlice slice = lines.Lines[i].Slice;
                            yield return new CaptureSlim(slice.ToString(), slice.Start, slice.Length);
                        }

                        break;
                    }
                case ContainerInline: // EmphasisInline, DelimiterInline, EmphasisDelimiterInline, LinkDelimiterInline
                case AutolinkInline:
                case HtmlEntityInline:
                case LineBreakInline:
                case HtmlInline:
                case HeadingBlock:
                case ListBlock:
                case ListItemBlock:
                case ParagraphBlock:
                case ThematicBreakBlock:
                case LinkReferenceDefinitionGroup:
                case Table:
                case TableRow:
                case TableCell:
                case QuoteBlock:
                case CustomContainer:
                    {
                        // Known element kinds that contribute no capture of their own.
                        break;
                    }
                default:
                    {
                        // Unhandled element kind: surfaces in debug builds so the switch can be extended.
                        Debug.Fail(item.GetType().FullName);
                        break;
                    }
            }
        }
    }
}
6 changes: 3 additions & 3 deletions src/CommandLine/OptionValues.cs
Original file line number Diff line number Diff line change
Expand Up @@ -114,9 +114,9 @@ internal static class OptionValues

public static readonly KeyValuePairOptionValue Encoding = KeyValuePairOptionValue.Create("encoding", MetaValues.Encoding, shortKey: "e");

public static readonly KeyValuePairOptionValue FileProperty_CreationTime = KeyValuePairOptionValue.Create("creation-time", "<DATE>", shortKey: "ct", helpValue: "c[reation-]t[ime][=<DATE>]", description: "Show file's creation time and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);
public static readonly KeyValuePairOptionValue FileProperty_ModifiedTime = KeyValuePairOptionValue.Create("modified-time", "<DATE>", shortKey: "mt", helpValue: "m[odified-]t[ime][=<DATE>]", description: "Show file's modified time and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);
public static readonly KeyValuePairOptionValue FileProperty_Size = KeyValuePairOptionValue.Create("size", "<NUM>", helpValue: "s[ize][=<NUM>]", description: "Show file's size and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);
public static readonly KeyValuePairOptionValue FileProperty_CreationTime = KeyValuePairOptionValue.Create("creation-time", "<DATE>", shortKey: "ct", helpValue: "c[reation-]t[ime]=<DATE>", description: "Show file's creation time and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);
public static readonly KeyValuePairOptionValue FileProperty_ModifiedTime = KeyValuePairOptionValue.Create("modified-time", "<DATE>", shortKey: "mt", helpValue: "m[odified-]t[ime]=<DATE>", description: "Show file's modified time and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);
public static readonly KeyValuePairOptionValue FileProperty_Size = KeyValuePairOptionValue.Create("size", "<NUM>", helpValue: "s[ize]=<NUM>", description: "Show file's size and optionally define condition (See 'Expression syntax' for other expressions).", canContainExpression: true);

public static readonly KeyValuePairOptionValue Group = KeyValuePairOptionValue.Create("group", "<GROUP_NAME>", shortKey: "g");
public static readonly KeyValuePairOptionValue Length = KeyValuePairOptionValue.Create("length", "<NUM>", shortKey: "", description: "Include matches whose length matches the expression (See 'Expression syntax' for other expressions).", canContainExpression: true);
Expand Down
2 changes: 1 addition & 1 deletion src/CommandLine/Properties/launchSettings.json
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"profiles": {
"CommandLine": {
"commandName": "Project",
"commandLineArgs": "help -v d"
"commandLineArgs": "spellcheck \"C:\\code\\jp\\josefpihrt.github.io\\docs\" --min-word-length 3 --max-word-length 35 -o c:/spellcheck/tmp.txt v=n -v d -i .git,.vs l li e ne --words c:/spellcheck/words -e txt,md,mdx,rst,adoc --interactive"
}
}
}
1 change: 1 addition & 0 deletions src/Core/Properties/AssemblyInfo.cs
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
[assembly: InternalsVisibleTo("Orang, PublicKey=00240000048000009400000006020000002400005253413100040000010001009ff202171ab25d708192b490c52c1a373c74a2849c734fd9f545bfedc92b61d4e10d356cd26213ef6d96af669a9b570cd6277d590c338cfc00ccc9a15d6ad5b08ac3a8a09db3eae536d653f4acb9c7e992162129b67b4bc72c08af7d67a48ecde99c53a5d2cd44b1e8179368f6db2ec7665061e3ef4029703df4b49952bd0de4")]
[assembly: InternalsVisibleTo("Orang.CommandLine.Core, PublicKey=00240000048000009400000006020000002400005253413100040000010001009ff202171ab25d708192b490c52c1a373c74a2849c734fd9f545bfedc92b61d4e10d356cd26213ef6d96af669a9b570cd6277d590c338cfc00ccc9a15d6ad5b08ac3a8a09db3eae536d653f4acb9c7e992162129b67b4bc72c08af7d67a48ecde99c53a5d2cd44b1e8179368f6db2ec7665061e3ef4029703df4b49952bd0de4")]
[assembly: InternalsVisibleTo("Orang.FileSystem, PublicKey=00240000048000009400000006020000002400005253413100040000010001009ff202171ab25d708192b490c52c1a373c74a2849c734fd9f545bfedc92b61d4e10d356cd26213ef6d96af669a9b570cd6277d590c338cfc00ccc9a15d6ad5b08ac3a8a09db3eae536d653f4acb9c7e992162129b67b4bc72c08af7d67a48ecde99c53a5d2cd44b1e8179368f6db2ec7665061e3ef4029703df4b49952bd0de4")]
[assembly: InternalsVisibleTo("Orang.Spelling, PublicKey=00240000048000009400000006020000002400005253413100040000010001009ff202171ab25d708192b490c52c1a373c74a2849c734fd9f545bfedc92b61d4e10d356cd26213ef6d96af669a9b570cd6277d590c338cfc00ccc9a15d6ad5b08ac3a8a09db3eae536d653f4acb9c7e992162129b67b4bc72c08af7d67a48ecde99c53a5d2cd44b1e8179368f6db2ec7665061e3ef4029703df4b49952bd0de4")]
9 changes: 9 additions & 0 deletions src/FileSystem/FileSystem/FileSystemHelpers.cs
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,15 @@ public static int GetExtensionIndex(string path)
return path.Length;
}

/// <summary>
/// Determines whether the extension of <paramref name="path"/> is exactly <paramref name="extension"/>.
/// </summary>
/// <param name="path">Path to examine.</param>
/// <param name="extension">Expected extension without the leading separator (for example <c>md</c>).</param>
/// <returns>
/// <c>true</c> if the text following the position reported by <c>GetExtensionIndex</c> is non-empty and
/// equals <paramref name="extension"/> using ordinal (case-sensitive) comparison; otherwise <c>false</c>.
/// </returns>
public static bool HasExtension(string path, string extension)
{
    int index = GetExtensionIndex(path);

    // Number of characters after the reported index (index + 1 skips the character at that index,
    // presumably the period).
    int length = path.Length - index - 1;

    // Lengths must match as well: comparing only extension.Length characters would accept any extension
    // that merely starts with the expected one (e.g. "mdx" for "md").
    return index >= 0
        && length > 0
        && length == extension.Length
        && string.CompareOrdinal(path, index + 1, extension, 0, length) == 0;
}

public static bool IsDirectorySeparator(char ch)
{
return ch == Path.DirectorySeparatorChar
Expand Down
10 changes: 10 additions & 0 deletions src/FileSystem/IsExternalInit.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
// Copyright (c) Josef Pihrt. All rights reserved. Licensed under the Apache License, Version 2.0. See License.txt in the project root for license information.

namespace System.Runtime.CompilerServices;

using global::System.ComponentModel;

// Marker type without members. The C# compiler looks up System.Runtime.CompilerServices.IsExternalInit
// when compiling init-only setters (and therefore positional records). Presumably it is declared here
// because a targeted framework does not provide the type - TODO confirm against the project's target frameworks.
[EditorBrowsable(EditorBrowsableState.Never)]
internal static class IsExternalInit
{
}

0 comments on commit acd1509

Please sign in to comment.