diff --git a/build.sbt b/build.sbt index 73e1b427d..e02328484 100644 --- a/build.sbt +++ b/build.sbt @@ -66,6 +66,7 @@ lazy val mdoc = project "com.vladsch.flexmark" % "flexmark-all" % "0.26.4", "com.lihaoyi" %% "fansi" % "0.2.5", "io.methvin" % "directory-watcher" % "0.7.0", + "me.xdrop" % "fuzzywuzzy" % "1.1.9", // for link hygiene "did you mean?" "ch.epfl.scala" %% "scalafix-core" % V.scalafix ) ) diff --git a/docs/readme.md b/docs/readme.md index e6f3a2bf2..f895c7600 100644 --- a/docs/readme.md +++ b/docs/readme.md @@ -337,9 +337,13 @@ invalid. # My title Link to [my title](#my-title). +Link to [typo section](#mytitle). Link to [old section](#doesnotexist). ``` +Observe that mdoc suggests a fix if there exists a header that is similar to the +unknown link. + ### Script semantics Mdoc interprets code fences as normal Scala programs instead of as if they're diff --git a/mdoc-docs/src/main/scala/mdoc/docs/MdocModifier.scala b/mdoc-docs/src/main/scala/mdoc/docs/MdocModifier.scala index 009ecdfe2..ef6278208 100644 --- a/mdoc-docs/src/main/scala/mdoc/docs/MdocModifier.scala +++ b/mdoc-docs/src/main/scala/mdoc/docs/MdocModifier.scala @@ -25,7 +25,7 @@ class MdocModifier(context: Context) extends StringModifier { val cleanInput = Input.VirtualFile(code.filename, code.text) val markdown = Markdown.toMarkdown(cleanInput, markdownSettings, myReporter, context.settings) val links = DocumentLinks.fromMarkdown(GitHubIdGenerator, RelativePath("readme.md"), cleanInput) - LinkHygiene.lint(List(links), myReporter) + LinkHygiene.lint(List(links), myReporter, verbose = false) val stdout = fansi.Str(myStdout.toString()).plainText if (myReporter.hasErrors || myReporter.hasWarnings) { if (info != "crash") { diff --git a/mdoc/src/main/scala/mdoc/internal/cli/MainOps.scala b/mdoc/src/main/scala/mdoc/internal/cli/MainOps.scala index 819bbc165..1adc5b934 100644 --- a/mdoc/src/main/scala/mdoc/internal/cli/MainOps.scala +++ b/mdoc/src/main/scala/mdoc/internal/cli/MainOps.scala @@ -29,7 
+29,7 @@ final class MainOps( def lint(): Unit = { val docs = DocumentLinks.fromGeneratedSite(settings, reporter) - LinkHygiene.lint(docs, reporter) + LinkHygiene.lint(docs, reporter, settings.verbose) } def handleMarkdown(file: InputFile): Exit = synchronized { diff --git a/mdoc/src/main/scala/mdoc/internal/cli/Settings.scala b/mdoc/src/main/scala/mdoc/internal/cli/Settings.scala index 19e5d7fe6..b78e345ea 100644 --- a/mdoc/src/main/scala/mdoc/internal/cli/Settings.scala +++ b/mdoc/src/main/scala/mdoc/internal/cli/Settings.scala @@ -51,6 +51,8 @@ case class Settings( ) @ExtraName("test") check: Boolean = false, + @Description("Include additional diagnostics for debugging potential problems.") + verbose: Boolean = false, @Description( "Classpath to use when compiling Scala code examples. " + "Defaults to the current thread's classpath." diff --git a/mdoc/src/main/scala/mdoc/internal/markdown/DocumentLinks.scala b/mdoc/src/main/scala/mdoc/internal/markdown/DocumentLinks.scala index c3b3f9f7b..38a003090 100644 --- a/mdoc/src/main/scala/mdoc/internal/markdown/DocumentLinks.scala +++ b/mdoc/src/main/scala/mdoc/internal/markdown/DocumentLinks.scala @@ -51,8 +51,7 @@ object DocumentLinks { val ls = FileIO.listAllFilesRecursively(settings.out) ls.files.foreach { relpath => val isMarkdown = PathIO.extension(relpath.toNIO) == "md" - val hasMatchingInputFile = settings.in.resolve(relpath).isFile - if (isMarkdown && hasMatchingInputFile) { + if (isMarkdown) { val abspath = ls.root.resolve(relpath) val input = Input.VirtualFile(relpath.toString(), FileIO.slurp(abspath, settings.charset)) links += DocumentLinks.fromMarkdown(settings.headerIdGenerator, relpath, input) diff --git a/mdoc/src/main/scala/mdoc/internal/markdown/LinkHygiene.scala b/mdoc/src/main/scala/mdoc/internal/markdown/LinkHygiene.scala index 808a3bbe1..ff9952985 100644 --- a/mdoc/src/main/scala/mdoc/internal/markdown/LinkHygiene.scala +++ b/mdoc/src/main/scala/mdoc/internal/markdown/LinkHygiene.scala @@ -2,9 
+2,10 @@ package mdoc.internal.markdown import java.net.URI import mdoc.Reporter +import me.xdrop.fuzzywuzzy.FuzzySearch object LinkHygiene { - def lint(docs: List[DocumentLinks], reporter: Reporter): Unit = { + def lint(docs: List[DocumentLinks], reporter: Reporter, verbose: Boolean): Unit = { val isValidHeading = docs.iterator.flatMap(_.absoluteDefinitions).toSet for { doc <- docs @@ -15,14 +16,42 @@ object LinkHygiene { if !isValidHeading(uri) } { val isAbsolutePath = uri.getPath.startsWith("/") + val debug = + if (verbose) { + val query = uri.toString + val candidates = isValidHeading + .map { candidate => + val score = FuzzySearch.ratio(candidate.toString, query) + score -> f"$score%-3s $candidate" + } + .toSeq + .sortBy(-_._1) + .map(_._2) + .mkString("\n ") + s"\nisValidHeading:\n $candidates" + } else "" + val help = getSimilarHeading(isValidHeading, uri) match { + case None => "." + case Some(similar) => s", did you mean '$similar'?" + } val hint = if (isAbsolutePath) - s". To fix this problem, either make the link relative or turn it into complete URL such as http://example.com$uri." + s" To fix this problem, either make the link relative or turn it into complete URL such as http://example.com$uri." 
else "" - reporter.warning(reference.pos, s"Unknown link '$uri'$hint") + reporter.warning(reference.pos, s"Unknown link '$uri'$help$hint$debug") } } + private def getSimilarHeading(candidates: Set[URI], query: URI): Option[URI] = { + val queryString = query.toString + val similar = for { + candidate <- candidates.iterator + score = FuzzySearch.ratio(queryString, candidate.toString) + if score > 90 // discard noisy candidates + } yield score -> candidate + if (similar.isEmpty) None + else Some(similar.maxBy(_._1)._2) + } private def resolve(baseUri: URI, reference: String): Option[URI] = { try { Some(baseUri.resolve(reference).normalize()) diff --git a/readme.md b/readme.md index 07a1f8e6f..fa7430dac 100644 --- a/readme.md +++ b/readme.md @@ -503,18 +503,25 @@ Before: # My title Link to [my title](#my-title). +Link to [typo section](#mytitle). Link to [old section](#doesnotexist). ```` Error: ```` -warning: readme.md:4:9: warning: Unknown link 'readme.md#doesnotexist' +warning: readme.md:4:9: warning: Unknown link 'readme.md#mytitle', did you mean 'readme.md#my-title'? +Link to [typo section](#mytitle). + ^^^^^^^^^^^^^^^^^^^^^^^^ +warning: readme.md:5:9: warning: Unknown link 'readme.md#doesnotexist'. Link to [old section](#doesnotexist). ^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ```` +Observe that mdoc suggests a fix if there exists a header that is similar to the +unknown link. + ### Script semantics Mdoc interprets code fences as normal Scala programs instead of as if they're @@ -727,6 +734,9 @@ Common options: produce a diff against an existing site. Useful for asserting in CI that a site is up-to-date. + --verbose + Include additional diagnostics for debugging potential problems. + --classpath String (default: "") Classpath to use when compiling Scala code examples. Defaults to the current thread's classpath. 
diff --git a/tests/unit/src/test/scala/tests/markdown/LinkHygieneSuite.scala b/tests/unit/src/test/scala/tests/markdown/LinkHygieneSuite.scala index 13582dcc8..b74913b2e 100644 --- a/tests/unit/src/test/scala/tests/markdown/LinkHygieneSuite.scala +++ b/tests/unit/src/test/scala/tests/markdown/LinkHygieneSuite.scala @@ -13,7 +13,7 @@ import mdoc.internal.markdown.LinkHygiene class LinkHygieneSuite extends FunSuite with DiffAssertions { private val myOut = new ByteArrayOutputStream() private val reporter = new ConsoleReporter(new PrintStream(myOut)) - def check(name: String, original: String, expected: String): Unit = { + def check(name: String, original: String, expected: String, verbose: Boolean = false): Unit = { test(name) { myOut.reset() reporter.reset() @@ -22,7 +22,7 @@ class LinkHygieneSuite extends FunSuite with DiffAssertions { .default(root) .copy(reportRelativePaths = true, in = root, out = root) val links = DocumentLinks.fromGeneratedSite(settings, reporter) - LinkHygiene.lint(links, reporter) + LinkHygiene.lint(links, reporter, verbose) val obtained = fansi.Str(myOut.toString()).plainText assertNoDiffOrPrintExpected(obtained, expected) } @@ -55,10 +55,10 @@ class LinkHygieneSuite extends FunSuite with DiffAssertions { |* [name](a.md#name) | """.stripMargin, - """|warning: a.md:3:7: warning: Unknown link 'a.md#does-not-exist' + """|warning: a.md:3:7: warning: Unknown link 'a.md#does-not-exist'. |Error [link](#does-not-exist) failed. | ^^^^^^^^^^^^^^^^^^^^^^^ - |warning: a.md:4:6: warning: Unknown link 'a.md#sectionn' + |warning: a.md:4:6: warning: Unknown link 'a.md#sectionn', did you mean 'a.md#section'? |Typo [section](#sectionn) failed. 
| ^^^^^^^^^^^^^^^^^^^^ """.stripMargin @@ -113,4 +113,25 @@ class LinkHygieneSuite extends FunSuite with DiffAssertions { """.stripMargin ) + check( + "verbose", + """ + |/a.md + |# Header 1 + |[2](b.md#header) + |/b.md + |# Header 2 + """.stripMargin, + """|warning: a.md:2:1: warning: Unknown link 'b.md#header', did you mean 'b.md#header-2'? + |isValidHeading: + | 92 b.md#header-2 + | 83 a.md#header-1 + | 53 b.md + | 40 a.md + |[2](b.md#header) + |^^^^^^^^^^^^^^^^ + |""".stripMargin, + verbose = true + ) + }