From de7a3d775fbd6eea964bb901f2a4e49124fafac5 Mon Sep 17 00:00:00 2001 From: Olivier Bruchez Date: Sat, 9 Feb 2019 19:28:23 +0100 Subject: [PATCH] Add support for multiline clippings --- build.sbt | 4 +- project/build.properties | 2 +- project/plugins.sbt | 2 +- .../kindleclippings/KindleClippings.scala | 67 +++++++++++++------ 4 files changed, 49 insertions(+), 26 deletions(-) diff --git a/build.sbt b/build.sbt index 56da13a..f5a11a7 100644 --- a/build.sbt +++ b/build.sbt @@ -1,7 +1,7 @@ name := "Kindle clippings to Markdown" -version := "1.1" +version := "1.2" -scalaVersion := "2.12.4" +scalaVersion := "2.12.8" scalafmtOnCompile in ThisBuild := true diff --git a/project/build.properties b/project/build.properties index 247195e..1fc4b80 100644 --- a/project/build.properties +++ b/project/build.properties @@ -1 +1 @@ -sbt.version=1.0.1 \ No newline at end of file +sbt.version=1.2.8 \ No newline at end of file diff --git a/project/plugins.sbt b/project/plugins.sbt index 3ad5362..0451f44 100644 --- a/project/plugins.sbt +++ b/project/plugins.sbt @@ -2,5 +2,5 @@ logLevel := Level.Warn resolvers += Resolver.sonatypeRepo("releases") -addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.14") +addSbtPlugin("com.lucidchart" % "sbt-scalafmt" % "1.16") addSbtPlugin("org.scalastyle" %% "scalastyle-sbt-plugin" % "1.0.0") diff --git a/src/main/scala/org/bruchez/olivier/kindleclippings/KindleClippings.scala b/src/main/scala/org/bruchez/olivier/kindleclippings/KindleClippings.scala index 9edfbd0..8b5c1bb 100644 --- a/src/main/scala/org/bruchez/olivier/kindleclippings/KindleClippings.scala +++ b/src/main/scala/org/bruchez/olivier/kindleclippings/KindleClippings.scala @@ -42,34 +42,57 @@ object KindleClippings { } def apply(lines: List[String]): KindleClippings = { - val LinesPerBook = 5 + val MinLinesPerBook = 5 - val clippingsByBook = mutable.HashMap[Book, Vector[Clipping]]() + @annotation.tailrec + def clippingsByBook(remainingLines: List[String], + acc: List[(Book, Clipping)] = List()): Seq[(Book, Clipping)] = + if (remainingLines.size < MinLinesPerBook) { + acc.reverse + } else { + val title :: pageOrLocation :: empty :: clippingContentsAndRemainingLines = remainingLines - for { - title :: pageOrlocation :: empty :: clippingContents :: separator :: Nil <- lines.grouped( - LinesPerBook) - trimmedTitle = title.trim.replaceAll("\uFEFF", "") - trimmedClippingContents = clippingContents.trim - if trimmedClippingContents.nonEmpty - book = Book(trimmedTitle) - clippingsForBook = clippingsByBook.getOrElse(book, Vector[Clipping]()) - pageOption = pageOrlocation match { - case Page(page) => Try(page.toInt).toOption - case _ => None - } - locationOption = pageOrlocation match { - case Location(location) => Some(location) - case _ => None + val newRemainingLines = + clippingContentsAndRemainingLines.dropWhile(line => !separator(line)).tail + + val clippingContents = clippingContentsAndRemainingLines + .take(clippingContentsAndRemainingLines.size - newRemainingLines.size - 1) + .map(_.trim) + .dropWhile(_.isEmpty) + .reverse + .dropWhile(_.isEmpty) + .reverse + + val newAcc = + if (clippingContents.isEmpty) { + acc + } else { + val clippingContentsAsString = clippingContents.mkString("\n") + + val trimmedTitle = title.trim.replaceAll("\uFEFF", "") + val book = Book(trimmedTitle) + val pageOption = pageOrLocation match { + case Page(page) => Try(page.toInt).toOption + case _ => None + } + val locationOption = pageOrLocation match { + case Location(location) => Some(location) + case _ => None + } + val clipping = Clipping(clippingContentsAsString, pageOption, locationOption) + + (book -> clipping) :: acc + } + + clippingsByBook(newRemainingLines, acc = newAcc) } - clipping = Clipping(trimmedClippingContents, pageOption, locationOption) - } { - clippingsByBook.update(book, clippingsForBook :+ clipping) - } - KindleClippings(Map(clippingsByBook.toSeq.map(kv => kv._1 -> kv._2.distinct): _*)) + KindleClippings( + clippingsByBook(lines).groupBy(_._1).map(kv => kv._1 -> kv._2.map(_._2).distinct)) } + private def separator(string: String): Boolean = string.trim.toSet == Set('=') + private val Page = """.*[Pp]age (\d+) .*""".r private val Location = """.*Loc(?:\.|ation) ([^ ]+) .*""".r