From 726efc9ec1b275a8ce5afaa0d4e5afa4f26d2b75 Mon Sep 17 00:00:00 2001
From: heitorbarbieri
Date: Fri, 3 Apr 2020 13:44:18 -0300
Subject: [PATCH] version 3.0.0 issues #8 #9 #10 #12 #13 #14
---
AntesDoAssembly.txt | 0
COPYRIGHT.md | 0
DeCSHighlighter.html | 0
README.md | 0
build.sbt | 21 +-
dicas.txt | 0
project/assembly.sbt | 1 -
project/build.properties | 2 +-
project/build.sbt | 0
project/license.sbt | 0
project/metals.sbt | 4 +
project/plugins.sbt | 4 +-
src/main/scala/org/bireme/dh/CharSeq.scala | 2 +-
.../scala/org/bireme/dh/DeCS2Lucene.scala | 186 ++++++++++
.../org/bireme/dh/HighlightServlet.scala | 81 +++--
.../org/bireme/dh/HighlightWebServlet.scala | 145 +++++++-
.../scala/org/bireme/dh/Highlighter.scala | 328 ++++++++++++++----
.../scala/org/bireme/dh/Str2HexFile.scala | 0
src/main/scala/org/bireme/dh/Tools.scala | 4 +-
src/main/webapp/META-INF/context.xml | 0
src/main/webapp/WEB-INF/web.xml | 4 +-
src/main/webapp/index.html | 27 +-
.../org/bireme/test/dh/HighlighterTest.scala | 61 ++--
.../org/bireme/test/dh/HighlighterTest2.scala | 159 +++++++++
24 files changed, 869 insertions(+), 160 deletions(-)
mode change 100644 => 100755 AntesDoAssembly.txt
mode change 100644 => 100755 COPYRIGHT.md
mode change 100644 => 100755 DeCSHighlighter.html
mode change 100644 => 100755 README.md
mode change 100644 => 100755 build.sbt
mode change 100644 => 100755 dicas.txt
delete mode 100755 project/assembly.sbt
mode change 100644 => 100755 project/build.properties
mode change 100644 => 100755 project/build.sbt
mode change 100644 => 100755 project/license.sbt
create mode 100755 project/metals.sbt
mode change 100644 => 100755 project/plugins.sbt
mode change 100644 => 100755 src/main/scala/org/bireme/dh/CharSeq.scala
create mode 100755 src/main/scala/org/bireme/dh/DeCS2Lucene.scala
mode change 100644 => 100755 src/main/scala/org/bireme/dh/HighlightServlet.scala
mode change 100644 => 100755 src/main/scala/org/bireme/dh/HighlightWebServlet.scala
mode change 100644 => 100755 src/main/scala/org/bireme/dh/Highlighter.scala
mode change 100644 => 100755 src/main/scala/org/bireme/dh/Str2HexFile.scala
mode change 100644 => 100755 src/main/scala/org/bireme/dh/Tools.scala
mode change 100644 => 100755 src/main/webapp/META-INF/context.xml
mode change 100644 => 100755 src/main/webapp/WEB-INF/web.xml
mode change 100644 => 100755 src/main/webapp/index.html
mode change 100644 => 100755 src/test/scala/org/bireme/test/dh/HighlighterTest.scala
create mode 100644 src/test/scala/org/bireme/test/dh/HighlighterTest2.scala
diff --git a/AntesDoAssembly.txt b/AntesDoAssembly.txt
old mode 100644
new mode 100755
diff --git a/COPYRIGHT.md b/COPYRIGHT.md
old mode 100644
new mode 100755
diff --git a/DeCSHighlighter.html b/DeCSHighlighter.html
old mode 100644
new mode 100755
diff --git a/README.md b/README.md
old mode 100644
new mode 100755
diff --git a/build.sbt b/build.sbt
old mode 100644
new mode 100755
index 75495aa..818811a
--- a/build.sbt
+++ b/build.sbt
@@ -2,23 +2,25 @@ name := "DeCSHighlighter"
version := "0.1"
-scalaVersion := "2.13.1" // "2.12.9"
+scalaVersion := "2.13.1"
-val circeVersion = "0.13.0-M2" //"0.12.0-M4" //"0.11.1" //"0.10.0"
+val playJsonVersion = "2.8.1"
val scalajVersion = "2.4.2" //"2.4.1"
val servletApiVersion = "4.0.1" //"3.0.1"
//val hairyfotrVersion = "0.1.17"
-val scalaTestVersion = "3.2.0-M2" //"3.1.0-SNAP13" //"3.0.8" //"3.0.7"
+val scalaTestVersion = "3.3.0-SNAP2" //"3.2.0-M2"
val supersafeVersion = "1.1.7"
+val luceneVersion = "8.5.0" //"8.4.1"
libraryDependencies ++= Seq(
- "io.circe" %% "circe-core" % circeVersion,
- "io.circe" %% "circe-generic" % circeVersion,
- "io.circe" %% "circe-parser" % circeVersion,
+ "com.typesafe.play" %% "play-json" % playJsonVersion,
"org.scalaj" %% "scalaj-http" % scalajVersion,
"javax.servlet" % "javax.servlet-api" % servletApiVersion % "provided",
+ "org.apache.lucene" % "lucene-core" % luceneVersion,
+ "org.apache.lucene" % "lucene-analyzers-common" % luceneVersion,
"org.scalactic" %% "scalactic" % scalaTestVersion,
"org.scalatest" %% "scalatest" % scalaTestVersion % "test"
+
//"com.artima.supersafe" %% "supersafe" % supersafeVersion
)
@@ -38,3 +40,10 @@ javaOptions in Jetty ++= Seq(
)
containerPort := 7171
+
+// While merging the jars of the assembly, discard the entries whose path is exactly
+// "module-info.class" and apply the previously configured merge strategy to every other path.
+assemblyMergeStrategy in assembly := {
+ case "module-info.class" => MergeStrategy.discard
+ case x =>
+ val oldStrategy = (assemblyMergeStrategy in assembly).value
+ oldStrategy(x)
+}
diff --git a/dicas.txt b/dicas.txt
old mode 100644
new mode 100755
diff --git a/project/assembly.sbt b/project/assembly.sbt
deleted file mode 100755
index 26ac3e5..0000000
--- a/project/assembly.sbt
+++ /dev/null
@@ -1 +0,0 @@
-addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")
diff --git a/project/build.properties b/project/build.properties
old mode 100644
new mode 100755
index 04c59a2..a919a9b
--- a/project/build.properties
+++ b/project/build.properties
@@ -1 +1 @@
-sbt.version = 1.3.3
+sbt.version=1.3.8
diff --git a/project/build.sbt b/project/build.sbt
old mode 100644
new mode 100755
diff --git a/project/license.sbt b/project/license.sbt
old mode 100644
new mode 100755
diff --git a/project/metals.sbt b/project/metals.sbt
new file mode 100755
index 0000000..ab498bf
--- /dev/null
+++ b/project/metals.sbt
@@ -0,0 +1,4 @@
+// DO NOT EDIT! This file is auto-generated.
+// This file enables sbt-bloop to create bloop config files.
+
+addSbtPlugin("ch.epfl.scala" % "sbt-bloop" % "1.4.0-RC1")
diff --git a/project/plugins.sbt b/project/plugins.sbt
old mode 100644
new mode 100755
index a99c689..11e12e0
--- a/project/plugins.sbt
+++ b/project/plugins.sbt
@@ -1 +1,3 @@
-addSbtPlugin("com.earldouglas" % "xsbt-web-plugin" % "4.0.2")
+addSbtPlugin("com.earldouglas" % "xsbt-web-plugin" % "4.2.0")
+addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")
+
diff --git a/src/main/scala/org/bireme/dh/CharSeq.scala b/src/main/scala/org/bireme/dh/CharSeq.scala
old mode 100644
new mode 100755
index b5a9ad5..6f51a1f
--- a/src/main/scala/org/bireme/dh/CharSeq.scala
+++ b/src/main/scala/org/bireme/dh/CharSeq.scala
@@ -23,5 +23,5 @@ case class CharSeq(ch: Char,
other: mutable.Buffer[CharSeq], // Changed from Set because Memory overflow error
id: StringBuilder)
object CharSeq {
- def apply(ch: Char): CharSeq = new CharSeq(ch, mutable.Buffer[CharSeq](), new mutable.StringBuilder())
+ def apply(ch: Char): CharSeq = new CharSeq(ch, mutable.ListBuffer[CharSeq](), new mutable.StringBuilder(0))
}
diff --git a/src/main/scala/org/bireme/dh/DeCS2Lucene.scala b/src/main/scala/org/bireme/dh/DeCS2Lucene.scala
new file mode 100755
index 0000000..cb800e9
--- /dev/null
+++ b/src/main/scala/org/bireme/dh/DeCS2Lucene.scala
@@ -0,0 +1,186 @@
+package org.bireme.dh
+
+import java.io.File
+import java.nio.file.Path
+
+import bruma.master.{Master, MasterFactory, Record}
+import org.apache.lucene.analysis.core.KeywordAnalyzer
+import org.apache.lucene.document.{Document, Field, StringField}
+import org.apache.lucene.index.{IndexWriter, IndexWriterConfig, IndexableField}
+import org.apache.lucene.store.{Directory, FSDirectory}
+import org.bireme.dh.Tools.uniformString
+
+import scala.jdk.CollectionConverters._
+
+/**
+  * Command line application that reads a DeCS Isis master file and creates a Lucene index
+  * having one document for each descriptor/synonym in English, Spanish, Portuguese and French.
+  * As this object extends App, its body is executed in declaration order when the program starts.
+  */
+object DeCS2Lucene extends App {
+  /** Print the command line syntax in the standard error output. */
+  private def usage(): Unit = {
+    System.err.println("usage: DeCS2Lucene -isis=<path> -lucene=<path>")
+    System.err.println("options:")
+    System.err.println("-isis=<path> : path to DeCS Isis master")
+    System.err.println("-lucene=<path> : path to Lucene DeCS index to be created")
+  }
+
+  val stopwords = Set("la", "foram", "amp", "www") // are common words and have other meanings in other languages
+
+  // Parse the "-key=value" arguments. Leading dashes are not part of the key and an argument
+  // without '=' is stored with an empty value.
+  val parameters = args.foldLeft[Map[String,String]](Map()) {
+    case (map, par) =>
+      val split = par.split(" *= *", 2)
+      val value = if (split.length > 1) split(1) else ""
+      map + ((split(0).dropWhile(_ == '-'), value))
+  }
+
+  // Both paths are mandatory. When one of them is missing, show the usage and stop instead of
+  // failing later with a NoSuchElementException.
+  private val paths: Option[(String, String)] = for {
+    isis <- parameters.get("isis").filter(_.nonEmpty)
+    lucene <- parameters.get("lucene").filter(_.nonEmpty)
+  } yield (isis, lucene)
+
+  paths match {
+    case Some((isis, lucene)) => create(isis, lucene)
+    case None =>
+      usage()
+      System.exit(1)
+  }
+
+  /**
+    * Create the Lucene index from the content of the DeCS Isis master file
+    * @param isisPath path to the DeCS Isis master
+    * @param lucenePath path to the Lucene index to be created. An existing index is overwritten
+    */
+  def create(isisPath: String,
+             lucenePath: String): Unit = {
+    val mst: Master = MasterFactory.getInstance(isisPath).open()
+    // The nested try/finally blocks close the master, the directory and the writer even if the
+    // indexing of some record throws an exception.
+    try {
+      val analyzer: KeywordAnalyzer = new KeywordAnalyzer()
+      val indexPath: Path = new File(lucenePath).toPath
+      val directory: Directory = FSDirectory.open(indexPath)
+      try {
+        val config: IndexWriterConfig = new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE)
+        val iwriter: IndexWriter = new IndexWriter(directory, config)
+        try {
+          mst.iterator().asScala.foreach(createDocuments(_, iwriter))
+          iwriter.forceMerge(1)
+        } finally iwriter.close()
+      } finally directory.close()
+    } finally mst.close()
+  }
+
+  /**
+    * Add to the index the documents (descriptors and synonyms in all languages) of an active
+    * record. Records that are not active or that have no identifier (field 99) are skipped
+    * @param rec the Isis database record object
+    * @param writer the Lucene index writer
+    */
+  def createDocuments(rec: Record,
+                      writer: IndexWriter): Unit = {
+    if (rec.getStatus == Record.Status.ACTIVE) {
+      getField(rec, 99).headOption match {
+        case Some((_, id)) =>
+          val uid: String = getField(rec, 480).headOption.map(_._2).getOrElse("")
+          val publType: String = getField(rec, 105).headOption.map(_._2).getOrElse("")
+          val preCod: Boolean = isPreCod(rec)
+          // (descriptor field tag, synonym subfield of field 50, language) in the indexing order
+          val languages: Seq[(Int, Char, String)] = Seq((3, 'p', "pt"), (2, 'e', "es"), (1, 'i', "en"), (16, 'f', "fr"))
+          val docs: Seq[Document] = languages.flatMap {
+            case (field, subFld, lang) => getDocument(id, uid, field, subFld, lang, publType, preCod, rec)
+          }
+
+          docs.foreach(writer.addDocument)
+        case None =>
+          System.err.println("WARNING: skipping active record without identifier (field 99)")
+      }
+    }
+  }
+
+  /**
+    * Compute the 'preCod' flag of a record
+    * @param rec the Isis database record object
+    * @return true if the normalized content of some occurrence of field 106 starts with 'c'
+    */
+  private def isPreCod(rec: Record): Boolean =
+    getField(rec, fldTag=106).exists(x => x._2.trim.headOption.contains('c'))
+
+  /**
+    * Create one document for each descriptor and for each synonym of a record in a given language
+    * @param field tag of the record field having the descriptors in that language
+    * @param subFld id of the subfield of field 50 having the synonyms in that language
+    * @return the descriptor documents followed by the synonym documents
+    */
+  private def getDocument(id: String,
+                          uid: String,
+                          field: Int,
+                          subFld: Char,
+                          lang: String,
+                          publType: String,
+                          preCod: Boolean,
+                          rec: Record): Seq[Document] = {
+    // Descriptor and synonym documents have the same fields, only 'termType' and the term differ
+    def newDocument(termType: String,
+                    term: (IndexableField, IndexableField)): Document = {
+      val doc = new Document()
+      doc.add(new StringField("id", id, Field.Store.YES))
+      doc.add(new StringField("uniqueId", uid, Field.Store.YES))
+      doc.add(new StringField("publicationType", publType, Field.Store.YES))
+      doc.add(new StringField("preCod", if (preCod) "t" else "f", Field.Store.YES))
+      doc.add(new StringField("lang", lang, Field.Store.YES))
+      doc.add(new StringField("termType", termType, Field.Store.YES))
+      doc.add(term._1)
+      doc.add(term._2)
+      doc
+    }
+    val descr: Seq[(IndexableField, IndexableField)] = getDescriptors(field, rec)
+    val synonyms: Seq[(IndexableField, IndexableField)] = getSynonyms(subFld, rec)
+
+    descr.map(newDocument("descriptor", _)) ++ synonyms.map(newDocument("synonym", _))
+  }
+
+  /** Create the ('term', 'term_normalized') index fields for each occurrence of a record field. */
+  private def getDescriptors(field: Int,
+                             rec: Record): Seq[(IndexableField, IndexableField)] = {
+    getField(rec, field).map {
+      case (content, normalized) => (
+        new StringField("term", content, Field.Store.YES),
+        new StringField("term_normalized", normalized, Field.Store.YES)
+      )
+    }
+  }
+
+  /** Create the ('term', 'term_normalized') index fields for each subFld subfield of field 50. */
+  private def getSynonyms(subFld: Char,
+                          rec: Record): Seq[(IndexableField, IndexableField)] = {
+    rec.getFieldList(50).asScala.flatMap(_.getTagSubfields(subFld).asScala.map {
+      fld =>
+        val content: String = fld.getContent
+        (
+          new StringField("term", content, Field.Store.YES),
+          new StringField("term_normalized", uniformString(content), Field.Store.YES)
+        )
+    }).toSeq
+  }
+
+  /**
+    * Retrieve a record field content from an Isis database
+    * @param rec the Isis database record object
+    * @param fldTag the tag of the field to be retrieved
+    * @return a sequence of (field content,field content normalized)
+    */
+  private def getField(rec: Record,
+                       fldTag: Int): Seq[(String, String)] = {
+    rec.getFieldList(fldTag).asScala.map {
+      fld =>
+        val content: String = fld.getContent
+        (content, uniformString(content))
+    }.toSeq
+  }
+}
diff --git a/src/main/scala/org/bireme/dh/HighlightServlet.scala b/src/main/scala/org/bireme/dh/HighlightServlet.scala
old mode 100644
new mode 100755
index 0ddfb50..1143286
--- a/src/main/scala/org/bireme/dh/HighlightServlet.scala
+++ b/src/main/scala/org/bireme/dh/HighlightServlet.scala
@@ -9,7 +9,8 @@ package org.bireme.dh
import java.io.PrintWriter
-import io.circe.Json
+import play.api.libs.json._
+
import javax.servlet.ServletConfig
import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}
@@ -22,7 +23,7 @@ import scala.collection.mutable
* date: September - 2018
*/
class HighlightServlet extends HttpServlet {
- var tree: Map[Char, CharSeq] = _
+ var highlighter: Highlighter = _
/**
* Do initial web app configuration
@@ -32,13 +33,10 @@ class HighlightServlet extends HttpServlet {
super.init(config)
val decsPath: String = config.getServletContext.getInitParameter("DECS_PATH")
- if ((decsPath == null) || decsPath.isEmpty )
+ if ((decsPath == null) || decsPath.isEmpty)
throw new NullPointerException(s"DECS_PATH = $decsPath")
- val terms: Predef.Map[String, String] = Tools.decs2Set(decsPath)
-
- tree = Highlighter.createTermTree(terms)
-
+ highlighter = new Highlighter(decsPath)
println("HighlightServlet is listening ...")
}
@@ -70,39 +68,68 @@ class HighlightServlet extends HttpServlet {
if ((doc == null) || doc.isEmpty ) response.sendError(400, "Missing document parameter")
else {
// Parse parameters
- val prefix0 = request.getParameter("prefix")
- val suffix0 = request.getParameter("suffix")
+ val prefix0: String = request.getParameter("prefix")
+ val suffix0: String = request.getParameter("suffix")
+
+ val scanLang: Option[String] = Option(request.getParameter("scanLang")).flatMap {
+ opt => opt match {
+ case "en" | "es" | "pt" | "fr" => Some(opt)
+ case _ => None
+ }
+ }
+ val outLang: Option[String] = Option(request.getParameter("outLang")).flatMap {
+ opt => opt match {
+ case "en" | "es" | "pt" | "fr" => Some(opt)
+ case _ => None
+ }
+ }
+ val pubType: Option[Char] = Option(request.getParameter("pubType")).map(_.trim.toLowerCase.charAt(0)).flatMap {
+ opt => opt match {
+ case 'h' | 'q' | 't' => Some(opt)
+ case _ => None
+ }
+ }
+ val scanDescriptors: Boolean = true
+ val scanSynonyms: Boolean = Option(request.getParameter("scanSynonyms"))
+ .map(x => x.isEmpty || (x.toLowerCase.head == 't')).getOrElse(true)
+ val onlyPreCod: Boolean = Option(request.getParameter("onlyPreCod"))
+ .map(x => x.isEmpty || (x.toLowerCase.head == 't')).getOrElse(false)
+ val conf: Config = Config(scanLang, outLang, pubType, scanDescriptors, scanSynonyms, onlyPreCod)
+
val prefix = if ((prefix0 == null) || prefix0.isEmpty ) "" else prefix0
val suffix = if ((suffix0 == null) || suffix0.isEmpty ) "" else suffix0
- val sText: String = request.getParameter("showText")
- val sPositions: String = request.getParameter("showPositions")
- val sDescriptors: String = request.getParameter("showDescriptors")
- val showText: Boolean = (sText != null) && (sText.isEmpty || sText.toBoolean)
- val showPositions: Boolean = (sPositions != null) && (sPositions.isEmpty || sPositions.toBoolean)
- val showDescriptors: Boolean = (sDescriptors != null) && (sDescriptors.isEmpty || sDescriptors.toBoolean)
+ val showText: Boolean = Option(request.getParameter("showText"))
+ .map(x => x.isEmpty || (x.toLowerCase.head == 't')).getOrElse(true)
+ val showPositions: Boolean = Option(request.getParameter("showPositions"))
+ .map(x => x.isEmpty || (x.toLowerCase.head == 't')).getOrElse(true)
+ val showDescriptors: Boolean = Option(request.getParameter("showDescriptors"))
+ .map(x => x.isEmpty || (x.toLowerCase.head == 't')).getOrElse(true)
+
+ println(s"scanSynonyms=$scanSynonyms showText=$showText showPositions=$showPositions showDescriptors=$showDescriptors")
// Highlight the input text
- val (marked: String, seq: Seq[(Int, Int, String, String)], set: Seq[String]) = Highlighter.highlight(prefix, suffix, doc, tree)
- val result: mutable.Buffer[(String, Json)] = mutable.Buffer[(String, Json)]()
+ val (marked: String, seq: Seq[(Int, Int, String, String)], set: Seq[String]) =
+ highlighter.highlight(prefix, suffix, doc, conf)
+ val result: mutable.Map[String, JsValue] = mutable.Map[String, JsValue]()
// Show all output (text, positions and descriptors) if the showText, showPositions and showDescriptors parameters
// are absent.
if (!showText && !showPositions && !showDescriptors) {
- result += "text" -> Json.fromString(marked)
- result += ("positions" -> Json.fromValues(seq.map(
- elem => Json.obj("begin" -> Json.fromInt(elem._1), "end" -> Json.fromInt(elem._2),
- "id" -> Json.fromString(elem._3), "descriptor" -> Json.fromString(elem._4)))))
- result += ("descriptors" -> Json.fromValues(set.map(d => Json.fromString(d))))
+ result += "text" -> JsString(marked)
+ result += "positions" -> JsArray(seq.map(
+ elem => JsObject(Map("begin" -> JsNumber(elem._1), "end" -> JsNumber(elem._2), "id" -> JsString(elem._3),
+ "descriptor" -> JsString(elem._4)))))
+ result += ("descriptors" -> JsArray(set.map(d => JsString(d))))
} else {
- if (showText) result += "text" -> Json.fromString(marked)
- if (showPositions) result += ("positions" -> Json.fromValues(seq.map(
- elem => Json.obj("begin" -> Json.fromInt(elem._1), "end" -> Json.fromInt(elem._2), "id" -> Json.fromString(elem._3)))))
- if (showDescriptors) result += ("descriptors" -> Json.fromValues(set.map(d => Json.fromString(d))))
+ if (showText) result += "text" -> JsString(marked)
+ if (showPositions) result += "positions" -> JsArray(seq.map(
+ elem => JsObject(Map("begin" -> JsNumber(elem._1), "end" -> JsNumber(elem._2), "id" -> JsString(elem._3)))))
+ if (showDescriptors) result += "descriptors" -> JsArray(set.map(d => JsString(d)))
}
response.setContentType("application/json")
// Transform the json object into a String and print it
- val resultStr = Json.obj(result.toSeq: _*).spaces2
+ val resultStr = Json.stringify(JsObject(result))
val writer: PrintWriter = response.getWriter
writer.write(resultStr)
writer.close()
diff --git a/src/main/scala/org/bireme/dh/HighlightWebServlet.scala b/src/main/scala/org/bireme/dh/HighlightWebServlet.scala
old mode 100644
new mode 100755
index 0dcd947..d66715e
--- a/src/main/scala/org/bireme/dh/HighlightWebServlet.scala
+++ b/src/main/scala/org/bireme/dh/HighlightWebServlet.scala
@@ -19,8 +19,7 @@ import javax.servlet.http.{HttpServlet, HttpServletRequest, HttpServletResponse}
* date: September - 2018
**/
class HighlightWebServlet extends HttpServlet {
-
- var tree: Map[Char, CharSeq] = _
+ var highlighter: Highlighter = _
/**
* Do initial web app configuration
@@ -33,9 +32,7 @@ class HighlightWebServlet extends HttpServlet {
if ((decsPath == null) || decsPath.isEmpty )
throw new NullPointerException("DECS_PATH")
- val terms: Predef.Map[String, String] = Tools.decs2Set(decsPath)
-
- tree = Highlighter.createTermTree(terms)
+ highlighter = new Highlighter(decsPath)
println("HighlightWebServlet is listening ...")
}
@@ -70,8 +67,20 @@ class HighlightWebServlet extends HttpServlet {
val result: String = if ((doc == null) || doc.isEmpty ) {
html.replace("{{text}}", "").replace("{{descriptors}}", "")
} else {
- val (marked: String, _, set: Seq[String]) = Highlighter.highlight(presu, doc, tree)
- html.replace("{{text}}", marked).replace("{{descriptors}}", set.mkString("\n"))
+ val doc1 = doc.replaceAll("]+?>([^<].+?)", "$1")
+ /*request.getParameterMap.asScala.foreach {
+ case (k,v) => println(s"$k=${v.headOption.getOrElse("")}")
+ }*/
+ val scanLang: Option[String] = Option(request.getParameter("scanLang")).flatMap(x => if ("same".equals(x)) None else Some(x))
+ val outLang: Option[String] = Option(request.getParameter("outLang")).flatMap(x => if ("same".equals(x)) None else Some(x))
+ val pubType: Option[Char] = Option(request.getParameter("pubType")).flatMap(x => if ("all".equals(x)) None else x.headOption)
+ val scanDescriptors: Boolean = true
+ val scanSynonyms: Boolean = Option(request.getParameter("scanSynonyms")).exists("true".equals)
+ val onlyPreCod: Boolean = Option(request.getParameter("onlyPreCod")).exists("true".equals)
+ val conf = Config(scanLang, outLang, pubType, scanDescriptors, scanSynonyms, onlyPreCod)
+ val (marked: String, _, set: Seq[String]) = highlighter.highlight(presu, doc1, conf)
+
+ restoreState(html, conf).replace("{{text}}", marked).replace("{{descriptors}}", set.mkString("\n"))
}
response.setCharacterEncoding("UTF-8")
response.setContentType("text/html;charset=UTF-8")
@@ -81,6 +90,39 @@ class HighlightWebServlet extends HttpServlet {
writer.close()
}
+ /**
+ * Mark as selected, in the given html, the 'scanLang' and 'outLang' options stored in conf.
+ *
+ * NOTE(review): the replacements for pubType, scanSynonyms and onlyPreCod have empty string
+ * literals as written here, so they leave the html unchanged - see the note in the body.
+ *
+ * @param htm the html text to be changed
+ * @param conf the configuration whose scanLang, outLang, pubType, scanSynonyms and onlyPreCod are read
+ * @return the html after all replacements
+ */
+ private def restoreState(htm: String,
+ conf: Config): String = {
+ // Language code -> language name that follows the option tag in the searched text
+ val conv = Map("en" -> "english", "es" -> "spanish", "pt" -> "portuguese", "fr" -> "french")
+
+ // If a scan language is defined, add ' selected' to the option having that value.
+ // A language code absent from conv is searched with an empty language name.
+ val h1 = conf.scanLang.map {
+ lang =>
+ val lang1 = conv.getOrElse(lang, "")
+ htm.replace("option id=\"scanLang\" value=\"" + lang + "\">" + lang1 + "",
+ "option id=\"scanLang\" value=\"" + lang + "\" selected>" + lang1 + "")
+ }.getOrElse(htm)
+ // Same replacement for the output language, applied over the previous result
+ val h2 = conf.outLang.map {
+ lang =>
+ val lang1 = conv.getOrElse(lang, "")
+ h1.replace("option id=\"outLang\" value=\"" + lang + "\">" + lang1 + "",
+ "option id=\"outLang\" value=\"" + lang + "\" selected>" + lang1 + "")
+ }.getOrElse(h1)
+ // NOTE(review): in the three replacements below both arguments are empty strings, so the html
+ // is returned unchanged and 'ch' is never used. Presumably they should mark the pubType option
+ // as selected and the scanSynonyms/onlyPreCod controls as checked - the markup inside the
+ // literals (and the trailing "" above) looks lost; confirm against the original file.
+ val h3 = conf.pubType.map {
+ ch =>
+ h2.replace("",
+ "")
+ }.getOrElse(h2)
+ val h4 = if (conf.scanSynonyms) {
+ h3.replace("",
+ "")
+ } else h3
+ val h5 = if (conf.onlyPreCod) {
+ h4.replace("",
+ "")
+ } else h4
+
+ h5
+ }
+
// Html that will be shown by the application
val html: String =
"""
@@ -178,11 +220,56 @@ class HighlightWebServlet extends HttpServlet {
| var form = document.createElement("form");
| form.setAttribute("method", "post");
| form.setAttribute("action", "#");
+ |
| var hiddenField = document.createElement("input");
| hiddenField.setAttribute("type", "hidden");
| hiddenField.setAttribute("name", "document");
| hiddenField.setAttribute("value", strip);
| form.appendChild(hiddenField);
+ |
+ | var e1 = document.getElementById("scanLang");
+ | var option1 = e1.options[e1.selectedIndex];
+ | var data1 = option1.getAttribute("value");
+ | var hiddenField1 = document.createElement("input");
+ | hiddenField1.setAttribute("type", "hidden");
+ | hiddenField1.setAttribute("name", "scanLang");
+ | hiddenField1.setAttribute("value", data1);
+ | form.appendChild(hiddenField1);
+ |
+ | var e2 = document.getElementById("outLang");
+ | var option2 = e2.options[e2.selectedIndex];
+ | var data2 = option2.getAttribute("value");
+ | var hiddenField2 = document.createElement("input");
+ | hiddenField2.setAttribute("type", "hidden");
+ | hiddenField2.setAttribute("name", "outLang");
+ | hiddenField2.setAttribute("value", data2);
+ | form.appendChild(hiddenField2);
+ |
+ | var e3 = document.getElementById("pubType");
+ | var option3 = e3.options[e3.selectedIndex];
+ | var data3 = option3.getAttribute("value");
+ | var hiddenField3 = document.createElement("input");
+ | hiddenField3.setAttribute("type", "hidden");
+ | hiddenField3.setAttribute("name", "pubType");
+ | hiddenField3.setAttribute("value", data3);
+ | form.appendChild(hiddenField3);
+ |
+ | var e4 = document.getElementById("scanSynonyms");
+ | var checked4 = e4.checked
+ | var hiddenField4 = document.createElement("input");
+ | hiddenField4.setAttribute("type", "hidden");
+ | hiddenField4.setAttribute("name", "scanSynonyms");
+ | hiddenField4.setAttribute("value", checked4.toString());
+ | form.appendChild(hiddenField4);
+ |
+ | var e5 = document.getElementById("onlyPreCod");
+ | var checked5 = e5.checked
+ | var hiddenField5 = document.createElement("input");
+ | hiddenField5.setAttribute("type", "hidden");
+ | hiddenField5.setAttribute("name", "onlyPreCod");
+ | hiddenField5.setAttribute("value", checked5.toString());
+ | form.appendChild(hiddenField5);
+ |
| document.body.appendChild(form);
| form.submit();
| }
@@ -197,7 +284,49 @@ class HighlightWebServlet extends HttpServlet {
|
|
|
- |
Where:
- text - outputs the highlighted input text
- positions - indicates the begin and the end positions for each descriptor/synonym in the input text and its DeCs indentifier
- descriptors - shows each descriptor/synonym found.
+ text - shows the highlighted input text.
+ positions - shows the begin and the end positions of each descriptor/synonym in the input text, its DeCS identifier and the DeCS descriptor/synonym.
+ descriptors - shows each descriptor found.
Example application:
- Description: A web application that displays the highlighted input document.
- Path: http://<host>:<port>/app
+ A web application that displays the highlighted input document.
DeCSHighlighter © Pan American Health Organization, 2018.
-See License at: https://github.com/bireme/DecsHighlighter/blob/master/LICENSE.txt
+See License at: https://github.com/bireme/DecsHighlighter/blob/master/LICENSE.txt