diff --git a/parser/src/main/scala/org/globalnames/parser/Parser.scala b/parser/src/main/scala/org/globalnames/parser/Parser.scala index 926d29a..ecc36af 100644 --- a/parser/src/main/scala/org/globalnames/parser/Parser.scala +++ b/parser/src/main/scala/org/globalnames/parser/Parser.scala @@ -78,7 +78,6 @@ class Parser(preprocessorResult: Preprocessor.Result, val r = FactoryAST.name(uninomial = uninomial1M, species = sp.some, infraspecies = ig) - .changeWarningsRef((n1M.node, n2M.node), (n1M.node.uninomial, uninomial1M.node)) (hc, r.some) case Right((hc, n)) => (hc, n) } @@ -136,7 +135,6 @@ class Parser(preprocessorResult: Preprocessor.Result, val u1 = FactoryAST.uninomial(u) val nm = FactoryAST.name(uninomial = u1, species = s, comparison = c.some) nm.add(warnings = Seq((3, "Name comparison"))) - .changeWarningsRef((u.node, u1.node)) } } @@ -165,9 +163,9 @@ class Parser(preprocessorResult: Preprocessor.Result, infrOpt <- lift(maybeInfraspeciesGroupM) } yield Name(u1, comparison = cmp, species = species.some, infraspecies = infrOpt) - nm.changeWarningsRef((ssM.node, uM1.node)) + nm } - name.changeWarningsRef((uwM.node, uM1.node)) + name } } @@ -276,7 +274,7 @@ class Parser(preprocessorResult: Preprocessor.Result, (uninomial ~ softSpace ~ rankUninomial ~ softSpace ~ uninomial) ~> { (u1M: NodeMeta[Uninomial], rM: NodeMeta[Rank], u2M: NodeMeta[Uninomial]) => val r = for { u1 <- u1M; r <- rM; u2 <- u2M } yield u2.copy(rank = r.some, parent = u1.some) - r.changeWarningsRef((u2M.node, r.node)) + r } } @@ -284,7 +282,7 @@ class Parser(preprocessorResult: Preprocessor.Result, uninomialWord ~ (space ~ authorship).? ~> { (uM: NodeMeta[UninomialWord], aM: Option[NodeMeta[Authorship]]) => val r = for { u <- uM; a <- lift(aM) } yield Uninomial(u, a) - r.changeWarningsRef((uM.node, r.node)) + r } } @@ -324,7 +322,7 @@ class Parser(preprocessorResult: Preprocessor.Result, (uwM: NodeMeta[UninomialWord], wM: NodeMeta[UninomialWord]) => val uw1M = for { uw <- uwM; w <- wM } yield uw.copy(pos = CapturePosition(uw.pos.start, w.pos.end)) - uw1M.changeWarningsRef((uwM.node, uw1M.node), (wM.node, uw1M.node)) + uw1M } } @@ -429,8 +427,6 @@ class Parser(preprocessorResult: Preprocessor.Result, yield bau.copy(authors = authors1) bau1M.add(warnings = Seq((2, "Ex authors are not required"))) - .changeWarningsRef((bauM.node.authors, authors1M.node), (bauM.node, bau1M.node), - (exM.node, bau1M.node)) } } @@ -442,8 +438,7 @@ class Parser(preprocessorResult: Preprocessor.Result, val bau1M = for { bau <- bauM; authors1 <- authors1M; _ <- emendM } yield bau.copy(authors = authors1) - bau1M.changeWarningsRef((bauM.node.authors, authors1M.node), (bauM.node, bau1M.node), - (emendauM.node, bau1M.node)) + bau1M } } @@ -452,7 +447,7 @@ class Parser(preprocessorResult: Preprocessor.Result, (bauM: NodeMeta[Authorship], cauM: NodeMeta[Authorship]) => val r = for { bau <- bauM; cau <- cauM } yield bau.copy(combination = cau.authors.some, basionymParsed = true) - r.changeWarningsRef((bauM.node, r.node)) + r } } @@ -462,7 +457,6 @@ class Parser(preprocessorResult: Preprocessor.Result, val authors1 = aM.map { a => a.copy(authors = a.authors.copy(year = yM.node.some)) } FactoryAST.authorship(authors = authors1, inparenthesis = true, basionymParsed = true) .add(warnings = Seq((2, "Misformed basionym year"))) - .changeWarningsRef((aM.node, authors1.node)) } } @@ -474,7 +468,7 @@ class Parser(preprocessorResult: Preprocessor.Result, '(' ~ softSpace ~ authorship1 ~ softSpace ~ ')' ~> { (aM: NodeMeta[Authorship]) => val r = aM.map { a => a.copy(basionymParsed = true, inparenthesis = true) } - r.changeWarningsRef((aM.node, r.node)) + r } } @@ -483,12 +477,20 @@ class Parser(preprocessorResult: Preprocessor.Result, (aM: NodeMeta[Authorship]) => val r = aM.map { a => a.copy(basionymParsed = true, inparenthesis = true) } r.add(warnings = Seq((3, "Authroship in double parentheses"))) - .changeWarningsRef((aM.node, r.node)) } } def authorship1: RuleNodeMeta[Authorship] = rule { - authorsGroup ~> { (a: NodeMeta[AuthorsGroup]) => FactoryAST.authorship(a) } + (authorsYear | authorsGroup) ~> { (a: NodeMeta[AuthorsGroup]) => FactoryAST.authorship(a) } + } + + def authorsYear: RuleNodeMeta[AuthorsGroup] = rule { + authorsGroup ~ softSpace ~ (',' ~ softSpace).? ~ year ~> { + (aM: NodeMeta[AuthorsGroup], yM: NodeMeta[Year]) => + val a1 = for { a <- aM; y <- yM } + yield a.copy(authors = a.authors.copy(year = y.some)) + a1 + } } def authorsGroup: RuleNodeMeta[AuthorsGroup] = rule { @@ -528,7 +530,7 @@ class Parser(preprocessorResult: Preprocessor.Result, }) ~ (softSpace ~ (',' ~ softSpace).? ~ year).? ~> { (atM: NodeMeta[AuthorsTeam], yM: Option[NodeMeta[Year]]) => val at1M = for { at <- atM; y <- lift(yM) } yield at.copy(year = y) - at1M.changeWarningsRef((atM.node, at1M.node)) + at1M } } @@ -561,7 +563,7 @@ class Parser(preprocessorResult: Preprocessor.Result, author2 ~ softSpace ~ filius ~> { (auM: NodeMeta[Author], filiusM: NodeMeta[AuthorWord]) => val au1M = for { au <- auM; filius <- filiusM } yield au.copy(filius = filius.some) - au1M.changeWarningsRef((auM.node, au1M.node)) + au1M } } @@ -583,11 +585,11 @@ class Parser(preprocessorResult: Preprocessor.Result, def authorWordSep: RuleNodeMeta[AuthorWord] = rule { (ch(dash) ~ authorWordSoft ~> { (awM: NodeMeta[AuthorWord]) => val aw1M = for (aw <- awM) yield aw.copy(separator = AuthorWordSeparator.Dash) - aw1M.changeWarningsRef((awM.node, aw1M.node)) + aw1M }) | (softSpace ~ authorWord ~> { (awM: NodeMeta[AuthorWord]) => - val aw1M = awM.node.copy(separator = AuthorWordSeparator.Space) - aw1M.changeWarningsRef((awM.node, aw1M.node)) + val aw1M = for (aw <- awM) yield { aw.copy(separator = AuthorWordSeparator.Space) } + aw1M }) } @@ -657,7 +659,6 @@ class Parser(preprocessorResult: Preprocessor.Result, (yStartM: NodeMeta[Year], yEnd: CapturePosition) => val yrM = yStartM.map { yStart => yStart.copy(approximate = true, rangeEnd = Some(yEnd)) } yrM.add(warnings = Seq((3, "Years range"))) - .changeWarningsRef((yStartM.node, yrM.node)) } } @@ -670,7 +671,6 @@ class Parser(preprocessorResult: Preprocessor.Result, (yM: NodeMeta[Year]) => val yrM = yM.map { y => y.copy(approximate = true) } yrM.add(warnings = Seq((3, "Year with square brakets"))) - .changeWarningsRef((yM.node, yrM.node)) } } @@ -685,7 +685,6 @@ class Parser(preprocessorResult: Preprocessor.Result, (yM: NodeMeta[Year]) => val y1M = yM.map { y => y.copy(approximate = true) } y1M.add(warnings = Seq((2, "Year with parentheses"))) - .changeWarningsRef((yM.node, y1M.node)) } } @@ -694,7 +693,6 @@ class Parser(preprocessorResult: Preprocessor.Result, (yM: NodeMeta[Year], pos: CapturePosition) => val y1M = yM.map { y => y.copy(alpha = pos.some) } y1M.add(warnings = Seq((2, "Year with latin character"))) - .changeWarningsRef((yM.node, y1M.node)) } } @@ -763,15 +761,7 @@ object Parser { case class NodeMeta[T <: AstNode](node: T, warnings: Vector[Warning] = Vector.empty) extends NodeMetaBase[T] { - val rawWarnings = warnings.map { w => (w.level, w.message) } - - def changeWarningsRef(substitutions: (AstNode, AstNode)*): NodeMeta[T] = { - val substWarnsMap = substitutions.toMap - val ws = warnings.map { w => - substWarnsMap.get(w.node).map { subst => w.copy(node = subst) }.getOrElse(w) - } - this.copy(warnings = ws) - } + val rawWarnings: Vector[(Int, String)] = warnings.map { w => (w.level, w.message) } def add(warnings: Seq[(Int, String)] = Seq.empty): NodeMeta[T] = { if (warnings.isEmpty) this diff --git a/parser/src/test/resources/test_data.txt b/parser/src/test/resources/test_data.txt index 51fc87b..98b4311 100644 --- a/parser/src/test/resources/test_data.txt +++ b/parser/src/test/resources/test_data.txt @@ -1969,16 +1969,16 @@ Anthoscopus Cabanis [1851?] 6b12b541-b58b-5f11-ba66-bb314b53813f|Anthoscopus Cabanis [1851?]|Anthoscopus|Anthoscopus|Cabanis (1851?)|(1851?)|3 Trismegistia monodii Ando, 1973 [1974] -{"name_string_id":"f396d2d0-b14e-537f-ae8f-c383310f813e","parsed":true,"quality":3,"quality_warnings":[[3,"Unparseable tail"]],"parser_version":"test_version","verbatim":"Trismegistia monodii Ando, 1973 [1974]","normalized":"Trismegistia monodii Ando 1973","canonical_name":{"value":"Trismegistia monodii","value_ranked":"Trismegistia monodii"},"hybrid":false,"surrogate":false,"unparsed_tail":" [1974]","virus":false,"bacteria":false,"details":[{"genus":{"value":"Trismegistia"},"specific_epithet":{"value":"monodii","authorship":{"value":"Ando 1973","basionym_authorship":{"authors":["Ando"],"year":{"value":"1973"}}}}}],"positions":[["genus",0,12],["specific_epithet",13,20],["author_word",21,25],["year",27,31]]} -f396d2d0-b14e-537f-ae8f-c383310f813e|Trismegistia monodii Ando, 1973 [1974]|Trismegistia monodii|Trismegistia monodii|Ando 1973|1973|3 +{"name_string_id":"f396d2d0-b14e-537f-ae8f-c383310f813e","parsed":true,"quality":3,"quality_warnings":[[3,"Year with square brakets"]],"parser_version":"test_version","verbatim":"Trismegistia monodii Ando, 1973 [1974]","normalized":"Trismegistia monodii Ando (1974)","canonical_name":{"value":"Trismegistia monodii","value_ranked":"Trismegistia monodii"},"hybrid":false,"surrogate":false,"virus":false,"bacteria":false,"details":[{"genus":{"value":"Trismegistia"},"specific_epithet":{"value":"monodii","authorship":{"value":"Ando (1974)","basionym_authorship":{"authors":["Ando"],"year":{"value":"1974","approximate":true}}}}}],"positions":[["genus",0,12],["specific_epithet",13,20],["author_word",21,25],["approximate_year",33,37]]} +f396d2d0-b14e-537f-ae8f-c383310f813e|Trismegistia monodii Ando, 1973 [1974]|Trismegistia monodii|Trismegistia monodii|Ando (1974)|(1974)|3 Zygaena witti Wiegel [1973] {"name_string_id":"76eef612-f125-54f9-b241-6b3a9be0a6c6","parsed":true,"quality":3,"quality_warnings":[[3,"Year with square brakets"]],"parser_version":"test_version","verbatim":"Zygaena witti Wiegel [1973]","normalized":"Zygaena witti Wiegel (1973)","canonical_name":{"value":"Zygaena witti","value_ranked":"Zygaena witti"},"hybrid":false,"surrogate":false,"virus":false,"bacteria":false,"details":[{"genus":{"value":"Zygaena"},"specific_epithet":{"value":"witti","authorship":{"value":"Wiegel (1973)","basionym_authorship":{"authors":["Wiegel"],"year":{"value":"1973","approximate":true}}}}}],"positions":[["genus",0,7],["specific_epithet",8,13],["author_word",14,20],["approximate_year",22,26]]} 76eef612-f125-54f9-b241-6b3a9be0a6c6|Zygaena witti Wiegel [1973]|Zygaena witti|Zygaena witti|Wiegel (1973)|(1973)|3 Deyeuxia coarctata Kunth, 1815 [1816] -{"name_string_id":"2f479365-40be-5181-b194-8a24fc743f73","parsed":true,"quality":3,"quality_warnings":[[3,"Unparseable tail"]],"parser_version":"test_version","verbatim":"Deyeuxia coarctata Kunth, 1815 [1816]","normalized":"Deyeuxia coarctata Kunth 1815","canonical_name":{"value":"Deyeuxia coarctata","value_ranked":"Deyeuxia coarctata"},"hybrid":false,"surrogate":false,"unparsed_tail":" [1816]","virus":false,"bacteria":false,"details":[{"genus":{"value":"Deyeuxia"},"specific_epithet":{"value":"coarctata","authorship":{"value":"Kunth 1815","basionym_authorship":{"authors":["Kunth"],"year":{"value":"1815"}}}}}],"positions":[["genus",0,8],["specific_epithet",9,18],["author_word",19,24],["year",26,30]]} -2f479365-40be-5181-b194-8a24fc743f73|Deyeuxia coarctata Kunth, 1815 [1816]|Deyeuxia coarctata|Deyeuxia coarctata|Kunth 1815|1815|3 +{"name_string_id":"2f479365-40be-5181-b194-8a24fc743f73","parsed":true,"quality":3,"quality_warnings":[[3,"Year with square brakets"]],"parser_version":"test_version","verbatim":"Deyeuxia coarctata Kunth, 1815 [1816]","normalized":"Deyeuxia coarctata Kunth (1816)","canonical_name":{"value":"Deyeuxia coarctata","value_ranked":"Deyeuxia coarctata"},"hybrid":false,"surrogate":false,"virus":false,"bacteria":false,"details":[{"genus":{"value":"Deyeuxia"},"specific_epithet":{"value":"coarctata","authorship":{"value":"Kunth (1816)","basionym_authorship":{"authors":["Kunth"],"year":{"value":"1816","approximate":true}}}}}],"positions":[["genus",0,8],["specific_epithet",9,18],["author_word",19,24],["approximate_year",32,36]]} +2f479365-40be-5181-b194-8a24fc743f73|Deyeuxia coarctata Kunth, 1815 [1816]|Deyeuxia coarctata|Deyeuxia coarctata|Kunth (1816)|(1816)|3 #> #SECTION: Names with broken conversion between encodings< diff --git a/parser/src/test/scala/org/globalnames/parser/ScientificNameParserSpec.scala b/parser/src/test/scala/org/globalnames/parser/ScientificNameParserSpec.scala index 7081165..a2b18db 100644 --- a/parser/src/test/scala/org/globalnames/parser/ScientificNameParserSpec.scala +++ b/parser/src/test/scala/org/globalnames/parser/ScientificNameParserSpec.scala @@ -73,49 +73,5 @@ class ScientificNameParserSpec extends Specification { val pr = scientificNameParser.fromString(expectedName.verbatim) Set(pr.warnings: _*).size === pr.warnings.size } - - s"contain no orphans in warnings" in { - def hasRefNode(sourceNode: AstNode, targetNode: AstNode): Boolean = targetNode match { - case tn if tn == sourceNode => true - case sn: ScientificName => sn.namesGroup.exists { ng => hasRefNode(sourceNode, ng) } - case ng: NamesGroup => - hasRefNode(sourceNode, ng.name) || - ng.hybridParts.exists { case (hc, name) => - hasRefNode(sourceNode, hc) || name.exists { n => hasRefNode(sourceNode, n) } - } - case n: Name => hasRefNode(sourceNode, n.uninomial) || - n.subgenus.exists { sg => hasRefNode(sourceNode, sg) } || - n.species.exists { sp => hasRefNode(sourceNode, sp) } || - n.infraspecies.exists { is => hasRefNode(sourceNode, is) } || - n.comparison.exists { cmp => hasRefNode(sourceNode, cmp) } || - n.approximation.exists { aprx => hasRefNode(sourceNode, aprx) } - case sg: SubGenus => hasRefNode(sourceNode, sg.word) - case u: Uninomial => u.authorship.exists { auth => hasRefNode(sourceNode, auth)} || - u.rank.exists { rk => hasRefNode(sourceNode, rk) } || - u.parent.exists { par => hasRefNode(sourceNode, par) } - case ig: InfraspeciesGroup => ig.group.exists { is => hasRefNode(sourceNode, is) } - case is: Infraspecies => hasRefNode(sourceNode, is.word) || - is.rank.exists { rk => hasRefNode(sourceNode, rk) } || - is.authorship.exists { a => hasRefNode(sourceNode, a) } - case sp: Species => hasRefNode(sourceNode, sp.word) || - sp.authorship.exists { a => hasRefNode(sourceNode, a) } - case auth: Authorship => hasRefNode(sourceNode, auth.authors) || - auth.combination.exists { cmb => hasRefNode(sourceNode, cmb) } - case ag: AuthorsGroup => hasRefNode(sourceNode, ag.authors) || - ag.authorsEx.exists { auEx => hasRefNode(sourceNode, auEx) } || - ag.authorsEmend.exists { auEm => hasRefNode(sourceNode, auEm) } - case at: AuthorsTeam => at.authors.exists { a => hasRefNode(sourceNode, a) } || - at.year.exists { y => hasRefNode(sourceNode, y) } - case au: Author => au.words.exists { aw => hasRefNode(sourceNode, aw) } || - au.filius.exists { f => hasRefNode(sourceNode, f) } - case _ => false - } - - val pr = scientificNameParser.fromString(expectedName.verbatim) - - pr.warnings.filterNot { - warning => hasRefNode(warning.node, pr.scientificName) - } should beEmpty - } } }