From f8dca63629989b9dd51c7b4038f26c3f345ceea3 Mon Sep 17 00:00:00 2001 From: andrewresearch Date: Fri, 28 Jul 2017 13:53:46 +1000 Subject: [PATCH] athanor-server-19 Constituent tree needs to reference nodes rather than words --- .../utscic/athanorserver/athanor/Athanor.scala | 2 +- .../corenlp/ConstituentTreeParser.scala | 5 ++++- .../athanorserver/corenlp/SentenceParser.scala | 1 + .../athanorserver/corenlp/TextParser.scala | 2 +- .../au/edu/utscic/athanorserver/TestData.scala | 16 +++++++++------- .../athanorserver/athanor/AthanorSpec.scala | 13 +++++++------ .../athanorserver/corenlp/CoreNlpSpec.scala | 11 +++++++++-- 7 files changed, 32 insertions(+), 18 deletions(-) diff --git a/src/main/scala/au/edu/utscic/athanorserver/athanor/Athanor.scala b/src/main/scala/au/edu/utscic/athanorserver/athanor/Athanor.scala index 56c0ae6..0c22ad6 100644 --- a/src/main/scala/au/edu/utscic/athanorserver/athanor/Athanor.scala +++ b/src/main/scala/au/edu/utscic/athanorserver/athanor/Athanor.scala @@ -44,7 +44,7 @@ object Athanor { def parsedSentenceToJsonString(parsedSent:ParsedSentence):String = { implicit val formats = Serialization.formats(NoTypeHints) val l = write(parsedSent._1) - val c = write(parsedSent._2) + val c = write(parsedSent._2).replaceAll("""(\"(?=[0-9]))|((?<=[0-9])\")""","") //remove quotes around Ints for json val d = write(parsedSent._3) s"[$l,$c,$d]" } diff --git a/src/main/scala/au/edu/utscic/athanorserver/corenlp/ConstituentTreeParser.scala b/src/main/scala/au/edu/utscic/athanorserver/corenlp/ConstituentTreeParser.scala index 88d5b06..74870c5 100644 --- a/src/main/scala/au/edu/utscic/athanorserver/corenlp/ConstituentTreeParser.scala +++ b/src/main/scala/au/edu/utscic/athanorserver/corenlp/ConstituentTreeParser.scala @@ -23,7 +23,10 @@ object ConstituentTreeParser { def process(tree:Tree):Any = { import scala.collection.JavaConverters._ if(tree.numChildren()==0) { - tree.yieldWords().asScala.map(_.value()).mkString(",") + //val words = 
tree.yieldWords().asScala.map(_.value()).mkString(",") + val num = tree.labels.asScala.flatMap(_.toString.split("-")).last + //println(s"WORD: $words NUM: $num") + num } else { tree.label().toString +: diff --git a/src/main/scala/au/edu/utscic/athanorserver/corenlp/SentenceParser.scala b/src/main/scala/au/edu/utscic/athanorserver/corenlp/SentenceParser.scala index 2bc7114..f0ccef0 100644 --- a/src/main/scala/au/edu/utscic/athanorserver/corenlp/SentenceParser.scala +++ b/src/main/scala/au/edu/utscic/athanorserver/corenlp/SentenceParser.scala @@ -53,6 +53,7 @@ object SentenceParser { dependencies match { case None => List() case Some(deps) => { + Dependency("root",0,deps.getFirstRoot.index()) +: deps.edgeListSorted().asScala.toList.map { d => Dependency( d.getRelation.toString, diff --git a/src/main/scala/au/edu/utscic/athanorserver/corenlp/TextParser.scala b/src/main/scala/au/edu/utscic/athanorserver/corenlp/TextParser.scala index dd08c4d..0044309 100644 --- a/src/main/scala/au/edu/utscic/athanorserver/corenlp/TextParser.scala +++ b/src/main/scala/au/edu/utscic/athanorserver/corenlp/TextParser.scala @@ -22,7 +22,7 @@ object TextParser { val pipeline:StanfordCoreNLP = { val props = new Properties - props.setProperty("annotators", "tokenize, ssplit, pos, lemma, parse") //parse ner dcoref + props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner, parse") //parse ner dcoref new StanfordCoreNLP(props) } diff --git a/src/test/scala/au/edu/utscic/athanorserver/TestData.scala b/src/test/scala/au/edu/utscic/athanorserver/TestData.scala index 1d2dfa1..a3aa030 100644 --- a/src/test/scala/au/edu/utscic/athanorserver/TestData.scala +++ b/src/test/scala/au/edu/utscic/athanorserver/TestData.scala @@ -20,17 +20,19 @@ object TestData { val textAnodes1:LexicalNodes = SortedMap(0 -> Node(0,"ROOT",None,None,None,None,None,None), 1 -> Node(1,"DT",Some("The"),Some("the"),Some("O"),None,Some(0),Some(3)), 2 -> 
Node(2,"NN",Some("technology"),Some("technology"),Some("O"),None,Some(4),Some(14)), 3 -> Node(3,"MD",Some("should"),Some("should"),Some("O"),None,Some(15),Some(21)), 4 -> Node(4,"VB",Some("be"),Some("be"),Some("O"),None,Some(22),Some(24)), 5 -> Node(5,"JJ",Some("able"),Some("able"),Some("O"),None,Some(25),Some(29)), 6 -> Node(6,"TO",Some("to"),Some("to"),Some("O"),None,Some(30),Some(32)), 7 -> Node(7,"VB",Some("parse"),Some("parse"),Some("O"),None,Some(33),Some(38)), 8 -> Node(8,"DT",Some("this"),Some("this"),Some("O"),None,Some(39),Some(43)), 9 -> Node(9,"NN",Some("sentence"),Some("sentence"),Some("O"),None,Some(44),Some(52)), 10 -> Node(10,".",Some("."),Some("."),Some("O"),None,Some(52),Some(53))) - val textAconstTree1:ConstituentTree = List("ROOT", List("S", List("NP", List("DT", "The"), List("NN", "technology")), List("VP", List("MD", "should"), List("VP", List("VB", "be"), List("ADJP", List("JJ", "able"), List("S", List("VP", List("TO", "to"), List("VP", List("VB", "parse"), List("NP", List("DT", "this"), List("NN", "sentence")))))))), List(".", "."))) + val textAconstTree1:ConstituentTree = List("ROOT", List("S", List("NP", List("DT", "1"), List("NN", "2")), List("VP", List("MD", "3"), List("VP", List("VB", "4"), List("ADJP", List("JJ", "5"), List("S", List("VP", List("TO", "6"), List("VP", List("VB", "7"), List("NP", List("DT", "8"), List("NN", "9")))))))), List(".", "10"))) + //val textAconstTree1:ConstituentTree = List("ROOT", List("S", List("NP", List("DT", "The"), List("NN", "technology")), List("VP", List("MD", "should"), List("VP", List("VB", "be"), List("ADJP", List("JJ", "able"), List("S", List("VP", List("TO", "to"), List("VP", List("VB", "parse"), List("NP", List("DT", "this"), List("NN", "sentence")))))))), List(".", "."))) + val textAconstTreeJson1:String = """["ROOT",["S",["NP",["DT","1"],["NN","2"]],["VP",["MD","3"],["VP",["VB","4"],["ADJP",["JJ","5"],["S",["VP",["TO","6"],["VP",["VB","7"],["NP",["DT","8"],["NN","9"]]]]]]]],[".","10"]]]""" + 
//val textAconstTreeJson1:String = """["ROOT",["S",["NP",["DT","The"],["NN","technology"]],["VP",["MD","should"],["VP",["VB","be"],["ADJP",["JJ","able"],["S",["VP",["TO","to"],["VP",["VB","parse"],["NP",["DT","this"],["NN","sentence"]]]]]]]],[".","."]]]""" - val textAconstTreeJson1:String = """["ROOT",["S",["NP",["DT","The"],["NN","technology"]],["VP",["MD","should"],["VP",["VB","be"],["ADJP",["JJ","able"],["S",["VP",["TO","to"],["VP",["VB","parse"],["NP",["DT","this"],["NN","sentence"]]]]]]]],[".","."]]]""" - - val textAdepend1:Dependencies = List(Dependency("det",2,1), Dependency("nsubj",5,2), Dependency("nsubj:xsubj",7,2), Dependency("aux",5,3), Dependency("cop",5,4), Dependency("mark",7,6), Dependency("xcomp",5,7), Dependency("det",9,8), Dependency("dobj",7,9), Dependency("punct",5,10)) + val textAdepend1:Dependencies = List(Dependency("root",0,5),Dependency("det",2,1), Dependency("nsubj",5,2), Dependency("nsubj:xsubj",7,2), Dependency("aux",5,3), Dependency("cop",5,4), Dependency("mark",7,6), Dependency("xcomp",5,7), Dependency("det",9,8), Dependency("dobj",7,9), Dependency("punct",5,10)) val textAparsed1:ParsedSentence = (textAnodes1,textAconstTree1,textAdepend1) - val textAparsed2:ParsedSentence = (SortedMap(0 -> Node(0,"ROOT",None,None,None,None,None,None), 1 -> Node(1,"PRP",Some("It"),Some("it"),Some("O"),None,Some(54),Some(56)), 2 -> Node(2,"MD",Some("should"),Some("should"),Some("O"),None,Some(57),Some(63)), 3 -> Node(3,"RB",Some("also"),Some("also"),Some("O"),None,Some(64),Some(68)), 4 -> Node(4,"VB",Some("work"),Some("work"),Some("O"),None,Some(69),Some(73)), 5 -> Node(5,"IN",Some("with"),Some("with"),Some("O"),None,Some(74),Some(78)), 6 -> Node(6,"DT",Some("this"),Some("this"),Some("O"),None,Some(79),Some(83)), 7 -> Node(7,"CD",Some("one"),Some("one"),Some("NUMBER"),None,Some(84),Some(87)), 8 -> Node(8,".",Some("."),Some("."),Some("O"),None,Some(87),Some(88))),List("ROOT", List("S", List("NP", List("PRP", "It")), List("VP", List("MD", "should"), 
List("ADVP", List("RB", "also")), List("VP", List("VB", "work"), List("PP", List("IN", "with"), List("NP", List("DT", "this"), List("CD", "one"))))), List(".", "."))),List(Dependency("nsubj",4,1), Dependency("aux",4,2), Dependency("advmod",4,3), Dependency("case",7,5), Dependency("det",7,6), Dependency("nmod:with",4,7), Dependency("punct",4,8))) + val textAparsed2:ParsedSentence = (SortedMap(0 -> Node(0,"ROOT",None,None,None,None,None,None), 1 -> Node(1,"PRP",Some("It"),Some("it"),Some("O"),None,Some(54),Some(56)), 2 -> Node(2,"MD",Some("should"),Some("should"),Some("O"),None,Some(57),Some(63)), 3 -> Node(3,"RB",Some("also"),Some("also"),Some("O"),None,Some(64),Some(68)), 4 -> Node(4,"VB",Some("work"),Some("work"),Some("O"),None,Some(69),Some(73)), 5 -> Node(5,"IN",Some("with"),Some("with"),Some("O"),None,Some(74),Some(78)), 6 -> Node(6,"DT",Some("this"),Some("this"),Some("O"),None,Some(79),Some(83)), 7 -> Node(7,"CD",Some("one"),Some("one"),Some("NUMBER"),None,Some(84),Some(87)), 8 -> Node(8,".",Some("."),Some("."),Some("O"),None,Some(87),Some(88))),List("ROOT", List("S", List("NP", List("PRP", "1")), List("VP", List("MD", "2"), List("ADVP", List("RB", "3")), List("VP", List("VB", "4"), List("PP", List("IN", "5"), List("NP", List("DT", "6"), List("CD", "7"))))), List(".", "8"))),List(Dependency("root",0,4),Dependency("nsubj",4,1), Dependency("aux",4,2), Dependency("advmod",4,3), Dependency("case",7,5), Dependency("det",7,6), Dependency("nmod:with",4,7), Dependency("punct",4,8))) - val jsonString:String = 
"""[{"0":{"POS":"ROOT","id":0},"1":{"id":1,"surface":"Oddly","left":0,"lemma":"oddly","right":5,"POS":"RB","NER":"O","Speaker":"PER0"},"2":{"id":2,"surface":"enough","left":6,"lemma":"enough","right":12,"POS":"RB","NER":"O","Speaker":"PER0"},"3":{"id":3,"surface":"I","left":13,"lemma":"I","right":14,"POS":"PRP","NER":"O","Speaker":"PER0"},"4":{"id":4,"surface":"found","left":15,"lemma":"find","right":20,"POS":"VBD","NER":"O","Speaker":"PER0"},"5":{"id":5,"surface":"it","left":21,"lemma":"it","right":23,"POS":"PRP","NER":"O","Speaker":"PER0"},"6":{"id":6,"surface":"quite","left":24,"lemma":"quite","right":29,"POS":"RB","NER":"O","Speaker":"PER0"},"7":{"id":7,"surface":"empowering","left":30,"lemma":"empower","right":40,"POS":"VBG","NER":"O","Speaker":"PER0"},"8":{"id":8,"surface":"to","left":41,"lemma":"to","right":43,"POS":"TO","NER":"O","Speaker":"PER0"},"9":{"id":9,"surface":"hear","left":44,"lemma":"hear","right":48,"POS":"VB","NER":"O","Speaker":"PER0"},"10":{"id":10,"surface":"Natalia","left":49,"lemma":"Natalia","right":56,"POS":"NNP","NER":"PERSON","Speaker":"PER0"},"11":{"id":11,"surface":"state","left":57,"lemma":"state","right":62,"POS":"NN","NER":"O","Speaker":"PER0"},"12":{"id":12,"surface":"the","left":63,"lemma":"the","right":66,"POS":"DT","NER":"O","Speaker":"PER0"},"13":{"id":13,"surface":"following","left":67,"lemma":"follow","right":76,"POS":"VBG","NER":"O","Speaker":"PER0"},"14":{"id":14,"surface":"``","left":77,"lemma":"``","right":79,"POS":"``","NER":"O"},"15":{"id":15,"surface":"Real","left":79,"lemma":"real","right":83,"POS":"JJ","NER":"O","Speaker":"PER1"},"16":{"id":16,"surface":"life","left":84,"lemma":"life","right":88,"POS":"NN","NER":"O","Speaker":"PER1"},"17":{"id":17,"surface":"dilemmas","left":89,"lemma":"dilemma","right":97,"POS":"NNS","NER":"O","Speaker":"PER1"},"18":{"id":18,"surface":"often","left":98,"lemma":"often","right":103,"POS":"RB","NER":"O","Speaker":"PER1"},"19":{"id":19,"surface":"present","left":104,"lemma":"present","
right":111,"POS":"JJ","NER":"DATE","Speaker":"PER1"},"20":{"id":20,"surface":"choices","left":112,"lemma":"choice","right":119,"POS":"NNS","NER":"O","Speaker":"PER1"},"21":{"id":21,"surface":"between","left":120,"lemma":"between","right":127,"POS":"IN","NER":"O","Speaker":"PER1"},"22":{"id":22,"surface":"equally","left":128,"lemma":"equally","right":135,"POS":"RB","NER":"O","Speaker":"PER1"},"23":{"id":23,"surface":"unfavorable","left":136,"lemma":"unfavorable","right":147,"POS":"JJ","NER":"O","Speaker":"PER1"},"24":{"id":24,"surface":"or","left":148,"lemma":"or","right":150,"POS":"CC","NER":"O","Speaker":"PER1"},"25":{"id":25,"surface":"disagreeable","left":151,"lemma":"disagreeable","right":163,"POS":"JJ","NER":"O","Speaker":"PER1"},"26":{"id":26,"surface":"alternatives","left":164,"lemma":"alternative","right":176,"POS":"NNS","NER":"O","Speaker":"PER1"},"27":{"id":27,"surface":".","left":176,"lemma":".","right":177,"POS":".","NER":"O","Speaker":"PER1"}},["ROOT",["S",["ADVP",["RB",1],["RB",2]],["NP",["PRP",3]],["VP",["VBD",4],["NP",["PRP",5]],["ADVP",["RB",6],["VP",["VBG",7],["S",["VP",["TO",8],["VP",["VB",9],["S",["NP",["NNP",10]],["NP",["NP",["NN",11],["DT",12]],["PP",["VBG",13],["NP",["`",14],["NP",["JJ",15],["NN",16],["NNS",17]],["ADVP",["RB",18],["NP",["JJ",19],["NNS",20]]],["PP",["IN",21],["NP",["NP",["RB",22],["JJ",23]],["CC",24],["NP",["JJ",25],["NNS",26]]]]]]]]]]]]]],[".",27]]],[{"name":"root","governor":0,"dependent":4},{"name":"advmod","governor":2,"dependent":1},{"name":"advmod","governor":4,"dependent":2},{"name":"nsubj","governor":4,"dependent":3},{"name":"dobj","governor":4,"dependent":5},{"name":"advmod","governor":4,"dependent":6},{"name":"dep","governor":6,"dependent":7},{"name":"mark","governor":9,"dependent":8},{"name":"xcomp","governor":7,"dependent":9},{"name":"nsubj","governor":11,"dependent":10},{"name":"xcomp","governor":9,"dependent":11},{"name":"dep","governor":11,"dependent":12},{"name":"case","governor":17,"dependent":13},{"name":"punc
t","governor":17,"dependent":14},{"name":"amod","governor":17,"dependent":15},{"name":"compound","governor":17,"dependent":16},{"name":"nmod","governor":11,"dependent":17},{"name":"advmod","governor":17,"dependent":18},{"name":"amod","governor":20,"dependent":19},{"name":"nmod:npmod","governor":18,"dependent":20},{"name":"case","governor":23,"dependent":21},{"name":"advmod","governor":23,"dependent":22},{"name":"nmod","governor":17,"dependent":23},{"name":"cc","governor":23,"dependent":24},{"name":"amod","governor":26,"dependent":25},{"name":"conj","governor":23,"dependent":26},{"name":"punct","governor":4,"dependent":27}]]""" + val athSentence:String = "Oddly enough I found it quite empowering to hear Natalia state the following`` Real life dilemmas often present choices between equally unfavorable or disagreeable alternatives." + val athJsonString:String = """[{"0":{"POS":"ROOT","id":0},"1":{"id":1,"surface":"Oddly","left":0,"lemma":"oddly","right":5,"POS":"RB","NER":"O","Speaker":"PER0"},"2":{"id":2,"surface":"enough","left":6,"lemma":"enough","right":12,"POS":"RB","NER":"O","Speaker":"PER0"},"3":{"id":3,"surface":"I","left":13,"lemma":"I","right":14,"POS":"PRP","NER":"O","Speaker":"PER0"},"4":{"id":4,"surface":"found","left":15,"lemma":"find","right":20,"POS":"VBD","NER":"O","Speaker":"PER0"},"5":{"id":5,"surface":"it","left":21,"lemma":"it","right":23,"POS":"PRP","NER":"O","Speaker":"PER0"},"6":{"id":6,"surface":"quite","left":24,"lemma":"quite","right":29,"POS":"RB","NER":"O","Speaker":"PER0"},"7":{"id":7,"surface":"empowering","left":30,"lemma":"empower","right":40,"POS":"VBG","NER":"O","Speaker":"PER0"},"8":{"id":8,"surface":"to","left":41,"lemma":"to","right":43,"POS":"TO","NER":"O","Speaker":"PER0"},"9":{"id":9,"surface":"hear","left":44,"lemma":"hear","right":48,"POS":"VB","NER":"O","Speaker":"PER0"},"10":{"id":10,"surface":"Natalia","left":49,"lemma":"Natalia","right":56,"POS":"NNP","NER":"PERSON","Speaker":"PER0"},"11":{"id":11,"surface":"state","left":
57,"lemma":"state","right":62,"POS":"NN","NER":"O","Speaker":"PER0"},"12":{"id":12,"surface":"the","left":63,"lemma":"the","right":66,"POS":"DT","NER":"O","Speaker":"PER0"},"13":{"id":13,"surface":"following","left":67,"lemma":"follow","right":76,"POS":"VBG","NER":"O","Speaker":"PER0"},"14":{"id":14,"surface":"``","left":77,"lemma":"``","right":79,"POS":"``","NER":"O"},"15":{"id":15,"surface":"Real","left":79,"lemma":"real","right":83,"POS":"JJ","NER":"O","Speaker":"PER1"},"16":{"id":16,"surface":"life","left":84,"lemma":"life","right":88,"POS":"NN","NER":"O","Speaker":"PER1"},"17":{"id":17,"surface":"dilemmas","left":89,"lemma":"dilemma","right":97,"POS":"NNS","NER":"O","Speaker":"PER1"},"18":{"id":18,"surface":"often","left":98,"lemma":"often","right":103,"POS":"RB","NER":"O","Speaker":"PER1"},"19":{"id":19,"surface":"present","left":104,"lemma":"present","right":111,"POS":"JJ","NER":"DATE","Speaker":"PER1"},"20":{"id":20,"surface":"choices","left":112,"lemma":"choice","right":119,"POS":"NNS","NER":"O","Speaker":"PER1"},"21":{"id":21,"surface":"between","left":120,"lemma":"between","right":127,"POS":"IN","NER":"O","Speaker":"PER1"},"22":{"id":22,"surface":"equally","left":128,"lemma":"equally","right":135,"POS":"RB","NER":"O","Speaker":"PER1"},"23":{"id":23,"surface":"unfavorable","left":136,"lemma":"unfavorable","right":147,"POS":"JJ","NER":"O","Speaker":"PER1"},"24":{"id":24,"surface":"or","left":148,"lemma":"or","right":150,"POS":"CC","NER":"O","Speaker":"PER1"},"25":{"id":25,"surface":"disagreeable","left":151,"lemma":"disagreeable","right":163,"POS":"JJ","NER":"O","Speaker":"PER1"},"26":{"id":26,"surface":"alternatives","left":164,"lemma":"alternative","right":176,"POS":"NNS","NER":"O","Speaker":"PER1"},"27":{"id":27,"surface":".","left":176,"lemma":".","right":177,"POS":".","NER":"O","Speaker":"PER1"}},["ROOT",["S",["ADVP",["RB",1],["RB",2]],["NP",["PRP",3]],["VP",["VBD",4],["NP",["PRP",5]],["ADVP",["RB",6],["VP",["VBG",7],["S",["VP",["TO",8],["VP",["VB",9],
["S",["NP",["NNP",10]],["NP",["NP",["NN",11],["DT",12]],["PP",["VBG",13],["NP",["`",14],["NP",["JJ",15],["NN",16],["NNS",17]],["ADVP",["RB",18],["NP",["JJ",19],["NNS",20]]],["PP",["IN",21],["NP",["NP",["RB",22],["JJ",23]],["CC",24],["NP",["JJ",25],["NNS",26]]]]]]]]]]]]]],[".",27]]],[{"name":"root","governor":0,"dependent":4},{"name":"advmod","governor":2,"dependent":1},{"name":"advmod","governor":4,"dependent":2},{"name":"nsubj","governor":4,"dependent":3},{"name":"dobj","governor":4,"dependent":5},{"name":"advmod","governor":4,"dependent":6},{"name":"dep","governor":6,"dependent":7},{"name":"mark","governor":9,"dependent":8},{"name":"xcomp","governor":7,"dependent":9},{"name":"nsubj","governor":11,"dependent":10},{"name":"xcomp","governor":9,"dependent":11},{"name":"dep","governor":11,"dependent":12},{"name":"case","governor":17,"dependent":13},{"name":"punct","governor":17,"dependent":14},{"name":"amod","governor":17,"dependent":15},{"name":"compound","governor":17,"dependent":16},{"name":"nmod","governor":11,"dependent":17},{"name":"advmod","governor":17,"dependent":18},{"name":"amod","governor":20,"dependent":19},{"name":"nmod:npmod","governor":18,"dependent":20},{"name":"case","governor":23,"dependent":21},{"name":"advmod","governor":23,"dependent":22},{"name":"nmod","governor":17,"dependent":23},{"name":"cc","governor":23,"dependent":24},{"name":"amod","governor":26,"dependent":25},{"name":"conj","governor":23,"dependent":26},{"name":"punct","governor":4,"dependent":27}]]""" - val parsedSentence:ParsedSentence = ( SortedMap( 0 -> Node(0,"ROOT",None,None,None,None,None,None), 1 -> Node(1,"RB",Some("Oddly"),Some("oddly"),Some("O"),Some("PER0"),Some(0),Some(5)), 2 -> Node(2,"RB",Some("enough"),Some("enough"),Some("O"),Some("PER0"),Some(6),Some(12)), 3 -> Node(3,"PRP",Some("I"),Some("I"),Some("O"),Some("PER0"),Some(13),Some(14)), 4 -> Node(4,"VBD",Some("found"),Some("find"),Some("O"),Some("PER0"),Some(15),Some(20)), 5 -> 
Node(5,"PRP",Some("it"),Some("it"),Some("O"),Some("PER0"),Some(21),Some(23)), 6 -> Node(6,"RB",Some("quite"),Some("quite"),Some("O"),Some("PER0"),Some(24),Some(29)), 7 -> Node(7,"VBG",Some("empowering"),Some("empower"),Some("O"),Some("PER0"),Some(30),Some(40)), 8 -> Node(8,"TO",Some("to"),Some("to"),Some("O"),Some("PER0"),Some(41),Some(43)), 9 -> Node(9,"VB",Some("hear"),Some("hear"),Some("O"),Some("PER0"),Some(44),Some(48)), 10 -> Node(10,"NNP",Some("Natalia"),Some("Natalia"),Some("PERSON"),Some("PER0"),Some(49),Some(56)), 11 -> Node(11,"NN",Some("state"),Some("state"),Some("O"),Some("PER0"),Some(57),Some(62)), 12 -> Node(12,"DT",Some("the"),Some("the"),Some("O"),Some("PER0"),Some(63),Some(66)), 13 -> Node(13,"VBG",Some("following"),Some("follow"),Some("O"),Some("PER0"),Some(67),Some(76)), 14 -> Node(14,"``",Some("``"),Some("``"),Some("O"),None,Some(77),Some(79)), 15 -> Node(15,"JJ",Some("Real"),Some("real"),Some("O"),Some("PER1"),Some(79),Some(83)), 16 -> Node(16,"NN",Some("life"),Some("life"),Some("O"),Some("PER1"),Some(84),Some(88)), 17 -> Node(17,"NNS",Some("dilemmas"),Some("dilemma"),Some("O"),Some("PER1"),Some(89),Some(97)), 18 -> Node(18,"RB",Some("often"),Some("often"),Some("O"),Some("PER1"),Some(98),Some(103)), 19 -> Node(19,"JJ",Some("present"),Some("present"),Some("DATE"),Some("PER1"),Some(104),Some(111)), 20 -> Node(20,"NNS",Some("choices"),Some("choice"),Some("O"),Some("PER1"),Some(112),Some(119)), 21 -> Node(21,"IN",Some("between"),Some("between"),Some("O"),Some("PER1"),Some(120),Some(127)), 22 -> Node(22,"RB",Some("equally"),Some("equally"),Some("O"),Some("PER1"),Some(128),Some(135)), 23 -> Node(23,"JJ",Some("unfavorable"),Some("unfavorable"),Some("O"),Some("PER1"),Some(136),Some(147)), 24 -> Node(24,"CC",Some("or"),Some("or"),Some("O"),Some("PER1"),Some(148),Some(150)), 25 -> Node(25,"JJ",Some("disagreeable"),Some("disagreeable"),Some("O"),Some("PER1"),Some(151),Some(163)), 26 -> 
Node(26,"NNS",Some("alternatives"),Some("alternative"),Some("O"),Some("PER1"),Some(164),Some(176)), 27 -> Node(27,".",Some("."),Some("."),Some("O"),Some("PER1"),Some(176),Some(177))), List("ROOT", List("S", List("ADVP", List("RB",1), List("RB",2)), List("NP", List("PRP",3)), List("VP", List("VBD",4), List("NP", List("PRP",5)), List("ADVP", List("RB",6), List("VP", List("VBG",7), List("S", List("VP", List("TO",8), List("VP", List("VB",9), List("S", List("NP", List("NNP", 10)), List("NP", List("NP", List("NN", 11), List("DT", 12)), List("PP", List("VBG", 13), List("NP", List("`", 14), List("NP", List("JJ", 15), List("NN", 16), List("NNS", 17)), List("ADVP", List("RB", 18), List("NP", List("JJ", 19), List("NNS", 20))), List("PP", List("IN", 21), List("NP", List("NP", List("RB", 22), List("JJ", 23)), List("CC", 24), List("NP", List("JJ", 25), List("NNS", 26)))))))))))))), List(".", 27))), List(Dependency("root",0,4), Dependency("advmod",2,1), Dependency("advmod",4,2), Dependency("nsubj",4,3), Dependency("dobj",4,5), Dependency("advmod",4,6), Dependency("dep",6,7), Dependency("mark",9,8), Dependency("xcomp",7,9), Dependency("nsubj",11,10), Dependency("xcomp",9,11), Dependency("dep",11,12), Dependency("case",17,13), Dependency("punct",17,14), Dependency("amod",17,15), Dependency("compound",17,16), Dependency("nmod",11,17), Dependency("advmod",17,18), Dependency("amod",20,19), Dependency("nmod:npmod",18,20), Dependency("case",23,21), Dependency("advmod",23,22), Dependency("nmod",17,23), Dependency("cc",23,24), Dependency("amod",26,25), Dependency("conj",23,26), Dependency("punct",4,27))) + val athParsedSentence:ParsedSentence = ( SortedMap( 0 -> Node(0,"ROOT",None,None,None,None,None,None), 1 -> Node(1,"RB",Some("Oddly"),Some("oddly"),Some("O"),Some("PER0"),Some(0),Some(5)), 2 -> Node(2,"RB",Some("enough"),Some("enough"),Some("O"),Some("PER0"),Some(6),Some(12)), 3 -> Node(3,"PRP",Some("I"),Some("I"),Some("O"),Some("PER0"),Some(13),Some(14)), 4 -> 
Node(4,"VBD",Some("found"),Some("find"),Some("O"),Some("PER0"),Some(15),Some(20)), 5 -> Node(5,"PRP",Some("it"),Some("it"),Some("O"),Some("PER0"),Some(21),Some(23)), 6 -> Node(6,"RB",Some("quite"),Some("quite"),Some("O"),Some("PER0"),Some(24),Some(29)), 7 -> Node(7,"VBG",Some("empowering"),Some("empower"),Some("O"),Some("PER0"),Some(30),Some(40)), 8 -> Node(8,"TO",Some("to"),Some("to"),Some("O"),Some("PER0"),Some(41),Some(43)), 9 -> Node(9,"VB",Some("hear"),Some("hear"),Some("O"),Some("PER0"),Some(44),Some(48)), 10 -> Node(10,"NNP",Some("Natalia"),Some("Natalia"),Some("PERSON"),Some("PER0"),Some(49),Some(56)), 11 -> Node(11,"NN",Some("state"),Some("state"),Some("O"),Some("PER0"),Some(57),Some(62)), 12 -> Node(12,"DT",Some("the"),Some("the"),Some("O"),Some("PER0"),Some(63),Some(66)), 13 -> Node(13,"VBG",Some("following"),Some("follow"),Some("O"),Some("PER0"),Some(67),Some(76)), 14 -> Node(14,"``",Some("``"),Some("``"),Some("O"),None,Some(77),Some(79)), 15 -> Node(15,"JJ",Some("Real"),Some("real"),Some("O"),Some("PER1"),Some(79),Some(83)), 16 -> Node(16,"NN",Some("life"),Some("life"),Some("O"),Some("PER1"),Some(84),Some(88)), 17 -> Node(17,"NNS",Some("dilemmas"),Some("dilemma"),Some("O"),Some("PER1"),Some(89),Some(97)), 18 -> Node(18,"RB",Some("often"),Some("often"),Some("O"),Some("PER1"),Some(98),Some(103)), 19 -> Node(19,"JJ",Some("present"),Some("present"),Some("DATE"),Some("PER1"),Some(104),Some(111)), 20 -> Node(20,"NNS",Some("choices"),Some("choice"),Some("O"),Some("PER1"),Some(112),Some(119)), 21 -> Node(21,"IN",Some("between"),Some("between"),Some("O"),Some("PER1"),Some(120),Some(127)), 22 -> Node(22,"RB",Some("equally"),Some("equally"),Some("O"),Some("PER1"),Some(128),Some(135)), 23 -> Node(23,"JJ",Some("unfavorable"),Some("unfavorable"),Some("O"),Some("PER1"),Some(136),Some(147)), 24 -> Node(24,"CC",Some("or"),Some("or"),Some("O"),Some("PER1"),Some(148),Some(150)), 25 -> 
Node(25,"JJ",Some("disagreeable"),Some("disagreeable"),Some("O"),Some("PER1"),Some(151),Some(163)), 26 -> Node(26,"NNS",Some("alternatives"),Some("alternative"),Some("O"),Some("PER1"),Some(164),Some(176)), 27 -> Node(27,".",Some("."),Some("."),Some("O"),Some("PER1"),Some(176),Some(177))), List("ROOT", List("S", List("ADVP", List("RB",1), List("RB",2)), List("NP", List("PRP",3)), List("VP", List("VBD",4), List("NP", List("PRP",5)), List("ADVP", List("RB",6), List("VP", List("VBG",7), List("S", List("VP", List("TO",8), List("VP", List("VB",9), List("S", List("NP", List("NNP", 10)), List("NP", List("NP", List("NN", 11), List("DT", 12)), List("PP", List("VBG", 13), List("NP", List("`", 14), List("NP", List("JJ", 15), List("NN", 16), List("NNS", 17)), List("ADVP", List("RB", 18), List("NP", List("JJ", 19), List("NNS", 20))), List("PP", List("IN", 21), List("NP", List("NP", List("RB", 22), List("JJ", 23)), List("CC", 24), List("NP", List("JJ", 25), List("NNS", 26)))))))))))))), List(".", 27))), List(Dependency("root",0,4), Dependency("advmod",2,1), Dependency("advmod",4,2), Dependency("nsubj",4,3), Dependency("dobj",4,5), Dependency("advmod",4,6), Dependency("dep",6,7), Dependency("mark",9,8), Dependency("xcomp",7,9), Dependency("nsubj",11,10), Dependency("xcomp",9,11), Dependency("dep",11,12), Dependency("case",17,13), Dependency("punct",17,14), Dependency("amod",17,15), Dependency("compound",17,16), Dependency("nmod",11,17), Dependency("advmod",17,18), Dependency("amod",20,19), Dependency("nmod:npmod",18,20), Dependency("case",23,21), Dependency("advmod",23,22), Dependency("nmod",17,23), Dependency("cc",23,24), Dependency("amod",26,25), Dependency("conj",23,26), Dependency("punct",4,27))) } diff --git a/src/test/scala/au/edu/utscic/athanorserver/athanor/AthanorSpec.scala b/src/test/scala/au/edu/utscic/athanorserver/athanor/AthanorSpec.scala index 6cbbbc4..0072fd2 100644 --- a/src/test/scala/au/edu/utscic/athanorserver/athanor/AthanorSpec.scala +++ 
b/src/test/scala/au/edu/utscic/athanorserver/athanor/AthanorSpec.scala @@ -23,16 +23,16 @@ class AthanorSpec extends UnitSpec { // } it should "parseJsonSentence" in { - val ps = Athanor.parseJsonSentence(TestData.jsonString) - assert(ps==TestData.parsedSentence) + val ps = Athanor.parseJsonSentence(TestData.athJsonString) + assert(ps==TestData.athParsedSentence) } it should "parsedSentenceToJsonString" in { import org.json4s._ import org.json4s.jackson.JsonMethods._ //Get strings - val expected = TestData.jsonString - val actual = Athanor.parsedSentenceToJsonString(TestData.parsedSentence) + val expected = TestData.athJsonString + val actual = Athanor.parsedSentenceToJsonString(TestData.athParsedSentence) //Check that the strings can actually be parsed into json val expJson = parse(expected) val actJson = parse(actual) @@ -51,16 +51,17 @@ class AthanorSpec extends UnitSpec { } it should "analyseParsedSentence" in { - val result = Athanor.analyseParsedSentence(TestData.parsedSentence) + val result = Athanor.analyseParsedSentence(TestData.athParsedSentence) assert(result.toSet==rhetoricalMoves.toSet) //Order doesn't matter } it should "analyseJsonSentence" in { - val result = Athanor.analyseJson(TestData.jsonString) + val result = Athanor.analyseJson(TestData.athJsonString) assert(result.toSet==rhetoricalMoves.toSet) //Order doesn't matter } + } diff --git a/src/test/scala/au/edu/utscic/athanorserver/corenlp/CoreNlpSpec.scala b/src/test/scala/au/edu/utscic/athanorserver/corenlp/CoreNlpSpec.scala index 0eb309a..0ee57cc 100644 --- a/src/test/scala/au/edu/utscic/athanorserver/corenlp/CoreNlpSpec.scala +++ b/src/test/scala/au/edu/utscic/athanorserver/corenlp/CoreNlpSpec.scala @@ -1,6 +1,7 @@ package au.edu.utscic.athanorserver.corenlp import au.edu.utscic.athanorserver.UnitSpec +import au.edu.utscic.athanorserver.athanor.Athanor import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations._ import edu.stanford.nlp.trees.TreeCoreAnnotations.TreeAnnotation @@ -44,7 +45,8 
@@ class CoreNlpSpec extends UnitSpec { val ps1s = ps1._2(1).asInstanceOf[List[Any]] assert(ps1s.head=="S") //Tree has starting sentence assert(ps1s.length==4) //S + 3 top level phrases - assert(ps1._3.size==10) //Correct number of dependencies + assert(ps1._3.size==11) //Correct number of dependencies + assert(ps1._3.head.name=="root") //Dependencies has root //Sentence 2 assert(ps2._1.size==9) //Correct number of nodes assert(ps2._1.map(_._1).toSeq==Seq(0,1,2,3,4,5,6,7,8)) //Is sorted @@ -52,7 +54,8 @@ class CoreNlpSpec extends UnitSpec { val ps2s = ps2._2(1).asInstanceOf[List[Any]] assert(ps2s.head=="S") //Tree has starting sentence assert(ps2s.length==4) //S + 3 top level phrases - assert(ps2._3.size==7) //Correct number of dependencies + assert(ps2._3.size==8) //Correct number of dependencies + assert(ps2._3.head.name=="root") //Sentence 2 dependencies start with root } it should "parse" in { @@ -60,6 +63,10 @@ class CoreNlpSpec extends UnitSpec { assert(ps.length==2) assert(ps(0)==textAparsed1) assert(ps(1)==textAparsed2) + //Athanor sentence test + val demoPS = TextParser.parse(athSentence) + val json = Athanor.parsedSentenceToJsonString(demoPS.head) + assert(json==athJsonString) } behavior of "SentenceParser"