From 3ecff2a329673c092093884facf8e0e1cb1d43b0 Mon Sep 17 00:00:00 2001 From: HakiRose Date: Fri, 30 Aug 2019 12:07:27 -0400 Subject: [PATCH] Revised version of grammar (symbol and literal chunks) --- textworld/textgen/__init__.py | 66 +++++++++++++++++++++++++++++------ 1 file changed, 55 insertions(+), 11 deletions(-) diff --git a/textworld/textgen/__init__.py b/textworld/textgen/__init__.py index 0aceab23..5edb27d3 100644 --- a/textworld/textgen/__init__.py +++ b/textworld/textgen/__init__.py @@ -1,5 +1,5 @@ from tatsu.model import NodeWalker -from typing import Iterable, Optional, Tuple +from typing import Iterable, Optional, Tuple, List from textworld.textgen.model import TextGrammarModelBuilderSemantics from textworld.textgen.parser import TextGrammarParser @@ -18,32 +18,75 @@ def full_form(self, include_adj=True) -> str: return adj + "|" + noun -class LiteralAlternative(Alternative): +class LiteralChunk: """ - An alternative from a literal string. + It creates an object with a [str] value for every single literal. + literal is defined as any string which is not a symbol, i.e. it is not bounded by hashtags. """ + def __init__(self, value: str): + self._value = value + - def __init__(self, value: str): +class SymbolChunk: + """ + It creates an object with a [str] value for every single symbol. + symbol is defined as any string in between two consecutive hashtags, e.g. #it_is_a_symbol#. + """ + def __init__(self, value: str): self._value = value + +class LiteralAlternative(Alternative): + """ + An alternative from a literal string and represents it as a chunk of literal and symbol objects. + """ + def __init__(self, node: str): + self._node = node + # self._val_chunk contains the objects which make the string. + # It is equivalent to self._value in LiteralAlternative. + self._val_chunk = self._symbol_finder(self._node) + + def _symbol_finder(self, node): + self.chunks = [] + while node: + is_has_tag = [i for i, ltr in enumerate(node) if ltr == '#'] + if is_has_tag: + if node[:is_has_tag[0]]: + self.chunks.append(LiteralChunk(node[:is_has_tag[0]])) + self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1])) + else: + self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1])) + + node = node[is_has_tag[1] + 1:] + else: + if node: + self.chunks.append(LiteralChunk(node)) + break + return self.chunks + def split_form(self, include_adj=True) -> Tuple[Optional[str], str]: - return None, self._value + return None, self._node -class AdjectiveNounAlternative(Alternative): +class AdjectiveNounAlternative(LiteralAlternative): """ An alternative that specifies an adjective and a noun. """ - def __init__(self, adjective: str, noun: str): - self._adjective = adjective - self._noun = noun + def __init__(self, adj_node: str, n_node: str): + self._adj_node = adj_node + self._n_node = n_node + # self._adj_chunk contains the objects which make the adjective string. + # self._noun_chunk contains the objects which make the noun string. + # These are equivalent to self._adjective and self._noun in AdjectiveNounAlternative. + self._adj_chunk = self._symbol_finder(self._adj_node) + self._noun_chunk = self._symbol_finder(self._n_node) def split_form(self, include_adj=True) -> Tuple[Optional[str], str]: if include_adj: - return self._adjective, self._noun + return self._adj_node, self._n_node else: - return None, self._noun + return None, self._n_node class MatchAlternative(Alternative): @@ -70,6 +113,7 @@ def __init__(self, symbol: str, alternatives: Iterable[Alternative]): class _Converter(NodeWalker): + def walk_list(self, node): return [self.walk(child) for child in node]