Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New text grammar #181

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
66 changes: 55 additions & 11 deletions textworld/textgen/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from tatsu.model import NodeWalker
from typing import Iterable, Optional, Tuple
from typing import Iterable, Optional, Tuple, List

from textworld.textgen.model import TextGrammarModelBuilderSemantics
from textworld.textgen.parser import TextGrammarParser
Expand All @@ -18,32 +18,75 @@ def full_form(self, include_adj=True) -> str:
return adj + "|" + noun


class LiteralAlternative(Alternative):
class LiteralChunk:
"""
An alternative from a literal string.
It creates an object with a [str] value for every single literal.
literal is defined as any string which is not a symbol, i.e. it is not bounded by hashtags.
"""
def __init__(self, value: str):
self._value = value


def __init__(self, value: str):
class SymbolChunk:
"""
It creates an object with a [str] value for every single symbol.
symbol is defined as any string in between two consecutive hashtags, e.g. #it_is_a_symbol#.
"""
def __init__(self, value: str):
self._value = value


class LiteralAlternative(Alternative):
"""
An alternative from a literal string and represents it as a chunk of literal and symbol objects.
"""
def __init__(self, node: str):
self._node = node
# self._val_chunk contains the objects which make the string.
# It is equivalent to self._value in LiteralAlternative.
self._val_chunk = self._symbol_finder(self._node)

def _symbol_finder(self, node):
self.chunks = []
while node:
is_has_tag = [i for i, ltr in enumerate(node) if ltr == '#']
if is_has_tag:
if node[:is_has_tag[0]]:
self.chunks.append(LiteralChunk(node[:is_has_tag[0]]))
self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1]))
else:
self.chunks.append(SymbolChunk(node[is_has_tag[0]:is_has_tag[1] + 1]))

node = node[is_has_tag[1] + 1:]
else:
if node:
self.chunks.append(LiteralChunk(node))
break
return self.chunks

def split_form(self, include_adj=True) -> Tuple[Optional[str], str]:
return None, self._value
return None, self._node


class AdjectiveNounAlternative(Alternative):
class AdjectiveNounAlternative(LiteralAlternative):
"""
An alternative that specifies an adjective and a noun.
"""

def __init__(self, adjective: str, noun: str):
self._adjective = adjective
self._noun = noun
def __init__(self, adj_node: str, n_node: str):
self._adj_node = adj_node
self._n_node = n_node
# self._adj_chunk contains the objects which make the adjective string.
# self._noun_chunk contains the objects which make the noun string.
# These are equivalent to self._adjective and self._noun in AdjectiveNounAlternative.
self._adj_chunk = self._symbol_finder(self._adj_node)
self._noun_chunk = self._symbol_finder(self._n_node)

def split_form(self, include_adj=True) -> Tuple[Optional[str], str]:
if include_adj:
return self._adjective, self._noun
return self._adj_node, self._n_node
else:
return None, self._noun
return None, self._n_node


class MatchAlternative(Alternative):
Expand All @@ -70,6 +113,7 @@ def __init__(self, symbol: str, alternatives: Iterable[Alternative]):


class _Converter(NodeWalker):

def walk_list(self, node):
return [self.walk(child) for child in node]

Expand Down