diff --git a/chebai/preprocessing/reader.py b/chebai/preprocessing/reader.py index 0db6d1a7..99abdabb 100644 --- a/chebai/preprocessing/reader.py +++ b/chebai/preprocessing/reader.py @@ -181,10 +181,10 @@ def name(cls): def _read_data(self, raw_data): try: - tokenized = sf.split_selfies(sf.encoder(raw_data, strict=True)) + tokenized = sf.split_selfies(sf.encoder(raw_data.strip(), strict=True)) tokenized = [self._get_token_index(v) for v in tokenized] except Exception as e: - print(f'could not process {raw_data} (type: {type(raw_data)}') + print(f'could not process {raw_data}') print(f'\t{e}') self.error_count += 1 print(f'\terror count: {self.error_count}')