Skip to content

Commit

Permalink
Subset.as_dataframe() hides targets instead of raising an error (#258)
Browse files (browse the repository at this point in the history)
* `Subset.as_dataframe()` hides targets instead of raising an error

Resolves #257

* Add tests for the new `Subset.as_dataframe()` behavior

Addresses a review comment on #258.
  • Loading branch information
JacksonBurns authored Jan 31, 2025
1 parent 6747c58 commit fe3bff3
Show file tree
Hide file tree
Showing 2 changed files with 18 additions and 6 deletions.
15 changes: 9 additions & 6 deletions polaris/dataset/_subset.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,19 @@ def as_dataframe(self) -> pd.DataFrame:
This method loads the entire dataset in memory.
"""
# Create an empty dataframe
cols = self.input_cols + self.target_cols
cols = self.input_cols
if not self._hide_targets:
cols += self.target_cols
df = pd.DataFrame(columns=cols)

# Fill the dataframe
targets = self.targets
if not self.is_multi_task:
targets = {self.target_cols[0]: targets}
if not self._hide_targets:
targets = self.targets
if not self.is_multi_task:
targets = {self.target_cols[0]: targets}

for k in targets:
df[k] = targets[k]
for k in targets:
df[k] = targets[k]

inputs = self.inputs
if not self.is_multi_input:
Expand Down
9 changes: 9 additions & 0 deletions tests/test_subset.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datamol as dm
import numpy as np
import pandas as pd
import pytest

from polaris.dataset import Subset
Expand Down Expand Up @@ -52,6 +53,14 @@ def test_access_to_test_set(test_single_task_benchmark):
assert all(isinstance(y, float) for x, y in train)
assert all(isinstance(train[i][1], float) for i in range(len(train)))

# as_dataframe should work for both, but contain no targets for test
train_df = train.as_dataframe()
assert isinstance(train_df, pd.DataFrame)
assert "expt" in train_df.columns
test_df = test.as_dataframe()
assert isinstance(test_df, pd.DataFrame)
assert "expt" not in test_df.columns


def test_input_featurization(test_single_task_benchmark):
# Without a transformation, we expect a SMILES string
Expand Down

0 comments on commit fe3bff3

Please sign in to comment.