Skip to content

Commit

Permalink
fix ray data
Browse files: browse the repository at this point in the history
  • Loading branch information
Cathy0908 committed Sep 9, 2024
1 parent 1bdb484 commit c02a44b
Showing 1 changed file with 8 additions and 8 deletions.
16 changes: 8 additions & 8 deletions data_juicer/core/ray_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,15 +301,15 @@ def _run_single_op(self, op, dataset: Dataset) -> Dataset:
  batch_size = getattr(op, 'batch_size',
  1) if op.is_batched_op() else 1
  if isinstance(op, Mapper):
- self.data = self.data.map_batches(op.process,
- batch_size=batch_size,
- batch_format='pyarrow',
- num_gpus=num_gpus)
+ dataset = dataset.map_batches(op.process,
+ batch_size=batch_size,
+ batch_format='pyarrow',
+ num_gpus=num_gpus)
  elif isinstance(op, Filter):
- self.data = self.data.map_batches(op.compute_stats,
- batch_size=batch_size,
- batch_format='pyarrow',
- num_gpus=num_gpus)
+ dataset = dataset.map_batches(op.compute_stats,
+ batch_size=batch_size,
+ batch_format='pyarrow',
+ num_gpus=num_gpus)
  if op.stats_export_path is not None:
  dataset.write_json(op.stats_export_path, force_ascii=False)
  dataset = dataset.filter(op.process)
Expand Down

0 comments on commit c02a44b

Please sign in to comment.