forked from modelscope/data-juicer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtest_parquet_formatter.py
30 lines (22 loc) · 967 Bytes
/
test_parquet_formatter.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
import os
import unittest
from data_juicer.format.parquet_formatter import ParquetFormatter
from data_juicer.utils.unittest_utils import DataJuicerTestCaseBase
class CsvFormatterTest(DataJuicerTestCaseBase):
    """Unit tests for loading Parquet data through ``ParquetFormatter``.

    NOTE(review): the class name says "Csv" although every test here
    exercises ``ParquetFormatter`` -- presumably a copy-paste leftover.
    The name is kept unchanged so existing test IDs keep working.
    """

    # Expectations shared by both tests for the demo fixture.
    _EXPECTED_ROWS = 6
    _EXPECTED_COLUMNS = ['text', 'meta']

    def setUp(self):
        """Resolve the fixture directory and the demo Parquet file in it."""
        # Chain to the base class so any fixture logic it defines still runs.
        super().setUp()
        here = os.path.dirname(os.path.realpath(__file__))
        self._path = os.path.join(here, 'data', 'structured')
        self._file = os.path.join(self._path, 'demo-dataset.parquet')

    def _assert_demo_dataset(self, dataset_path):
        """Load ``dataset_path`` and check the expected size and columns.

        Args:
            dataset_path: path handed to ``ParquetFormatter``; the tests
                pass either the Parquet file or its parent directory.
        """
        ds = ParquetFormatter(dataset_path).load_dataset()
        self.assertEqual(len(ds), self._EXPECTED_ROWS)
        # Column order is part of the assertion, hence the list comparison.
        self.assertEqual(list(ds.features.keys()), self._EXPECTED_COLUMNS)

    def test_parquet_file(self):
        """A single ``.parquet`` file loads with 6 rows and text/meta."""
        self._assert_demo_dataset(self._file)

    def test_parquet_path(self):
        """The directory holding the file loads to the same dataset."""
        self._assert_demo_dataset(self._path)
# Run the tests only when this file is executed as a script; importing the
# module (e.g. during test discovery) must not start the runner.
if __name__ == '__main__':
    unittest.main()