diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 72f65f5..16b6757 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -14,3 +14,7 @@ A clear and concise description of what the bug is.
 Please provide a sample file with the given error or code to produce such a file. Without a sample file to work with, I likely cannot help you. You can upload your file either directly to GitHub or https://wetransfer.com
 
 If your data contains privacy sensitive information, consider altering it in a way to remove the information or request me for a temporal mail adress that you can send your file confidentially to.
+
+
+
+If you get `ERROR: MATLAB type not supported: XXX, (uint32)` there is probably very little I can do, as this represents a MATLAB proprietary data type, and nobody has yet figured out how to decode them.
diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml
index 58c5184..170da9a 100644
--- a/.github/workflows/python-package.yml
+++ b/.github/workflows/python-package.yml
@@ -15,15 +15,17 @@ jobs:
     runs-on: ${{ matrix.os }}
     strategy:
       matrix:
-        python-version: [3.6, 3.7, 3.8, 3.9, 3.10.0]
+        python-version: [3.7, 3.8, 3.9]
         os: [ubuntu-latest, macos-latest, windows-latest]
 
     steps:
-    - uses: actions/checkout@v2
+    - uses: actions/checkout@v3
     - name: Set up Python ${{ matrix.python-version }}
-      uses: actions/setup-python@v2
+      uses: actions/setup-python@v4
       with:
         python-version: ${{ matrix.python-version }}
+        cache: 'pip' # caching pip dependencies
+
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
diff --git a/mat73/core.py b/mat73/core.py
index 48e799f..7439ad1 100644
--- a/mat73/core.py
+++ b/mat73/core.py
@@ -45,7 +45,7 @@ def print_tree(node):
 class HDF5Decoder():
     def __init__(self, verbose=True, use_attrdict=False,
                  only_include=None):
-        
+
         if isinstance(only_include, str):
             only_include = [only_include]
         if only_include is not None:
@@ -58,8 +58,6 @@ def __init__(self, verbose=True, use_attrdict=False,
         if only_include is not None:
             _vardict = dict(zip(only_include, [False]*len(only_include)))
             self._found_include_var = _vardict
-
-
 
     def is_included(self, hdf5):
         # if only_include is not specified, we always return True
@@ -73,11 +71,10 @@ def is_included(self, hdf5):
             if s in hdf5.name or hdf5.name in s:
                 return True
         return False
-        
 
     def mat2dict(self, hdf5):
-        
-        if '#refs#' in hdf5: 
+
+        if '#refs#' in hdf5:
             self.refs = hdf5['#refs#']
         d = self._dict_class()
         for var in hdf5:
@@ -98,13 +95,13 @@ def mat2dict(self, hdf5):
                 print(hdf5.filename, 'contains the following vars:')
                 hdf5.visit(print_tree)
         return d
-    
+
     # @profile
     def unpack_mat(self, hdf5, depth=0, MATLAB_class=None):
         """
         unpack a h5py entry: if it's a group expand,
         if it's a dataset convert
-        
+
         for safety reasons, the depth cannot be larger than 99
         """
         if depth==99:
@@ -118,9 +115,9 @@ def unpack_mat(self, hdf5, depth=0, MATLAB_class=None):
                     continue
                 if 'MATLAB_class' in elem.attrs:
                     MATLAB_class = elem.attrs.get('MATLAB_class')
-                if MATLAB_class is not None: 
+                if MATLAB_class is not None:
                     MATLAB_class = MATLAB_class.decode()
-                unpacked = self.unpack_mat(elem, depth=depth+1, 
+                unpacked = self.unpack_mat(elem, depth=depth+1,
                                            MATLAB_class=MATLAB_class)
                 # sparse elements need to be loaded separately
                 # see https://github.com/skjerns/mat7.3/issues/28
@@ -129,7 +126,7 @@ def unpack_mat(self, hdf5, depth=0, MATLAB_class=None):
                         from scipy.sparse import csc_matrix
                         data = unpacked['data']
                         row_ind = unpacked['ir']
-                        col_ind = unpacked['jc'] 
+                        col_ind = unpacked['jc']
                         n_rows = elem.attrs['MATLAB_sparse']
                         n_cols = len(col_ind) - 1
                         unpacked = csc_matrix((data, row_ind, col_ind), shape=(n_rows, n_cols))
@@ -148,7 +145,7 @@ def unpack_mat(self, hdf5, depth=0, MATLAB_class=None):
                         logging.error(f'Tried loading the sparse matrix `{elem.name}`'
                                       ' but something went wrong: {e}\n{e.__traceback__}')
                         raise e
 
-                
+
                 elif MATLAB_class=='struct' and len(elem)>1 and \
                     isinstance(unpacked, dict):
@@ -183,28 +180,28 @@ def unpack_mat(self, hdf5, depth=0, MATLAB_class=None):
             return self.convert_mat(hdf5, depth, MATLAB_class=MATLAB_class)
         else:
             raise Exception(f'Unknown hdf5 type: {key}:{type(hdf5)}')
-    
+
     # @profile
     def _has_refs(self, dataset):
        if len(dataset.shape)<2:
            return False
        # dataset[0].
        dataset[0][0]
-        if isinstance(dataset[0][0], h5py.h5r.Reference): 
+        if isinstance(dataset[0][0], h5py.h5r.Reference):
            return True
        return False
-    
+
     # @profile
     def convert_mat(self, dataset, depth, MATLAB_class=None):
         """
         Converts h5py.dataset into python native datatypes
         according to the matlab class annotation
-        """ 
+        """
         # all MATLAB variables have the attribute MATLAB_class
         # if this is not present, it is not convertible
         if MATLAB_class is None and 'MATLAB_class' in dataset.attrs:
             MATLAB_class = dataset.attrs['MATLAB_class'].decode()
-        
+
         if not MATLAB_class and not self._has_refs(dataset):
             if self.verbose:
                 message = 'ERROR: not a MATLAB datatype: ' + \
@@ -212,12 +209,11 @@ def convert_mat(self, dataset, depth, MATLAB_class=None):
                 logging.error(message)
             return None
 
-        known_cls = ['cell', 'char', 'bool', 'logical', 'double', 'single', 
+        known_cls = ['cell', 'char', 'bool', 'logical', 'double', 'single',
                      'int8', 'int16', 'int32', 'int64', 'uint8', 'uint16',
                      'uint32', 'uint64']
 
-        if 'MATLAB_empty' in dataset.attrs.keys() and \
-            MATLAB_class in ['cell', 'struct']:
+        if 'MATLAB_empty' in dataset.attrs.keys():
             mtype = 'empty'
         elif MATLAB_class in known_cls:
             mtype = MATLAB_class
@@ -244,30 +240,35 @@ def convert_mat(self, dataset, depth, MATLAB_class=None):
             return cell
 
         elif mtype=='empty':
-            dims = [x for x in dataset]
-            return empty(*dims)
+            if MATLAB_class in ['cell', 'struct']:
+                dims = [x for x in dataset]
+                return empty(*dims)
+            elif MATLAB_class=='char':
+                return ''
+            else:
+
+                return None
 
-        elif mtype=='char': 
+        elif mtype=='char':
             string_array = np.ravel(dataset)
             string_array = ''.join([chr(x) for x in string_array])
-            string_array = string_array.replace('\x00', '')
             return string_array
 
         elif mtype=='bool':
             return bool(dataset)
 
-        elif mtype=='logical': 
+        elif mtype=='logical':
             arr = np.array(dataset, dtype=bool).T.squeeze()
             if arr.size==1: arr=bool(arr)
             return arr
 
-        elif mtype=='canonical empty': 
+        elif mtype=='canonical empty':
             return None
 
         # complex numbers need to be filtered out separately
         elif 'imag' in str(dataset.dtype):
             if dataset.attrs['MATLAB_class']==b'single':
-                dtype = np.complex64 
+                dtype = np.complex64
             else:
                 dtype = np.complex128
             arr = np.array(dataset)
@@ -275,7 +276,7 @@ def convert_mat(self, dataset, depth, MATLAB_class=None):
             return arr.T.squeeze()
 
         # if it is none of the above, we can convert to numpy array
-        elif mtype in ('double', 'single', 'int8', 'int16', 'int32', 'int64', 
+        elif mtype in ('double', 'single', 'int8', 'int16', 'int32', 'int64',
                        'uint8', 'uint16', 'uint32', 'uint64'):
             arr = np.array(dataset, dtype=dataset.dtype)
             return arr.T.squeeze()
@@ -287,13 +288,13 @@ def convert_mat(self, dataset, depth, MATLAB_class=None):
                           '{}, ({})'.format(mtype, dataset.dtype)
                 logging.error(message)
             return None
-    
-    
+
+
 def loadmat(filename, use_attrdict=False, only_include=None, verbose=True):
     """
     Loads a MATLAB 7.3 .mat file, which is actually a
     HDF5 file with some custom matlab annotations inside
-    
+
     :param filename: A string pointing to the file
     :param use_attrdict: make it possible to access structs like in MATLAB
                          using struct.varname instead of struct['varname']
@@ -301,17 +302,17 @@ def loadmat(filename, use_attrdict=False, only_include=None, verbose=True):
                          e.g. keys(), pop(), ...
                         these will still be available by struct['keys']
     :param verbose: print warnings
-    :param only_include: A list of HDF5 paths that should be loaded. 
-                         this can greatly reduce loading times. If a path 
+    :param only_include: A list of HDF5 paths that should be loaded.
+                         this can greatly reduce loading times. If a path
                          contains further sub datasets, these will be loaded
-                         as well, e.g. 'struct/' will load all subvars of 
+                         as well, e.g. 'struct/' will load all subvars of
                          struct, 'struct/var' will load only ['struct']['var']
     :returns: A dictionary with the matlab variables loaded
     """
     assert os.path.isfile(filename), '{} does not exist'.format(filename)
     decoder = HDF5Decoder(verbose=verbose, use_attrdict=use_attrdict,
                           only_include=only_include)
-    
+
     ext = os.path.splitext(filename)[1].lower()
     if ext!='.mat':
         logging.warning('Can only load MATLAB .mat file, this file type might '
@@ -323,15 +324,15 @@ def loadmat(filename, use_attrdict=False, only_include=None, verbose=True):
     except OSError:
         raise TypeError('{} is not a MATLAB 7.3 file. '\
                         'Load with scipy.io.loadmat() instead.'.format(filename))
-    
-    
+
+
 def savemat(filename, verbose=True):
     raise NotImplementedError
 
 
-    
+
 if __name__=='__main__':
     # for testing / debugging
-    d = loadmat('../tests/testfile1.mat')
+    d = loadmat('../tests/testfile2.mat')
     # file = '../tests/testfile8.mat'
diff --git a/mat73/version.py b/mat73/version.py
index 9482b0e..4eaf90f 100644
--- a/mat73/version.py
+++ b/mat73/version.py
@@ -1 +1 @@
-__version__ = '0.59'
\ No newline at end of file
+__version__ = '0.60'
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..d6ba020
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,2 @@
+h5py>=3.0
+numpy>=1.19
\ No newline at end of file
diff --git a/tests/test_mat73.py b/tests/test_mat73.py
index 8aedb4d..30ec405 100644
--- a/tests/test_mat73.py
+++ b/tests/test_mat73.py
@@ -33,12 +33,13 @@
 class Testing(unittest.TestCase):
 
     def setUp(self):
-        for i in range(1,9):
+        """make links to test files and make sure they are present"""
+        for i in range(1, 9):
             file = 'testfile{}.mat'.format(i)
             if not os.path.exists(file):
                 file = os.path.join('./tests', file)
             self.__setattr__ ('testfile{}'.format(i), file)
-        
+
         file_npt = 'testfile9.npt'
         if not os.path.exists(file_npt):
             file_npt = os.path.join('./tests', file_npt)
@@ -50,7 +51,7 @@ def test_file1_noattr(self):
         """
         d = mat73.loadmat(self.testfile1, use_attrdict=False)
         data = d['data']
-        
+
         assert len(d)==3
         assert len(d.keys())==3
         assert len(data)==EXPECTED_VARS_FILE1
@@ -84,11 +85,11 @@ def test_file1_noattr(self):
         assert data['int16_']==16
         assert data['int16_'].dtype==np.int16
         assert data['int32_']==1115
-        assert data['int32_'].dtype==np.int32 
+        assert data['int32_'].dtype==np.int32
         assert data['int64_']==65243
-        assert data['int64_'].dtype==np.int64 
+        assert data['int64_'].dtype==np.int64
         assert data['int8_']==2
-        assert data['int8_'].dtype==np.int8 
+        assert data['int8_'].dtype==np.int8
         np.testing.assert_array_equal(data['nan_'], np.nan)
         assert data['nan_'].dtype==np.float64
         assert data['single_']==np.array(0.1, dtype=np.float32)
@@ -99,7 +100,7 @@ def test_file1_noattr(self):
         assert data['uint16_']==12
         assert data['uint16_'].dtype==np.uint16
         assert data['uint32_']==5452
-        assert data['uint32_'].dtype==np.uint32 
+        assert data['uint32_'].dtype==np.uint32
         assert data['uint64_']==32563
         assert data['uint64_'].dtype==np.uint64
         assert len(data['struct_'])==1
@@ -126,7 +127,7 @@ def test_file1_withattr(self):
         """
         d = mat73.loadmat(self.testfile1, use_attrdict=True)
         data = d['data']
-        
+
         assert len(d)==3
         assert len(d.keys())==3
         assert len(data)==EXPECTED_VARS_FILE1
@@ -159,11 +160,11 @@ def test_file1_withattr(self):
         assert data['int16_']==16
         assert data['int16_'].dtype==np.int16
         assert data['int32_']==1115
-        assert data['int32_'].dtype==np.int32 
+        assert data['int32_'].dtype==np.int32
         assert data['int64_']==65243
-        assert data['int64_'].dtype==np.int64 
+        assert data['int64_'].dtype==np.int64
         assert data['int8_']==2
-        assert data['int8_'].dtype==np.int8 
+        assert data['int8_'].dtype==np.int8
         np.testing.assert_array_equal(data['nan_'], np.nan)
         assert data['nan_'].dtype==np.float64
         assert data['single_']==np.array(0.1, dtype=np.float32)
@@ -174,7 +175,7 @@ def test_file1_withattr(self):
         assert data['uint16_']==12
         assert data['uint16_'].dtype==np.uint16
         assert data['uint32_']==5452
-        assert data['uint32_'].dtype==np.uint32 
+        assert data['uint32_'].dtype==np.uint32
         assert data['uint64_']==32563
         assert data['uint64_'].dtype==np.uint64
         assert len(data['struct_'])==1
@@ -220,11 +221,11 @@ def test_file1_withattr(self):
         assert data.int16_==16
         assert data.int16_.dtype==np.int16
         assert data.int32_==1115
-        assert data.int32_.dtype==np.int32 
+        assert data.int32_.dtype==np.int32
         assert data.int64_==65243
-        assert data.int64_.dtype==np.int64 
+        assert data.int64_.dtype==np.int64
         assert data.int8_==2
-        assert data.int8_.dtype==np.int8 
+        assert data.int8_.dtype==np.int8
         np.testing.assert_array_equal(data.nan_, np.nan)
         assert data.nan_.dtype==np.float64
         assert data.single_==np.array(0.1, dtype=np.float32)
@@ -235,7 +236,7 @@ def test_file1_withattr(self):
         assert data.uint16_==12
         assert data.uint16_.dtype==np.uint16
         assert data.uint32_==5452
-        assert data.uint32_.dtype==np.uint32 
+        assert data.uint32_.dtype==np.uint32
         assert data.uint64_==32563
         assert data.uint64_.dtype==np.uint64
         assert len(data.struct_)==1
@@ -256,13 +257,13 @@ def test_file2(self):
         """
         d = mat73.loadmat(self.testfile2)
         raw1 = d['raw1']
-        assert raw1['label'] == ['']*5
-        assert raw1['speakerType'] == ['main']*5
+        self.assertEqual(raw1['label'], ['']*5)
+        self.assertEqual(raw1['speakerType'],['main']*5)
         np.testing.assert_array_equal(raw1['channel'],[1,2,3,4,5])
         np.testing.assert_allclose(raw1['measGain'],[-1.0160217,-0.70729065,-1.2158508,0.68839645,2.464653])
         for i in range(5):
             assert np.isclose(np.sum(raw1['h'][i]),-0.0007341067459898744)
-        
+
         np.testing.assert_array_almost_equal(raw1['HSmooth'][0][2],
                 [ 0.001139-4.233492e-04j,  0.00068 +8.927040e-06j,
                   0.002382-7.647651e-04j, -0.012677+3.767829e-03j])
@@ -273,8 +274,8 @@ def test_file3(self):
         """
         d = mat73.loadmat(self.testfile3)
         raw1 = d['raw1']
-        assert raw1['label'] == ['']*5
-        assert raw1['speakerType'] == ['main']*5
+        self.assertEqual(raw1['label'], ['']*5)
+        self.assertEqual(raw1['speakerType'], ['main']*5)
         np.testing.assert_array_equal(raw1['channel'],[1,2,3,4,5])
         np.testing.assert_allclose(raw1['measGain'],[-1.0160217,-0.70729065,-1.2158508,0.68839645,2.464653])
         for i in range(5):
@@ -348,18 +349,18 @@ def test_file6_empty_cell_array(self):
 
     def test_file7_empty_cell_array(self):
         data = mat73.loadmat(self.testfile7)
-        
+
 
     def test_can_load_other_extension(self):
         with self.assertLogs(level='WARNING'):
             data = mat73.loadmat(self.testfile_npt)
-        
+
 
     def test_load_specific_vars(self):
         for key in ['keys', ['keys']]:
             data = mat73.loadmat(self.testfile1, only_include=key)
             assert len(data)==1
             assert data['keys']=='must_not_overwrite'
-        
+
         with self.assertLogs(level='WARNING'):
             data = mat73.loadmat(self.testfile1, only_include='notpresent')
@@ -367,7 +368,7 @@ def test_load_specific_vars(self):
         assert len(data)==2
         assert len(data['data'])==EXPECTED_VARS_FILE1
         assert len(data['data']['cell_'])==7
-        
+
         # check if loading times are faster, should be the case.
         start = time.time()
         data = mat73.loadmat(self.testfile4)
@@ -376,14 +377,16 @@ def test_load_specific_vars(self):
         data = mat73.loadmat(self.testfile4, only_include='Res/HRV/Param/use_custom_print_command')
         elapsed2 = time.time()-start
         assert elapsed2
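
For context, a minimal usage sketch of the loadmat behaviour this diff touches: the only_include filter and the per-class handling of datasets flagged MATLAB_empty (empty cell/struct become an empty array via the module's empty() helper, empty char becomes '', any other class becomes None). The file name 'example.mat' and the variable path 'struct/var' below are hypothetical placeholders, not files from the test suite.

import mat73

# Hypothetical file and variable names -- placeholders, not shipped test data.
# Load only one HDF5 path; datasets nested below that path are loaded as well.
partial = mat73.loadmat('example.mat', only_include='struct/var')
print(partial['struct']['var'])

# use_attrdict=True additionally allows MATLAB-style dot access on structs.
full = mat73.loadmat('example.mat', use_attrdict=True)
print(full['struct'].var)

# With this patch, any dataset flagged MATLAB_empty is resolved by its class:
# empty cell/struct -> empty array, empty char -> '', any other class -> None.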