From d88719a978fdcb4090b6837aec551eca77ade3ac Mon Sep 17 00:00:00 2001 From: GreenK173 Date: Wed, 8 Jan 2025 18:03:14 +0100 Subject: [PATCH] Add directory of the collection to SigMFCollection (#85) * fix path handling for collections * unit test --------- Co-authored-by: JKB Co-authored-by: Teque5 --- sigmf/sigmffile.py | 107 ++++++++++++++++++++++++--------------- tests/test_collection.py | 76 +++++++++++++++++++++++++++ 2 files changed, 142 insertions(+), 41 deletions(-) create mode 100644 tests/test_collection.py diff --git a/sigmf/sigmffile.py b/sigmf/sigmffile.py index 683cdf8..26824a6 100644 --- a/sigmf/sigmffile.py +++ b/sigmf/sigmffile.py @@ -14,6 +14,7 @@ import warnings from collections import OrderedDict from os import path +from pathlib import Path import numpy as np @@ -176,7 +177,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu map_readonly: bool, default True Indicates whether assignments on the numpy.memmap are allowed. """ - super(SigMFFile, self).__init__() + super().__init__() self.data_file = None self.sample_count = 0 self._memmap = None @@ -731,22 +732,36 @@ class SigMFCollection(SigMFMetafile): ] VALID_KEYS = {COLLECTION_KEY: VALID_COLLECTION_KEYS} - def __init__(self, metafiles=None, metadata=None, skip_checksums=False): - """Create a SigMF Collection object. - - Parameters: - - metafiles -- A list of SigMF metadata filenames objects comprising the Collection, - there must be at least one file. If the files do not exist, this will - raise a SigMFFileError. - - metadata -- collection metadata to use, if not provided this will populate a - minimal set of default metadata. The core:streams field will be - regenerated automatically + def __init__(self, metafiles: list = None, metadata: dict = None, base_path=None, skip_checksums: bool = False) -> None: """ - super(SigMFCollection, self).__init__() + Create a SigMF Collection object. 
+ + Parameters + ---------- + metafiles: list, optional + A list of SigMF metadata filenames objects comprising the Collection. + There should be at least one file. + metadata: dict, optional + Collection metadata to use, if not provided this will populate a minimal set of default metadata. + The `core:streams` field will be regenerated automatically. + base_path : str | bytes | PathLike, optional + Base path of the collection recordings. + skip_checksums : bool, optional + If true will skip calculating checksum on datasets. + + Raises + ------ + SigMFError + If metadata files do not exist. + """ + super().__init__() self.skip_checksums = skip_checksums + if base_path is None: + self.base_path = Path("") + else: + self.base_path = Path(base_path) + if metadata is None: self._metadata = {self.COLLECTION_KEY: {}} self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = [] @@ -764,55 +779,64 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False): if not self.skip_checksums: self.verify_stream_hashes() - def __len__(self): + def __len__(self) -> int: """ - the length of a collection is the number of streams + The length of a collection is the number of streams. """ return len(self.get_stream_names()) - def verify_stream_hashes(self): + def verify_stream_hashes(self) -> None: """ - compares the stream hashes in the collection metadata to the metadata files + Compares the stream hashes in the collection metadata to the metadata files. + + Raises + ------ + SigMFFileError + If any dataset checksums do not match saved metadata. 
""" streams = self.get_collection_field(self.STREAMS_KEY, []) for stream in streams: old_hash = stream.get("hash") metafile_name = get_sigmf_filenames(stream.get("name"))["meta_fn"] - if path.isfile(metafile_name): - new_hash = sigmf_hash.calculate_sha512(filename=metafile_name) + metafile_path = self.base_path / metafile_name + if path.isfile(metafile_path): + new_hash = sigmf_hash.calculate_sha512(filename=metafile_path) if old_hash != new_hash: - raise SigMFFileError(f"Calculated file hash for {metafile_name} does not match collection metadata.") + raise SigMFFileError( + f"Calculated file hash for {metafile_path} does not match collection metadata." + ) - def set_streams(self, metafiles): + def set_streams(self, metafiles) -> None: """ - configures the collection `core:streams` field from the specified list of metafiles + Configures the collection `core:streams` field from the specified list of metafiles. """ self.metafiles = metafiles streams = [] for metafile in self.metafiles: - if metafile.endswith(".sigmf-meta") and path.isfile(metafile): + metafile_path = self.base_path / metafile + if metafile.endswith(".sigmf-meta") and path.isfile(metafile_path): stream = { "name": get_sigmf_filenames(metafile)["base_fn"], - "hash": sigmf_hash.calculate_sha512(filename=metafile), + "hash": sigmf_hash.calculate_sha512(filename=metafile_path), } streams.append(stream) else: - raise SigMFFileError(f"Specifed stream file {metafile} is not a valid SigMF Metadata file") + raise SigMFFileError(f"Specifed stream file {metafile_path} is not a valid SigMF Metadata file") self.set_collection_field(self.STREAMS_KEY, streams) - def get_stream_names(self): + def get_stream_names(self) -> list: """ - returns a list of `name` object(s) from the `collection` level `core:streams` metadata + Returns a list of `name` object(s) from the `collection` level `core:streams` metadata. 
""" return [s.get("name") for s in self.get_collection_field(self.STREAMS_KEY, [])] - def set_collection_info(self, new_collection): + def set_collection_info(self, new_collection: dict) -> None: """ Overwrite the collection info with a new dictionary. """ self._metadata[self.COLLECTION_KEY] = new_collection.copy() - def get_collection_info(self): + def get_collection_info(self) -> dict: """ Returns a dictionary with all the collection info. """ @@ -821,19 +845,19 @@ def get_collection_info(self): except AttributeError: return {} - def set_collection_field(self, key, value): + def set_collection_field(self, key: str, value) -> None: """ Inserts a value into the collection field. """ self._metadata[self.COLLECTION_KEY][key] = value - def get_collection_field(self, key, default=None): + def get_collection_field(self, key: str, default=None): """ Return a field from the collection info, or default if the field is not set. """ return self._metadata[self.COLLECTION_KEY].get(key, default) - def tofile(self, file_path, pretty=True): + def tofile(self, file_path, pretty: bool = True) -> None: """ Write metadata file @@ -844,10 +868,10 @@ def tofile(self, file_path, pretty=True): pretty : bool, default True When True will write more human-readable output, otherwise will be flat JSON. 
""" - fns = get_sigmf_filenames(file_path) - with open(fns["collection_fn"], "w") as fp: - self.dump(fp, pretty=pretty) - fp.write("\n") # text files should end in carriage return + filenames = get_sigmf_filenames(file_path) + with open(filenames["collection_fn"], "w") as handle: + self.dump(handle, pretty=pretty) + handle.write("\n") # text files should end in carriage return def get_SigMFFile(self, stream_name=None, stream_index=None): """ @@ -857,11 +881,11 @@ def get_SigMFFile(self, stream_name=None, stream_index=None): if stream_name is not None: if stream_name in self.get_stream_names(): metafile = stream_name + ".sigmf_meta" - if stream_index is not None and stream_index < self.__len__(): + if stream_index is not None and stream_index < len(self): metafile = self.get_stream_names()[stream_index] + ".sigmf_meta" - if metafile is not None: - return fromfile(metafile, skip_checksum=self.skip_checksums) + metafile_path = self.base_path / metafile + return fromfile(metafile_path, skip_checksum=self.skip_checksums) def dtype_info(datatype): @@ -1022,7 +1046,8 @@ def fromfile(filename, skip_checksum=False): metadata = json.load(mdfile_reader) collection_fp.close() - return SigMFCollection(metadata=metadata, skip_checksums=skip_checksum) + dir_path = path.split(meta_fn)[0] + return SigMFCollection(metadata=metadata, base_path=dir_path, skip_checksums=skip_checksum) else: meta_fp = open(meta_fn, "rb") diff --git a/tests/test_collection.py b/tests/test_collection.py new file mode 100644 index 0000000..0f80660 --- /dev/null +++ b/tests/test_collection.py @@ -0,0 +1,76 @@ +# Copyright: Multiple Authors +# +# This file is part of sigmf-python. 
https://github.com/sigmf/sigmf-python +# +# SPDX-License-Identifier: LGPL-3.0-or-later + +"""Tests for collections""" + +import copy +import os +import shutil +import tempfile +import unittest +from pathlib import Path + +import numpy as np +from hypothesis import given +from hypothesis import strategies as st + +from sigmf.archive import SIGMF_COLLECTION_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT +from sigmf.sigmffile import SigMFCollection, SigMFFile, fromfile + +from .testdata import TEST_FLOAT32_DATA, TEST_METADATA + + +class TestCollection(unittest.TestCase): + """unit tests for collections""" + + def setUp(self): + """create temporary path""" + self.temp_dir = Path(tempfile.mkdtemp()) + + def tearDown(self): + """remove temporary path""" + shutil.rmtree(self.temp_dir) + + @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"])) + def test_load_collection(self, subdir: str) -> None: + """test path handling for collections""" + data_name1 = "dat1" + SIGMF_DATASET_EXT + data_name2 = "dat2" + SIGMF_DATASET_EXT + meta_name1 = "dat1" + SIGMF_METADATA_EXT + meta_name2 = "dat2" + SIGMF_METADATA_EXT + collection_name = "collection" + SIGMF_COLLECTION_EXT + data_path1 = self.temp_dir / subdir / data_name1 + data_path2 = self.temp_dir / subdir / data_name2 + meta_path1 = self.temp_dir / subdir / meta_name1 + meta_path2 = self.temp_dir / subdir / meta_name2 + collection_path = self.temp_dir / subdir / collection_name + os.makedirs(collection_path.parent, exist_ok=True) + + # create data files + TEST_FLOAT32_DATA.tofile(data_path1) + TEST_FLOAT32_DATA.tofile(data_path2) + + # create metadata files + metadata = copy.deepcopy(TEST_METADATA) + meta1 = SigMFFile(metadata=metadata, data_file=data_path1) + meta2 = SigMFFile(metadata=metadata, data_file=data_path2) + meta1.tofile(meta_path1) + meta2.tofile(meta_path2) + + # create collection + collection = SigMFCollection( + metafiles=[meta_name1, meta_name2], + base_path=str(self.temp_dir / subdir), + ) + 
collection.tofile(collection_path) + + # load collection + collection_loopback = fromfile(collection_path) + meta1_loopback = collection_loopback.get_SigMFFile(stream_index=0) + meta2_loopback = collection_loopback.get_SigMFFile(stream_index=1) + + self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta1_loopback.read_samples())) + self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, meta2_loopback[:]))