Skip to content

Commit

Permalink
Add directory of the collection to SigMFCollection (#85)
Browse files Browse the repository at this point in the history
* fix path handling for collections
* unit test

---------

Co-authored-by: JKB <[email protected]>
Co-authored-by: Teque5 <[email protected]>
  • Loading branch information
3 people authored Jan 8, 2025
1 parent 2274172 commit d88719a
Show file tree
Hide file tree
Showing 2 changed files with 142 additions and 41 deletions.
107 changes: 66 additions & 41 deletions sigmf/sigmffile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import warnings
from collections import OrderedDict
from os import path
from pathlib import Path

import numpy as np

Expand Down Expand Up @@ -176,7 +177,7 @@ def __init__(self, metadata=None, data_file=None, global_info=None, skip_checksu
map_readonly: bool, default True
Indicates whether assignments on the numpy.memmap are allowed.
"""
super(SigMFFile, self).__init__()
super().__init__()
self.data_file = None
self.sample_count = 0
self._memmap = None
Expand Down Expand Up @@ -731,22 +732,36 @@ class SigMFCollection(SigMFMetafile):
]
VALID_KEYS = {COLLECTION_KEY: VALID_COLLECTION_KEYS}

def __init__(self, metafiles=None, metadata=None, skip_checksums=False):
"""Create a SigMF Collection object.
Parameters:
metafiles -- A list of SigMF metadata filenames objects comprising the Collection,
there must be at least one file. If the files do not exist, this will
raise a SigMFFileError.
metadata -- collection metadata to use, if not provided this will populate a
minimal set of default metadata. The core:streams field will be
regenerated automatically
def __init__(self, metafiles: list = None, metadata: dict = None, base_path=None, skip_checksums: bool = False) -> None:
"""
super(SigMFCollection, self).__init__()
Create a SigMF Collection object.
Parameters
----------
metafiles: list, optional
A list of SigMF metadata filenames comprising the Collection.
There should be at least one file.
metadata: dict, optional
Collection metadata to use, if not provided this will populate a minimal set of default metadata.
The `core:streams` field will be regenerated automatically.
base_path : str | bytes | PathLike, optional
Base path of the collection recordings.
skip_checksums : bool, optional
If true will skip calculating checksum on datasets.
Raises
------
SigMFFileError
If metadata files do not exist.
"""
super().__init__()
self.skip_checksums = skip_checksums

if base_path is None:
self.base_path = Path("")
else:
self.base_path = Path(base_path)

if metadata is None:
self._metadata = {self.COLLECTION_KEY: {}}
self._metadata[self.COLLECTION_KEY][self.STREAMS_KEY] = []
Expand All @@ -764,55 +779,64 @@ def __init__(self, metafiles=None, metadata=None, skip_checksums=False):
if not self.skip_checksums:
self.verify_stream_hashes()

def __len__(self):
def __len__(self) -> int:
"""
the length of a collection is the number of streams
The length of a collection is the number of streams.
"""
return len(self.get_stream_names())

def verify_stream_hashes(self):
def verify_stream_hashes(self) -> None:
"""
compares the stream hashes in the collection metadata to the metadata files
Compares the stream hashes in the collection metadata to the metadata files.
Raises
------
SigMFFileError
If the hash calculated for any stream's metadata file does not match the hash saved in the collection metadata.
"""
streams = self.get_collection_field(self.STREAMS_KEY, [])
for stream in streams:
old_hash = stream.get("hash")
metafile_name = get_sigmf_filenames(stream.get("name"))["meta_fn"]
if path.isfile(metafile_name):
new_hash = sigmf_hash.calculate_sha512(filename=metafile_name)
metafile_path = self.base_path / metafile_name
if path.isfile(metafile_path):
new_hash = sigmf_hash.calculate_sha512(filename=metafile_path)
if old_hash != new_hash:
raise SigMFFileError(f"Calculated file hash for {metafile_name} does not match collection metadata.")
raise SigMFFileError(
f"Calculated file hash for {metafile_path} does not match collection metadata."
)

def set_streams(self, metafiles):
def set_streams(self, metafiles) -> None:
"""
configures the collection `core:streams` field from the specified list of metafiles
Configures the collection `core:streams` field from the specified list of metafiles.
"""
self.metafiles = metafiles
streams = []
for metafile in self.metafiles:
if metafile.endswith(".sigmf-meta") and path.isfile(metafile):
metafile_path = self.base_path / metafile
if metafile.endswith(".sigmf-meta") and path.isfile(metafile_path):
stream = {
"name": get_sigmf_filenames(metafile)["base_fn"],
"hash": sigmf_hash.calculate_sha512(filename=metafile),
"hash": sigmf_hash.calculate_sha512(filename=metafile_path),
}
streams.append(stream)
else:
raise SigMFFileError(f"Specifed stream file {metafile} is not a valid SigMF Metadata file")
raise SigMFFileError(f"Specifed stream file {metafile_path} is not a valid SigMF Metadata file")
self.set_collection_field(self.STREAMS_KEY, streams)

def get_stream_names(self):
def get_stream_names(self) -> list:
"""
returns a list of `name` object(s) from the `collection` level `core:streams` metadata
Returns a list of `name` object(s) from the `collection` level `core:streams` metadata.
"""
return [s.get("name") for s in self.get_collection_field(self.STREAMS_KEY, [])]

def set_collection_info(self, new_collection):
def set_collection_info(self, new_collection: dict) -> None:
"""
Overwrite the collection info with a new dictionary.
"""
self._metadata[self.COLLECTION_KEY] = new_collection.copy()

def get_collection_info(self):
def get_collection_info(self) -> dict:
"""
Returns a dictionary with all the collection info.
"""
Expand All @@ -821,19 +845,19 @@ def get_collection_info(self):
except AttributeError:
return {}

def set_collection_field(self, key, value):
def set_collection_field(self, key: str, value) -> None:
"""
Inserts a value into the collection field.
"""
self._metadata[self.COLLECTION_KEY][key] = value

def get_collection_field(self, key, default=None):
def get_collection_field(self, key: str, default=None):
"""
Return a field from the collection info, or default if the field is not set.
"""
return self._metadata[self.COLLECTION_KEY].get(key, default)

def tofile(self, file_path, pretty=True):
def tofile(self, file_path, pretty: bool = True) -> None:
"""
Write metadata file
Expand All @@ -844,10 +868,10 @@ def tofile(self, file_path, pretty=True):
pretty : bool, default True
When True will write more human-readable output, otherwise will be flat JSON.
"""
fns = get_sigmf_filenames(file_path)
with open(fns["collection_fn"], "w") as fp:
self.dump(fp, pretty=pretty)
fp.write("\n") # text files should end in carriage return
filenames = get_sigmf_filenames(file_path)
with open(filenames["collection_fn"], "w") as handle:
self.dump(handle, pretty=pretty)
handle.write("\n") # text files should end in a newline

def get_SigMFFile(self, stream_name=None, stream_index=None):
"""
Expand All @@ -857,11 +881,11 @@ def get_SigMFFile(self, stream_name=None, stream_index=None):
if stream_name is not None:
if stream_name in self.get_stream_names():
metafile = stream_name + ".sigmf_meta"
if stream_index is not None and stream_index < self.__len__():
if stream_index is not None and stream_index < len(self):
metafile = self.get_stream_names()[stream_index] + ".sigmf_meta"

if metafile is not None:
return fromfile(metafile, skip_checksum=self.skip_checksums)
metafile_path = self.base_path / metafile
return fromfile(metafile_path, skip_checksum=self.skip_checksums)


def dtype_info(datatype):
Expand Down Expand Up @@ -1022,7 +1046,8 @@ def fromfile(filename, skip_checksum=False):
metadata = json.load(mdfile_reader)
collection_fp.close()

return SigMFCollection(metadata=metadata, skip_checksums=skip_checksum)
dir_path = path.split(meta_fn)[0]
return SigMFCollection(metadata=metadata, base_path=dir_path, skip_checksums=skip_checksum)

else:
meta_fp = open(meta_fn, "rb")
Expand Down
76 changes: 76 additions & 0 deletions tests/test_collection.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Copyright: Multiple Authors
#
# This file is part of sigmf-python. https://github.com/sigmf/sigmf-python
#
# SPDX-License-Identifier: LGPL-3.0-or-later

"""Tests for collections"""

import copy
import os
import shutil
import tempfile
import unittest
from pathlib import Path

import numpy as np
from hypothesis import given
from hypothesis import strategies as st

from sigmf.archive import SIGMF_COLLECTION_EXT, SIGMF_DATASET_EXT, SIGMF_METADATA_EXT
from sigmf.sigmffile import SigMFCollection, SigMFFile, fromfile

from .testdata import TEST_FLOAT32_DATA, TEST_METADATA


class TestCollection(unittest.TestCase):
    """Unit tests for SigMF collections."""

    def setUp(self):
        """Create a scratch directory for the files written by a test."""
        self.temp_dir = Path(tempfile.mkdtemp())

    def tearDown(self):
        """Delete the scratch directory and everything beneath it."""
        shutil.rmtree(self.temp_dir)

    @given(st.sampled_from([".", "subdir/", "sub0/sub1/sub2/"]))
    def test_load_collection(self, subdir: str) -> None:
        """Write a two-stream collection under `subdir`, load it back, and compare the samples."""
        base_dir = self.temp_dir / subdir
        stems = ("dat1", "dat2")
        data_paths = [base_dir / (stem + SIGMF_DATASET_EXT) for stem in stems]
        meta_names = [stem + SIGMF_METADATA_EXT for stem in stems]
        meta_paths = [base_dir / name for name in meta_names]
        collection_path = base_dir / ("collection" + SIGMF_COLLECTION_EXT)
        os.makedirs(collection_path.parent, exist_ok=True)

        # write the raw sample files first
        for data_path in data_paths:
            TEST_FLOAT32_DATA.tofile(data_path)

        # both recordings are built from the same copied metadata dict, then written out
        shared_metadata = copy.deepcopy(TEST_METADATA)
        recordings = [SigMFFile(metadata=shared_metadata, data_file=data_path) for data_path in data_paths]
        for recording, meta_path in zip(recordings, meta_paths):
            recording.tofile(meta_path)

        # the collection is given bare metadata filenames plus the directory that holds them
        collection = SigMFCollection(
            metafiles=meta_names,
            base_path=str(base_dir),
        )
        collection.tofile(collection_path)

        # reload the collection from disk and fetch each stream by index
        loaded = fromfile(collection_path)
        first_stream = loaded.get_SigMFFile(stream_index=0)
        second_stream = loaded.get_SigMFFile(stream_index=1)

        self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, first_stream.read_samples()))
        self.assertTrue(np.array_equal(TEST_FLOAT32_DATA, second_stream[:]))

0 comments on commit d88719a

Please sign in to comment.