From 79d043d543bad1b17ff3316a5143895f0f50d144 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 15:14:33 +0000 Subject: [PATCH 01/28] Add option to have different spacing between modules --- vdsgen/vdsgen.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index af86758..ad409b2 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -17,7 +17,8 @@ ["datasets", "frames", "height", "width", "dtype"]) VDS = namedtuple("VDS", ["shape", "spacing", "path"]) -DATASET_SPACING = 10 # Pixel spacing between each dataset in VDS +INTER_MODULE_SPACING = 10 # Pixel spacing between stripes in a module +MODULE_SPACING = 10 # Pixel spacing between modules def parse_args(): @@ -126,10 +127,17 @@ def construct_vds_metadata(source, output_file): VDS: Shape, dataset spacing and output path of virtual data set """ - datasets = len(source.datasets) - height = (source.height * datasets) + (DATASET_SPACING * (datasets - 1)) + stripes = len(source.datasets) + + spacing = [0] * stripes + for idx in range(0, stripes - 1, 2): + spacing[idx] = INTER_MODULE_SPACING + for idx in range(1, stripes, 2): + spacing[idx] = MODULE_SPACING + spacing[-1] = 0 # We don't want the final stripe to have a gap afterwards + + height = (source.height * stripes) + sum(spacing) shape = (source.frames, height, source.width) - spacing = source.height + DATASET_SPACING return VDS(shape=shape, spacing=spacing, path=output_file) @@ -149,15 +157,17 @@ def create_vds_maps(source, vds_data): vds = h5.VirtualTarget(vds_data.path, "full_frame", shape=vds_data.shape) map_list = [] + current_position = 0 for idx, dataset in enumerate(source.datasets): logging.info("Processing dataset %s", idx + 1) v_source = h5.VirtualSource(dataset, "data", shape=source_shape) - start = idx * vds_data.spacing - stop = start + source.height - v_target = vds[:, start:stop, :] + start = current_position + stop = start + source.height + 
vds_data.spacing[idx] + current_position = stop + v_target = vds[:, start:stop, :] v_map = h5.VirtualMap(v_source, v_target, dtype=source.dtype) map_list.append(v_map) From 49dc45ef97071a999de58e396d2a932f62e7afb9 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 15:22:48 +0000 Subject: [PATCH 02/28] Add global variable for the location of raw data within the hdf5 files --- vdsgen/vdsgen.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index ad409b2..59f1119 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -18,7 +18,8 @@ VDS = namedtuple("VDS", ["shape", "spacing", "path"]) INTER_MODULE_SPACING = 10 # Pixel spacing between stripes in a module -MODULE_SPACING = 10 # Pixel spacing between modules +MODULE_SPACING = 10 # Pixel spacing between modules +DATA_PATH = "data" # Location of data in HDF5 file tree def parse_args(): @@ -87,7 +88,7 @@ def grab_metadata(file_path): dict: Number of frames, height, width and data type of datasets """ - h5_data = h5.File(file_path, 'r')["data"] + h5_data = h5.File(file_path, 'r')[DATA_PATH] frames, height, width = h5_data.shape data_type = h5_data.dtype @@ -161,7 +162,7 @@ def create_vds_maps(source, vds_data): for idx, dataset in enumerate(source.datasets): logging.info("Processing dataset %s", idx + 1) - v_source = h5.VirtualSource(dataset, "data", shape=source_shape) + v_source = h5.VirtualSource(dataset, DATA_PATH, shape=source_shape) start = current_position stop = start + source.height + vds_data.spacing[idx] From 85671eba92f9ea804d94350746c3a3680c129e31 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 15:24:23 +0000 Subject: [PATCH 03/28] Update to allow relative paths to be passed on the command line --- vdsgen/vdsgen.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 59f1119..31f4fd5 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -51,7 +51,7 @@ def 
find_files(path, prefix): files = [] for file_ in sorted(os.listdir(path)): if re.match(regex, file_): - files.append(os.path.join(path, file_)) + files.append(os.path.abspath(os.path.join(path, file_))) if len(files) == 0: raise IOError("No files matching pattern found.") @@ -186,7 +186,7 @@ def generate_vds(path, prefix): """ file_paths = find_files(path, prefix) vds_name = construct_vds_name(prefix, file_paths) - output_file = os.path.join(path, vds_name) + output_file = os.path.abspath(os.path.join(path, vds_name)) file_names = [file_.split('/')[-1] for file_ in file_paths] logging.info("Combining datasets %s into %s", From 9ad90b4ba24eca986acade623b71e25fff142180 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 15:24:57 +0000 Subject: [PATCH 04/28] Allow files with extension .hdf to be processed --- vdsgen/vdsgen.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 31f4fd5..988650f 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -46,7 +46,7 @@ def find_files(path, prefix): list: HDF5 files in folder that have the given prefix """ - regex = re.compile(prefix + r"\d\.(hdf5|h5)") + regex = re.compile(prefix + r"\d+\.(hdf5|hdf|h5)") files = [] for file_ in sorted(os.listdir(path)): From 1ad0257460f3928e27c76fd9f00bbf8fdbc1e071 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 15:26:16 +0000 Subject: [PATCH 05/28] Print given path and prefix when no files are found --- vdsgen/vdsgen.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 988650f..8eabfff 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -54,7 +54,8 @@ def find_files(path, prefix): files.append(os.path.abspath(os.path.join(path, file_))) if len(files) == 0: - raise IOError("No files matching pattern found.") + raise IOError("No files matching pattern found. 
Got path: {path}, " + "prefix: {prefix}".format(path=path, prefix=prefix)) elif len(files) < 2: raise IOError("Folder must contain more than one matching HDF5 file.") else: From b5a674ac4fd1e14f0448aecc19cef5da3cc8958f Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 1 Mar 2017 16:11:59 +0000 Subject: [PATCH 06/28] Update tests --- tests/vdsgen_test.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index fbdd497..9f7f551 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -120,7 +120,8 @@ def test_process_source_datasets_given_mismatched_data(self, grab_mock): def test_construct_vds_metadata(self): source = vdsgen.Source(frames=3, height=256, width=2048, dtype="uint16", datasets=[""]*6) - expected_vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=266, + expected_vds = vdsgen.VDS(shape=(3, 1586, 2048), + spacing=[10] * 5 + [0], path="/test/path") vds = vdsgen.construct_vds_metadata(source, "/test/path") @@ -133,7 +134,8 @@ def test_construct_vds_metadata(self): def test_create_vds_maps(self, target_mock, source_mock, map_mock): source = vdsgen.Source(frames=3, height=256, width=2048, dtype="uint16", datasets=["source"]*6) - vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=266, path="/test/path") + vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0], + path="/test/path") map_list = vdsgen.create_vds_maps(source, vds) @@ -188,3 +190,5 @@ def test_main(self, parse_mock, generate_mock): parse_mock.assert_called_once_with() generate_mock.assert_called_once_with(args_mock.path, args_mock.prefix) + source = vdsgen.Source(frames=3, height=256, width=2048, + dtype="uint16", datasets=[""]*6) From 088099a87453dfafaa1e706f87073b234bde7f1c Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 6 Mar 2017 11:27:13 +0000 Subject: [PATCH 07/28] Update so data_path, stripe_spacing and module_spacing can be passed as arguments --- vdsgen/vdsgen.py | 54 
+++++++++++++++++++++++++++++++++++------------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 8eabfff..e72f60f 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -17,9 +17,9 @@ ["datasets", "frames", "height", "width", "dtype"]) VDS = namedtuple("VDS", ["shape", "spacing", "path"]) -INTER_MODULE_SPACING = 10 # Pixel spacing between stripes in a module -MODULE_SPACING = 10 # Pixel spacing between modules -DATA_PATH = "data" # Location of data in HDF5 file tree +STRIPE_SPACING = 10 # Pixel spacing between stripes in a module +MODULE_SPACING = 10 # Pixel spacing between modules +DATA_PATH = "data" # Location of data in HDF5 file tree def parse_args(): @@ -31,6 +31,15 @@ def parse_args(): help="Root name of images - e.g 'stripe_' to combine " "the images 'stripe_1.hdf5', 'stripe_2.hdf5' " "and 'stripe_3.hdf5' located at .") + parser.add_argument("-s", "--stripe_spacing", nargs="?", type=int, + default=None, dest="stripe_spacing", + help="Spacing between two stripes in a module.") + parser.add_argument("-m", "--module_spacing", nargs="?", type=int, + default=None, dest="module_spacing", + help="Spacing between two modules.") + parser.add_argument("-d", "--data_path", nargs="?", type=str, default=None, + dest="data_path", + help="Data location in HDF5 files.") return parser.parse_args() @@ -79,7 +88,7 @@ def construct_vds_name(prefix, files): return vds_name -def grab_metadata(file_path): +def grab_metadata(file_path, data_path=None): """Grab data from given HDF5 file. Args: @@ -118,24 +127,31 @@ def process_source_datasets(datasets): width=data['width'], dtype=data['dtype'], datasets=datasets) -def construct_vds_metadata(source, output_file): +def construct_vds_metadata(source, output_file, + stripe_spacing=None, module_spacing=None): """Construct VDS data attributes from source attributes. 
Args: source(Source): Attributes of data sets output_file(str): File path of new VDS + stripe_spacing(int): Spacing between stripes in module + module_spacing(int): Spacing between modules Returns: VDS: Shape, dataset spacing and output path of virtual data set """ - stripes = len(source.datasets) + if stripe_spacing is None: + stripe_spacing = STRIPE_SPACING + if module_spacing is None: + module_spacing = MODULE_SPACING + stripes = len(source.datasets) spacing = [0] * stripes for idx in range(0, stripes - 1, 2): - spacing[idx] = INTER_MODULE_SPACING + spacing[idx] = stripe_spacing for idx in range(1, stripes, 2): - spacing[idx] = MODULE_SPACING + spacing[idx] = module_spacing spacing[-1] = 0 # We don't want the final stripe to have a gap afterwards height = (source.height * stripes) + sum(spacing) @@ -144,12 +160,13 @@ def construct_vds_metadata(source, output_file): return VDS(shape=shape, spacing=spacing, path=output_file) -def create_vds_maps(source, vds_data): +def create_vds_maps(source, vds_data, data_path): """Create a list of VirtualMaps of raw data to the VDS. Args: source(Source): Source attributes vds_data(VDS): VDS attributes + data_path(str): Path to raw data in HDF5 file Returns: list(VirtualMap): Maps describing links between raw data and VDS @@ -163,7 +180,7 @@ def create_vds_maps(source, vds_data): for idx, dataset in enumerate(source.datasets): logging.info("Processing dataset %s", idx + 1) - v_source = h5.VirtualSource(dataset, DATA_PATH, shape=source_shape) + v_source = h5.VirtualSource(dataset, data_path, shape=source_shape) start = current_position stop = start + source.height + vds_data.spacing[idx] @@ -176,15 +193,21 @@ def create_vds_maps(source, vds_data): return map_list -def generate_vds(path, prefix): +def generate_vds(path, prefix, data_path, stripe_spacing, module_spacing): """Generate a virtual dataset. Args: path(str): Path to folder containing HDF5 files prefix(str): Prefix of HDF5 files to generate from (in folder) e.g. 
image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 + data_path(str): Path to raw data in HDF5 file + stripe_spacing(int): Spacing between stripes in module + module_spacing(int): Spacing between modules """ + if data_path is None: + data_path = DATA_PATH + file_paths = find_files(path, prefix) vds_name = construct_vds_name(prefix, file_paths) output_file = os.path.abspath(os.path.join(path, vds_name)) @@ -194,8 +217,9 @@ def generate_vds(path, prefix): ", ".join(file_names), vds_name) source = process_source_datasets(file_paths) - vds_data = construct_vds_metadata(source, output_file) - map_list = create_vds_maps(source, vds_data) + vds_data = construct_vds_metadata(source, output_file, + stripe_spacing, module_spacing) + map_list = create_vds_maps(source, vds_data, data_path) logging.info("Creating VDS at %s", output_file) with h5.File(output_file, "w", libver="latest") as vds_file: @@ -207,7 +231,9 @@ def generate_vds(path, prefix): def main(): """Run program.""" args = parse_args() - generate_vds(args.path, args.prefix) + + generate_vds(args.path, args.prefix, args.data_path, + args.stripe_spacing, args.module_spacing) if __name__ == "__main__": sys.exit(main()) From 86b211fff5dd5623684050a9e5b49e30de1332df Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 6 Mar 2017 16:25:45 +0000 Subject: [PATCH 08/28] Update to allow an empty VDS to be generated, before the raw files exist --- vdsgen/vdsgen.py | 99 +++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 81 insertions(+), 18 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index e72f60f..c218d47 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -27,10 +27,37 @@ def parse_args(): parser = ArgumentParser() parser.add_argument("path", type=str, help="Path to folder containing HDF5 files.") - parser.add_argument("prefix", type=str, - help="Root name of images - e.g 'stripe_' to combine " - "the images 'stripe_1.hdf5', 'stripe_2.hdf5' " - "and 'stripe_3.hdf5' located at .") + + # 
Definition of file names in - Common prefix or explicit list + file_definition = parser.add_mutually_exclusive_group(required=True) + file_definition.add_argument( + "-p", "--prefix", type=str, default=None, dest="prefix", + help="Prefix of files - e.g 'stripe_' to combine the images " + "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' located " + "at .") + file_definition.add_argument( + "-f", "--files", nargs="*", type=str, default=None, dest="files", + help="Manually define files to combine.") + + # Arguments required to allow VDS to be created before raw files exist + parser.add_argument( + "-e", "--empty", action="store_true", dest="empty", + help="Make empty VDS pointing to datasets that don't exist, yet.") + source_metadata = parser.add_argument_group() + source_metadata.add_argument( + "--frames", type=int, default=1, dest="frames", + help="Number of frames to combine into VDS.") + source_metadata.add_argument( + "--height", type=int, default=256, dest="height", + help="Height of raw datasets.") + source_metadata.add_argument( + "--width", type=int, default=1024, dest="width", + help="Width of raw datasets.") + source_metadata.add_argument( + "--data_type", type=str, default="uint16", dest="data_type", + help="Data type of raw datasets.") + + # Arguments to override defaults parser.add_argument("-s", "--stripe_spacing", nargs="?", type=int, default=None, dest="stripe_spacing", help="Spacing between two stripes in a module.") @@ -41,7 +68,16 @@ def parse_args(): dest="data_path", help="Data location in HDF5 files.") - return parser.parse_args() + args = parser.parse_args() + + if args.empty and args.files is None: + parser.error( + "To make an empty VDS you must explicitly define --files for the " + "eventual raw datasets.") + if args.files is not None and len(args.files) < 2: + parser.error("Must define at least two files to combine.") + + return args def find_files(path, prefix): @@ -88,37 +124,39 @@ def construct_vds_name(prefix, files): return 
vds_name -def grab_metadata(file_path, data_path=None): +def grab_metadata(file_path, data_path): """Grab data from given HDF5 file. Args: file_path(str): Path to HDF5 file + data_path(str): Location of raw data in HDF5 file Returns: dict: Number of frames, height, width and data type of datasets """ - h5_data = h5.File(file_path, 'r')[DATA_PATH] + h5_data = h5.File(file_path, 'r')[data_path] frames, height, width = h5_data.shape data_type = h5_data.dtype return dict(frames=frames, height=height, width=width, dtype=data_type) -def process_source_datasets(datasets): +def process_source_datasets(datasets, data_path): """Grab data from the given HDF5 files and check for consistency. Args: datasets(list(str)): Datasets to grab data from + data_path(str): Location of raw data in HDF5 file Returns: Source: Number of datasets and the attributes of them (frames, height width and data type) """ - data = grab_metadata(datasets[0]) + data = grab_metadata(datasets[0], data_path) for path in datasets[1:]: - temp_data = grab_metadata(path) + temp_data = grab_metadata(path, data_path) for attribute, value in data.items(): if temp_data[attribute] != value: raise ValueError("Files have mismatched {}".format(attribute)) @@ -193,33 +231,52 @@ def create_vds_maps(source, vds_data, data_path): return map_list -def generate_vds(path, prefix, data_path, stripe_spacing, module_spacing): +def generate_vds(path, prefix=None, files=None, source=None, data_path=None, + stripe_spacing=None, module_spacing=None): """Generate a virtual dataset. Args: path(str): Path to folder containing HDF5 files prefix(str): Prefix of HDF5 files to generate from (in folder) e.g. image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 + files(list(str)): List of files to combine. 
+ source(dict): Height, width, data_type and frames for source data data_path(str): Path to raw data in HDF5 file stripe_spacing(int): Spacing between stripes in module module_spacing(int): Spacing between modules """ + if (prefix is None and files is None) or \ + (prefix is not None and files is not None): + raise ValueError("One, and only one, of prefix or files required.") + if data_path is None: data_path = DATA_PATH - file_paths = find_files(path, prefix) - vds_name = construct_vds_name(prefix, file_paths) + if files is None: + file_paths = find_files(path, prefix) + files = [path_.split("/")[-1] for path_ in file_paths] + else: + file_paths = [os.path.join(path, file_) for file_ in files] + prefix = os.path.commonprefix(files) + + vds_name = construct_vds_name(prefix, files) output_file = os.path.abspath(os.path.join(path, vds_name)) file_names = [file_.split('/')[-1] for file_ in file_paths] logging.info("Combining datasets %s into %s", ", ".join(file_names), vds_name) - source = process_source_datasets(file_paths) - vds_data = construct_vds_metadata(source, output_file, + if source is None: + source_metadata = process_source_datasets(file_paths, data_path) + else: + source_metadata = Source( + frames=source['frames'], height=source['height'], + width=source['width'], dtype=source['dtype'], datasets=file_paths) + + vds_data = construct_vds_metadata(source_metadata, output_file, stripe_spacing, module_spacing) - map_list = create_vds_maps(source, vds_data, data_path) + map_list = create_vds_maps(source_metadata, vds_data, data_path) logging.info("Creating VDS at %s", output_file) with h5.File(output_file, "w", libver="latest") as vds_file: @@ -232,8 +289,14 @@ def main(): """Run program.""" args = parse_args() - generate_vds(args.path, args.prefix, args.data_path, - args.stripe_spacing, args.module_spacing) + if args.empty: + source_metadata = dict(frames=args.frames, height=args.height, + width=args.width, dtype=args.data_type) + else: + source_metadata = 
None + + generate_vds(args.path, args.prefix, args.files, source_metadata, + args.data_path, args.stripe_spacing, args.module_spacing) if __name__ == "__main__": sys.exit(main()) From b4566f8a0f2ac0858fe5f0edba018cfe011a6070 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 7 Mar 2017 13:15:52 +0000 Subject: [PATCH 09/28] Add check for source if given files don't exist yet --- vdsgen/vdsgen.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index c218d47..15a392c 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -268,6 +268,10 @@ def generate_vds(path, prefix=None, files=None, source=None, data_path=None, ", ".join(file_names), vds_name) if source is None: + for file_ in file_paths: + if not os.path.isfile(file_): + raise IOError("To create VDS from raw files that haven't been " + "created yet, source must be provided.") source_metadata = process_source_datasets(file_paths, data_path) else: source_metadata = Source( From 116b26b2a1c162d99e48e0a5d07c7d8295c0f351 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 7 Mar 2017 14:44:42 +0000 Subject: [PATCH 10/28] Update tests --- tests/vdsgen_test.py | 191 +++++++++++++++++++++++++++++++++++++------ 1 file changed, 165 insertions(+), 26 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 9f7f551..98ba151 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -16,21 +16,80 @@ class ParseArgsTest(unittest.TestCase): + @patch(parser_patch_path + '.add_mutually_exclusive_group') + @patch(parser_patch_path + '.add_argument_group') @patch(parser_patch_path + '.add_argument') - @patch(parser_patch_path + '.parse_args') - def test_no_args_given(self, parse_mock, add_mock): + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=False, files=None)) + def test_parser(self, parse_mock, add_mock, add_group_mock, + add_exclusive_group_mock): + group_mock = add_group_mock.return_value + exclusive_group_mock = 
add_exclusive_group_mock.return_value + args = vdsgen.parse_args() - add_mock.has_calls(call("path", type=str, - help="Path to folder containing HDF5 files."), - call("prefix", type=str, - help="Root name of images - e.g 'stripe_' to " - "combine the images 'stripe_1.hdf5', " - "'stripe_2.hdf5' and 'stripe_3.hdf5' " - "located at .")) + add_mock.assert_has_calls( + [call("path", type=str, + help="Path to folder containing HDF5 files."), + call("-e", "--empty", action="store_true", dest="empty", + help="Make empty VDS pointing to datasets " + "that don't exist, yet."), + call("-s", "--stripe_spacing", nargs="?", type=int, default=None, + dest="stripe_spacing", + help="Spacing between two stripes in a module."), + call("-m", "--module_spacing", nargs="?", type=int, default=None, + dest="module_spacing", + help="Spacing between two modules."), + call("-d", "--data_path", nargs="?", type=str, default=None, + dest="data_path", + help="Data location in HDF5 files.")]) + + add_group_mock.assert_called_with() + group_mock.add_argument.assert_has_calls( + [call("--frames", type=int, default=1, dest="frames", + help="Number of frames to combine into VDS."), + call("--height", type=int, default=256, dest="height", + help="Height of raw datasets."), + call("--width", type=int, default=1024, dest="width", + help="Width of raw datasets."), + call("--data_type", type=str, default="uint16", dest="data_type", + help="Data type of raw datasets.")] + ) + + add_exclusive_group_mock.assert_called_with(required=True) + exclusive_group_mock.add_argument.assert_has_calls( + [call("-p", "--prefix", type=str, default=None, dest="prefix", + help="Prefix of files - e.g 'stripe_' to combine the images " + "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' " + "located at ."), + call("-f", "--files", nargs="*", type=str, default=None, + dest="files", + help="Manually define files to combine.")] + ) + parse_mock.assert_called_once_with() self.assertEqual(parse_mock.return_value, args) + 
@patch(parser_patch_path + '.error') + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=True, files=None)) + def test_empty_and_not_files_then_error(self, parse_mock, error_mock): + + vdsgen.parse_args() + + error_mock.assert_called_once_with( + "To make an empty VDS you must explicitly define --files for the " + "eventual raw datasets.") + + @patch(parser_patch_path + '.error') + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=True, files=["file"])) + def test_only_one_file_then_error(self, parse_mock, error_mock): + vdsgen.parse_args() + + error_mock.assert_called_once_with( + "Must define at least two files to combine.") + class FindFilesTest(unittest.TestCase): @@ -88,7 +147,7 @@ def test_generate_vds_name(self): def test_grab_metadata(self, h5file_mock): expected_data = dict(frames=3, height=256, width=2048, dtype="uint16") - meta_data = vdsgen.grab_metadata("/test/path") + meta_data = vdsgen.grab_metadata("/test/path", "data") h5file_mock.assert_called_once_with("/test/path", "r") self.assertEqual(expected_data, meta_data) @@ -100,9 +159,10 @@ def test_process_source_datasets_given_valid_data(self, grab_mock): expected_source = vdsgen.Source(frames=3, height=256, width=2048, dtype="uint16", datasets=files) - source = vdsgen.process_source_datasets(files) + source = vdsgen.process_source_datasets(files, "data") - grab_mock.assert_has_calls([call("stripe_1.h5"), call("stripe_2.h5")]) + grab_mock.assert_has_calls([call("stripe_1.h5", "data"), + call("stripe_2.h5", "data")]) self.assertEqual(expected_source, source) @patch(vdsgen_patch_path + '.grab_metadata', @@ -113,9 +173,10 @@ def test_process_source_datasets_given_mismatched_data(self, grab_mock): files = ["stripe_1.h5", "stripe_2.h5"] with self.assertRaises(ValueError): - vdsgen.process_source_datasets(files) + vdsgen.process_source_datasets(files, "data") - grab_mock.assert_has_calls([call("stripe_1.h5"), call("stripe_2.h5")]) + 
grab_mock.assert_has_calls([call("stripe_1.h5", "data"), + call("stripe_2.h5", "data")]) def test_construct_vds_metadata(self): source = vdsgen.Source(frames=3, height=256, width=2048, @@ -137,7 +198,7 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0], path="/test/path") - map_list = vdsgen.create_vds_maps(source, vds) + map_list = vdsgen.create_vds_maps(source, vds, "data") target_mock.assert_called_once_with("/test/path", "full_frame", shape=(3, 1586, 2048)) @@ -154,6 +215,7 @@ class MainTest(unittest.TestCase): file_mock = MagicMock() + @patch('os.path.isfile', return_value=True) @patch(h5py_patch_path + '.File', return_value=file_mock) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') @@ -162,33 +224,110 @@ class MainTest(unittest.TestCase): return_value="stripe_vds.h5") @patch(vdsgen_patch_path + '.find_files', return_value=["stripe_1.hdf5", "stripe_2.hdf5", "stripe_3.hdf5"]) - def test_generate_vds(self, find_mock, gen_mock, process_mock, - construct_mock, create_mock, h5file_mock): + def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, + construct_mock, create_mock, h5file_mock, + isfile_mock): + self.file_mock.reset_mock() vds_file_mock = self.file_mock.__enter__.return_value - vdsgen.generate_vds("/test/path", "stripe_") + vdsgen.generate_vds("/test/path", prefix="stripe_") find_mock.assert_called_once_with("/test/path", "stripe_") gen_mock.assert_called_once_with("stripe_", find_mock.return_value) - process_mock.assert_called_once_with(find_mock.return_value) + process_mock.assert_called_once_with(find_mock.return_value, "data") construct_mock.assert_called_once_with(process_mock.return_value, - "/test/path/stripe_vds.h5") + "/test/path/stripe_vds.h5", + None, None) create_mock.assert_called_once_with(process_mock.return_value, - construct_mock.return_value) + construct_mock.return_value, + "data") + 
h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", + libver="latest") + vds_file_mock.create_virtual_dataset.assert_called_once_with( + VMlist=create_mock.return_value, fill_value=0x1) + + @patch('os.path.isfile', return_value=True) + @patch(h5py_patch_path + '.File', return_value=file_mock) + @patch(vdsgen_patch_path + '.create_vds_maps') + @patch(vdsgen_patch_path + '.construct_vds_metadata') + @patch(vdsgen_patch_path + '.construct_vds_name', + return_value="stripe_vds.h5") + def test_generate_vds_given_args(self, gen_mock, construct_mock, + create_mock, h5file_mock, isfile_mock): + self.file_mock.reset_mock() + vds_file_mock = self.file_mock.__enter__.return_value + files = ["stripe_1.h5", "stripe_2.h5"] + file_paths = ["/test/path/" + file_ for file_ in files] + source_dict = dict(frames=3, height=256, width=1024, dtype="int16") + source = vdsgen.Source(frames=3, height=256, width=1024, dtype="int16", + datasets=file_paths) + + vdsgen.generate_vds("/test/path", files=files, source=source_dict, + data_path="data", + stripe_spacing=3, module_spacing=127) + + gen_mock.assert_called_once_with("stripe_", + ["stripe_1.h5", "stripe_2.h5"]) + construct_mock.assert_called_once_with(source, + "/test/path/stripe_vds.h5", + 3, 127) + create_mock.assert_called_once_with(source, + construct_mock.return_value, + "data") h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) + def test_generate_vds_prefix_and_files_then_error(self): + + with self.assertRaises(ValueError): + vdsgen.generate_vds("/test/path", "stripe_", ["file1", "file2"]) + + @patch('os.path.isfile', return_value=False) + @patch(vdsgen_patch_path + '.construct_vds_name', + return_value="stripe_vds.h5") + def test_generate_vds_no_source_or_files_then_error(self, construct_mock, + isfile_mock): + + with self.assertRaises(IOError): + 
vdsgen.generate_vds("/test/path", files=["file1", "file2"]) + @patch(vdsgen_patch_path + '.generate_vds') @patch(vdsgen_patch_path + '.parse_args', - return_value=MagicMock(path="/test/path", prefix="stripe_")) - def test_main(self, parse_mock, generate_mock): + return_value=MagicMock( + path="/test/path", prefix="stripe_", empty=True, + files=["file1.hdf5", "file2.hdf5"], + frames=3, height=256, width=2048, data_type="int16", + data_path="data", stripe_spacing=3, module_spacing=127)) + def test_main_empty(self, parse_mock, generate_mock): args_mock = parse_mock.return_value vdsgen.main() parse_mock.assert_called_once_with() - generate_mock.assert_called_once_with(args_mock.path, args_mock.prefix) - source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16", datasets=[""]*6) + generate_mock.assert_called_once_with( + args_mock.path, args_mock.prefix, args_mock.files, + dict(frames=args_mock.frames, height=args_mock.height, + width=args_mock.width, dtype=args_mock.data_type), + args_mock.data_path, + args_mock.stripe_spacing, args_mock.module_spacing) + + @patch(vdsgen_patch_path + '.generate_vds') + @patch(vdsgen_patch_path + '.parse_args', + return_value=MagicMock( + path="/test/path", prefix="stripe_", empty=False, + files=["file1.hdf5", "file2.hdf5"], + frames=3, height=256, width=2048, data_type="int16", + data_path="data", stripe_spacing=3, module_spacing=127)) + def test_main_not_empty(self, parse_mock, generate_mock): + args_mock = parse_mock.return_value + + vdsgen.main() + + parse_mock.assert_called_once_with() + generate_mock.assert_called_once_with( + args_mock.path, args_mock.prefix, args_mock.files, + None, + args_mock.data_path, + args_mock.stripe_spacing, args_mock.module_spacing) From 5a62c905ab3c1519f8a546ba266009477e2f6d1a Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 11:14:04 +0000 Subject: [PATCH 11/28] Update to accept output file name as argument --- tests/vdsgen_test.py | 51 
++++++++++++++++++++++++-------------------- vdsgen/vdsgen.py | 25 ++++++++++++++++------ 2 files changed, 47 insertions(+), 29 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 98ba151..2962cb7 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -30,7 +30,11 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, add_mock.assert_has_calls( [call("path", type=str, - help="Path to folder containing HDF5 files."), + help="Root folder to create VDS in. Also where source " + "files are searched for if --prefix given."), + call("-o", "--output", type=str, default=None, dest="output", + help="Output file name. Default is input file prefix with " + "vds suffix."), call("-e", "--empty", action="store_true", dest="empty", help="Make empty VDS pointing to datasets " "that don't exist, yet."), @@ -250,9 +254,7 @@ def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, @patch(h5py_patch_path + '.File', return_value=file_mock) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') - @patch(vdsgen_patch_path + '.construct_vds_name', - return_value="stripe_vds.h5") - def test_generate_vds_given_args(self, gen_mock, construct_mock, + def test_generate_vds_given_args(self, metadata_mock, create_mock, h5file_mock, isfile_mock): self.file_mock.reset_mock() vds_file_mock = self.file_mock.__enter__.return_value @@ -262,19 +264,18 @@ def test_generate_vds_given_args(self, gen_mock, construct_mock, source = vdsgen.Source(frames=3, height=256, width=1024, dtype="int16", datasets=file_paths) - vdsgen.generate_vds("/test/path", files=files, source=source_dict, + vdsgen.generate_vds("/test/path", files=files, output="vds.h5", + source=source_dict, data_path="data", stripe_spacing=3, module_spacing=127) - gen_mock.assert_called_once_with("stripe_", - ["stripe_1.h5", "stripe_2.h5"]) - construct_mock.assert_called_once_with(source, - "/test/path/stripe_vds.h5", - 3, 127) + 
metadata_mock.assert_called_once_with(source, + "/test/path/vds.h5", + 3, 127) create_mock.assert_called_once_with(source, - construct_mock.return_value, + metadata_mock.return_value, "data") - h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", + h5file_mock.assert_called_once_with("/test/path/vds.h5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) @@ -297,7 +298,7 @@ def test_generate_vds_no_source_or_files_then_error(self, construct_mock, @patch(vdsgen_patch_path + '.parse_args', return_value=MagicMock( path="/test/path", prefix="stripe_", empty=True, - files=["file1.hdf5", "file2.hdf5"], + files=["file1.hdf5", "file2.hdf5"], output="vds", frames=3, height=256, width=2048, data_type="int16", data_path="data", stripe_spacing=3, module_spacing=127)) def test_main_empty(self, parse_mock, generate_mock): @@ -307,17 +308,19 @@ def test_main_empty(self, parse_mock, generate_mock): parse_mock.assert_called_once_with() generate_mock.assert_called_once_with( - args_mock.path, args_mock.prefix, args_mock.files, - dict(frames=args_mock.frames, height=args_mock.height, - width=args_mock.width, dtype=args_mock.data_type), - args_mock.data_path, - args_mock.stripe_spacing, args_mock.module_spacing) + args_mock.path, + prefix=args_mock.prefix, output="vds", files=args_mock.files, + source=dict(frames=args_mock.frames, height=args_mock.height, + width=args_mock.width, dtype=args_mock.data_type), + data_path=args_mock.data_path, + stripe_spacing=args_mock.stripe_spacing, + module_spacing=args_mock.module_spacing) @patch(vdsgen_patch_path + '.generate_vds') @patch(vdsgen_patch_path + '.parse_args', return_value=MagicMock( path="/test/path", prefix="stripe_", empty=False, - files=["file1.hdf5", "file2.hdf5"], + files=["file1.hdf5", "file2.hdf5"], output="vds", frames=3, height=256, width=2048, data_type="int16", data_path="data", stripe_spacing=3, module_spacing=127)) def 
test_main_not_empty(self, parse_mock, generate_mock): @@ -327,7 +330,9 @@ def test_main_not_empty(self, parse_mock, generate_mock): parse_mock.assert_called_once_with() generate_mock.assert_called_once_with( - args_mock.path, args_mock.prefix, args_mock.files, - None, - args_mock.data_path, - args_mock.stripe_spacing, args_mock.module_spacing) + args_mock.path, + prefix=args_mock.prefix, output="vds", files=args_mock.files, + source=None, + data_path=args_mock.data_path, + stripe_spacing=args_mock.stripe_spacing, + module_spacing=args_mock.module_spacing) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 15a392c..fbc9cb5 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -26,7 +26,8 @@ def parse_args(): """Parse command line arguments.""" parser = ArgumentParser() parser.add_argument("path", type=str, - help="Path to folder containing HDF5 files.") + help="Root folder to create VDS in. Also where source " + "files are searched for if --prefix given.") # Definition of file names in - Common prefix or explicit list file_definition = parser.add_mutually_exclusive_group(required=True) @@ -38,6 +39,9 @@ def parse_args(): file_definition.add_argument( "-f", "--files", nargs="*", type=str, default=None, dest="files", help="Manually define files to combine.") + parser.add_argument( + "-o", "--output", type=str, default=None, dest="output", + help="Output file name. Default is input file prefix with vds suffix.") # Arguments required to allow VDS to be created before raw files exist parser.add_argument( @@ -231,8 +235,8 @@ def create_vds_maps(source, vds_data, data_path): return map_list -def generate_vds(path, prefix=None, files=None, source=None, data_path=None, - stripe_spacing=None, module_spacing=None): +def generate_vds(path, prefix=None, files=None, output=None, source=None, + data_path=None, stripe_spacing=None, module_spacing=None): """Generate a virtual dataset. 
Args: @@ -240,6 +244,7 @@ def generate_vds(path, prefix=None, files=None, source=None, data_path=None, prefix(str): Prefix of HDF5 files to generate from (in folder) e.g. image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 files(list(str)): List of files to combine. + output(str): Name of VDS file. source(dict): Height, width, data_type and frames for source data data_path(str): Path to raw data in HDF5 file stripe_spacing(int): Spacing between stripes in module @@ -260,7 +265,11 @@ def generate_vds(path, prefix=None, files=None, source=None, data_path=None, file_paths = [os.path.join(path, file_) for file_ in files] prefix = os.path.commonprefix(files) - vds_name = construct_vds_name(prefix, files) + if output is None: + vds_name = construct_vds_name(prefix, files) + else: + vds_name = output + output_file = os.path.abspath(os.path.join(path, vds_name)) file_names = [file_.split('/')[-1] for file_ in file_paths] @@ -299,8 +308,12 @@ def main(): else: source_metadata = None - generate_vds(args.path, args.prefix, args.files, source_metadata, - args.data_path, args.stripe_spacing, args.module_spacing) + generate_vds(args.path, + prefix=args.prefix, files=args.files, output=args.output, + source=source_metadata, + data_path=args.data_path, + stripe_spacing=args.stripe_spacing, + module_spacing=args.module_spacing) if __name__ == "__main__": sys.exit(main()) From 9414ba20d334cb4b22f07ea884741584d3f63034 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 11:28:41 +0000 Subject: [PATCH 12/28] Update to allow source and target data nodes as arguments source_node was originally data_path. 
--- tests/vdsgen_test.py | 29 +++++++++++++--------- vdsgen/vdsgen.py | 58 ++++++++++++++++++++++++++------------------ 2 files changed, 52 insertions(+), 35 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 2962cb7..a2f8ff0 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -44,9 +44,12 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, call("-m", "--module_spacing", nargs="?", type=int, default=None, dest="module_spacing", help="Spacing between two modules."), - call("-d", "--data_path", nargs="?", type=str, default=None, - dest="data_path", - help="Data location in HDF5 files.")]) + call("--source_node", nargs="?", type=str, default=None, + dest="source_node", + help="Data node in source HDF5 files."), + call("--target_node", nargs="?", type=str, default=None, + dest="target_node", + help="Data node in VDS file.")]) add_group_mock.assert_called_with() group_mock.add_argument.assert_has_calls( @@ -202,7 +205,7 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0], path="/test/path") - map_list = vdsgen.create_vds_maps(source, vds, "data") + map_list = vdsgen.create_vds_maps(source, vds, "data", "full_frame") target_mock.assert_called_once_with("/test/path", "full_frame", shape=(3, 1586, 2048)) @@ -244,7 +247,7 @@ def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, None, None) create_mock.assert_called_once_with(process_mock.return_value, construct_mock.return_value, - "data") + "data", "full_frame") h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( @@ -266,7 +269,7 @@ def test_generate_vds_given_args(self, metadata_mock, vdsgen.generate_vds("/test/path", files=files, output="vds.h5", source=source_dict, - data_path="data", + source_node="data", stripe_spacing=3, module_spacing=127) 
metadata_mock.assert_called_once_with(source, @@ -274,7 +277,7 @@ def test_generate_vds_given_args(self, metadata_mock, 3, 127) create_mock.assert_called_once_with(source, metadata_mock.return_value, - "data") + "data", "full_frame") h5file_mock.assert_called_once_with("/test/path/vds.h5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( @@ -300,7 +303,8 @@ def test_generate_vds_no_source_or_files_then_error(self, construct_mock, path="/test/path", prefix="stripe_", empty=True, files=["file1.hdf5", "file2.hdf5"], output="vds", frames=3, height=256, width=2048, data_type="int16", - data_path="data", stripe_spacing=3, module_spacing=127)) + source_node="data", target_node="full_frame", + stripe_spacing=3, module_spacing=127)) def test_main_empty(self, parse_mock, generate_mock): args_mock = parse_mock.return_value @@ -312,7 +316,8 @@ def test_main_empty(self, parse_mock, generate_mock): prefix=args_mock.prefix, output="vds", files=args_mock.files, source=dict(frames=args_mock.frames, height=args_mock.height, width=args_mock.width, dtype=args_mock.data_type), - data_path=args_mock.data_path, + source_node=args_mock.source_node, + target_node=args_mock.target_node, stripe_spacing=args_mock.stripe_spacing, module_spacing=args_mock.module_spacing) @@ -322,7 +327,8 @@ def test_main_empty(self, parse_mock, generate_mock): path="/test/path", prefix="stripe_", empty=False, files=["file1.hdf5", "file2.hdf5"], output="vds", frames=3, height=256, width=2048, data_type="int16", - data_path="data", stripe_spacing=3, module_spacing=127)) + source_node="data", target_node="full_frame", + stripe_spacing=3, module_spacing=127)) def test_main_not_empty(self, parse_mock, generate_mock): args_mock = parse_mock.return_value @@ -333,6 +339,7 @@ def test_main_not_empty(self, parse_mock, generate_mock): args_mock.path, prefix=args_mock.prefix, output="vds", files=args_mock.files, source=None, - data_path=args_mock.data_path, + 
source_node=args_mock.source_node, stripe_spacing=args_mock.stripe_spacing, + target_node=args_mock.target_node, module_spacing=args_mock.module_spacing) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index fbc9cb5..53234c5 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -17,9 +17,10 @@ ["datasets", "frames", "height", "width", "dtype"]) VDS = namedtuple("VDS", ["shape", "spacing", "path"]) -STRIPE_SPACING = 10 # Pixel spacing between stripes in a module -MODULE_SPACING = 10 # Pixel spacing between modules -DATA_PATH = "data" # Location of data in HDF5 file tree +STRIPE_SPACING = 10 # Pixel spacing between stripes in a module +MODULE_SPACING = 10 # Pixel spacing between modules +SOURCE_DATA_NODE = "data" # Data node in source HDF5 files +TARGET_DATA_NODE = "full_frame" # Data node in VDS file def parse_args(): @@ -68,9 +69,12 @@ def parse_args(): parser.add_argument("-m", "--module_spacing", nargs="?", type=int, default=None, dest="module_spacing", help="Spacing between two modules.") - parser.add_argument("-d", "--data_path", nargs="?", type=str, default=None, - dest="data_path", - help="Data location in HDF5 files.") + parser.add_argument("--source_node", nargs="?", type=str, default=None, + dest="source_node", + help="Data node in source HDF5 files.") + parser.add_argument("--target_node", nargs="?", type=str, default=None, + dest="target_node", + help="Data node in VDS file.") args = parser.parse_args() @@ -128,39 +132,39 @@ def construct_vds_name(prefix, files): return vds_name -def grab_metadata(file_path, data_path): +def grab_metadata(file_path, source_node): """Grab data from given HDF5 file. 
Args: file_path(str): Path to HDF5 file - data_path(str): Location of raw data in HDF5 file + source_node(str): Location of raw data in HDF5 file Returns: dict: Number of frames, height, width and data type of datasets """ - h5_data = h5.File(file_path, 'r')[data_path] + h5_data = h5.File(file_path, 'r')[source_node] frames, height, width = h5_data.shape data_type = h5_data.dtype return dict(frames=frames, height=height, width=width, dtype=data_type) -def process_source_datasets(datasets, data_path): +def process_source_datasets(datasets, source_node): """Grab data from the given HDF5 files and check for consistency. Args: datasets(list(str)): Datasets to grab data from - data_path(str): Location of raw data in HDF5 file + source_node(str): Location of raw data in HDF5 file Returns: Source: Number of datasets and the attributes of them (frames, height width and data type) """ - data = grab_metadata(datasets[0], data_path) + data = grab_metadata(datasets[0], source_node) for path in datasets[1:]: - temp_data = grab_metadata(path, data_path) + temp_data = grab_metadata(path, source_node) for attribute, value in data.items(): if temp_data[attribute] != value: raise ValueError("Files have mismatched {}".format(attribute)) @@ -202,27 +206,28 @@ def construct_vds_metadata(source, output_file, return VDS(shape=shape, spacing=spacing, path=output_file) -def create_vds_maps(source, vds_data, data_path): +def create_vds_maps(source, vds_data, source_node, target_node): """Create a list of VirtualMaps of raw data to the VDS. 
Args: source(Source): Source attributes vds_data(VDS): VDS attributes - data_path(str): Path to raw data in HDF5 file + source_node(str): Data node in source HDF5 files + target_node(str): Data node in VDS file Returns: list(VirtualMap): Maps describing links between raw data and VDS """ source_shape = (source.frames, source.height, source.width) - vds = h5.VirtualTarget(vds_data.path, "full_frame", shape=vds_data.shape) + vds = h5.VirtualTarget(vds_data.path, target_node, shape=vds_data.shape) map_list = [] current_position = 0 for idx, dataset in enumerate(source.datasets): logging.info("Processing dataset %s", idx + 1) - v_source = h5.VirtualSource(dataset, data_path, shape=source_shape) + v_source = h5.VirtualSource(dataset, source_node, shape=source_shape) start = current_position stop = start + source.height + vds_data.spacing[idx] @@ -236,7 +241,8 @@ def create_vds_maps(source, vds_data, data_path): def generate_vds(path, prefix=None, files=None, output=None, source=None, - data_path=None, stripe_spacing=None, module_spacing=None): + source_node=None, target_node=None, + stripe_spacing=None, module_spacing=None): """Generate a virtual dataset. Args: @@ -246,7 +252,8 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, files(list(str)): List of files to combine. output(str): Name of VDS file. 
source(dict): Height, width, data_type and frames for source data - data_path(str): Path to raw data in HDF5 file + source_node(str): Data node in source HDF5 files + target_node(str): Data node in VDS file stripe_spacing(int): Spacing between stripes in module module_spacing(int): Spacing between modules @@ -255,8 +262,10 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, (prefix is not None and files is not None): raise ValueError("One, and only one, of prefix or files required.") - if data_path is None: - data_path = DATA_PATH + if source_node is None: + source_node = SOURCE_DATA_NODE + if target_node is None: + target_node = TARGET_DATA_NODE if files is None: file_paths = find_files(path, prefix) @@ -281,7 +290,7 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, if not os.path.isfile(file_): raise IOError("To create VDS from raw files that haven't been " "created yet, source must be provided.") - source_metadata = process_source_datasets(file_paths, data_path) + source_metadata = process_source_datasets(file_paths, source_node) else: source_metadata = Source( frames=source['frames'], height=source['height'], @@ -289,7 +298,8 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, vds_data = construct_vds_metadata(source_metadata, output_file, stripe_spacing, module_spacing) - map_list = create_vds_maps(source_metadata, vds_data, data_path) + map_list = create_vds_maps(source_metadata, vds_data, + source_node, target_node) logging.info("Creating VDS at %s", output_file) with h5.File(output_file, "w", libver="latest") as vds_file: @@ -311,7 +321,7 @@ def main(): generate_vds(args.path, prefix=args.prefix, files=args.files, output=args.output, source=source_metadata, - data_path=args.data_path, + source_node=args.source_node, target_node=args.target_node, stripe_spacing=args.stripe_spacing, module_spacing=args.module_spacing) From 6f6d464c0bb89faee87ff0fdaeda20c2033b9251 Mon Sep 17 00:00:00 
2001 From: Gary Yendell Date: Fri, 10 Mar 2017 11:32:31 +0000 Subject: [PATCH 13/28] Improve error message and docstring --- vdsgen/vdsgen.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 53234c5..78ee5e2 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -251,7 +251,8 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, e.g. image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 files(list(str)): List of files to combine. output(str): Name of VDS file. - source(dict): Height, width, data_type and frames for source data + source(dict): Height, width, data_type and frames for source data. + Provide this to create a VDS for raw files that don't exist yet. source_node(str): Data node in source HDF5 files target_node(str): Data node in VDS file stripe_spacing(int): Spacing between stripes in module @@ -288,8 +289,9 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, if source is None: for file_ in file_paths: if not os.path.isfile(file_): - raise IOError("To create VDS from raw files that haven't been " - "created yet, source must be provided.") + raise IOError("File {} does not exist. 
To create VDS from raw " + "files that haven't been created yet, source " + "must be provided.".format(file_)) source_metadata = process_source_datasets(file_paths, source_node) else: source_metadata = Source( From 06ac73c14e784cc5cda4195281f2c79a2eb44c76 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 13:35:15 +0000 Subject: [PATCH 14/28] Add check for existing target_node if file exists --- tests/vdsgen_test.py | 25 +++++++++++++++++++------ vdsgen/vdsgen.py | 7 +++++++ 2 files changed, 26 insertions(+), 6 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index a2f8ff0..65511dc 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -221,8 +221,9 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): class MainTest(unittest.TestCase): file_mock = MagicMock() + file_mock_2 = MagicMock() - @patch('os.path.isfile', return_value=True) + @patch('os.path.isfile', side_effect=[False, True, True, True]) @patch(h5py_patch_path + '.File', return_value=file_mock) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') @@ -254,13 +255,15 @@ def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, VMlist=create_mock.return_value, fill_value=0x1) @patch('os.path.isfile', return_value=True) - @patch(h5py_patch_path + '.File', return_value=file_mock) + @patch(h5py_patch_path + '.File', side_effect=[file_mock, file_mock_2]) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') def test_generate_vds_given_args(self, metadata_mock, create_mock, h5file_mock, isfile_mock): self.file_mock.reset_mock() - vds_file_mock = self.file_mock.__enter__.return_value + self.file_mock_2.reset_mock() + self.file_mock.__enter__.return_value.get.return_value = None + vds_file_mock = self.file_mock_2.__enter__.return_value files = ["stripe_1.h5", "stripe_2.h5"] file_paths = ["/test/path/" + file_ for file_ in files] source_dict = 
dict(frames=3, height=256, width=1024, dtype="int16") @@ -269,7 +272,7 @@ def test_generate_vds_given_args(self, metadata_mock, vdsgen.generate_vds("/test/path", files=files, output="vds.h5", source=source_dict, - source_node="data", + source_node="data", target_node="full_frame", stripe_spacing=3, module_spacing=127) metadata_mock.assert_called_once_with(source, @@ -278,8 +281,9 @@ def test_generate_vds_given_args(self, metadata_mock, create_mock.assert_called_once_with(source, metadata_mock.return_value, "data", "full_frame") - h5file_mock.assert_called_once_with("/test/path/vds.h5", "w", - libver="latest") + h5file_mock.assert_has_calls([ + call("/test/path/vds.h5", "r", libver="latest"), + call("/test/path/vds.h5", "w", libver="latest")]) vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) @@ -297,6 +301,15 @@ def test_generate_vds_no_source_or_files_then_error(self, construct_mock, with self.assertRaises(IOError): vdsgen.generate_vds("/test/path", files=["file1", "file2"]) + @patch(h5py_patch_path + '.File', return_value=file_mock) + def test_generate_vds_target_node_exists_then_error(self, _): + self.file_mock.reset_mock() + self.file_mock.get.return_value = None + + with self.assertRaises(IOError): + vdsgen.generate_vds("/test/path", files=["file1", "file2"], + output="vds") + @patch(vdsgen_patch_path + '.generate_vds') @patch(vdsgen_patch_path + '.parse_args', return_value=MagicMock( diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 78ee5e2..7809030 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -281,6 +281,13 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, vds_name = output output_file = os.path.abspath(os.path.join(path, vds_name)) + if os.path.isfile(output_file): + with h5.File(output_file, "r", libver="latest") as vds_file: + node = vds_file.get(target_node) + if node is not None: + raise IOError("VDS {file} already has an entry for node " + 
"{node}".format(file=output_file, + node=target_node)) file_names = [file_.split('/')[-1] for file_ in file_paths] logging.info("Combining datasets %s into %s", From 91c78300fc93ed51a9c6c76d4f0ac45ffcce7298 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 13:57:46 +0000 Subject: [PATCH 15/28] Make tests more specific Check for actual message in IOError checks --- tests/vdsgen_test.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 65511dc..6b71486 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -298,17 +298,26 @@ def test_generate_vds_prefix_and_files_then_error(self): def test_generate_vds_no_source_or_files_then_error(self, construct_mock, isfile_mock): - with self.assertRaises(IOError): + with self.assertRaises(IOError) as e: vdsgen.generate_vds("/test/path", files=["file1", "file2"]) + self.assertEqual("File /test/path/file1 does not exist. To create VDS " + "from raw files that haven't been created yet, " + "source must be provided.", + e.exception.message) + @patch('os.path.isfile', return_value=True) @patch(h5py_patch_path + '.File', return_value=file_mock) - def test_generate_vds_target_node_exists_then_error(self, _): + def test_generate_vds_target_node_exists_then_error(self, h5_file_mock, + isfile_mock): self.file_mock.reset_mock() - self.file_mock.get.return_value = None + self.file_mock.__enter__.return_value.get.return_value = MagicMock() - with self.assertRaises(IOError): + with self.assertRaises(IOError) as e: vdsgen.generate_vds("/test/path", files=["file1", "file2"], output="vds") + self.assertEqual("VDS /test/path/vds already has an entry for node " + "full_frame", + e.exception.message) @patch(vdsgen_patch_path + '.generate_vds') @patch(vdsgen_patch_path + '.parse_args', From 65a9c9a2a06b39f7639fc9ad541e1d6df684d024 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 15:21:42 +0000 Subject: [PATCH 16/28] Add 
validate_node function * Check if target node is valid * Create sub-group, if it doesn't exist --- tests/vdsgen_test.py | 29 +++++++++++++++++++++++++++++ vdsgen/vdsgen.py | 22 ++++++++++++++++++++++ 2 files changed, 51 insertions(+) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 6b71486..f0ae3df 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -218,6 +218,35 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): self.assertEqual([map_mock.return_value]*6, map_list) +class ValidateNodeTest(unittest.TestCase): + + def setUp(self): + self.file_mock = MagicMock() + + def test_validate_node_creates(self): + self.file_mock.get.return_value = None + + vdsgen.validate_node(self.file_mock, "entry/detector/detector1") + + self.file_mock.create_group.assert_called_once_with("entry/detector") + + def test_validate_node_exists_then_no_op(self): + self.file_mock.get.return_value = "Group" + + vdsgen.validate_node(self.file_mock, "entry/detector/detector1") + + self.file_mock.create_group.assert_not_called() + + def test_validate_node_invalid_then_error(self): + + with self.assertRaises(ValueError): + vdsgen.validate_node(self.file_mock, "/entry/detector/detector1") + with self.assertRaises(ValueError): + vdsgen.validate_node(self.file_mock, "entry/detector/detector1/") + with self.assertRaises(ValueError): + vdsgen.validate_node(self.file_mock, "/entry/detector/detector1/") + + class MainTest(unittest.TestCase): file_mock = MagicMock() diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 7809030..fb6aa3e 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -240,6 +240,27 @@ def create_vds_maps(source, vds_data, source_node, target_node): return map_list +def validate_node(vds_file, target_node): + """Check if it is possible to create the given node. + + Check the target node is valid (no leading or trailing slashes) + Create any sub-group of the target node if it doesn't exist. 
+ + Args: + vds_file(h5py.File): File to check for node + target_node(str): Full path to node + + """ + if target_node.startswith("/") or target_node.endswith("/"): + raise ValueError("Target node should have no leading or trailing " + "slashes, got {}".format(target_node)) + + if "/" in target_node: + sub_group = target_node.rsplit("/", 1)[0] + if vds_file.get(sub_group) is None: + vds_file.create_group(sub_group) + + def generate_vds(path, prefix=None, files=None, output=None, source=None, source_node=None, target_node=None, stripe_spacing=None, module_spacing=None): @@ -312,6 +333,7 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, logging.info("Creating VDS at %s", output_file) with h5.File(output_file, "w", libver="latest") as vds_file: + validate_node(vds_file, target_node) vds_file.create_virtual_dataset(VMlist=map_list, fill_value=0x1) logging.info("Creation successful!") From f7be3f3a8de9c5b4cfd94e05df30584205d5784a Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 10 Mar 2017 15:26:41 +0000 Subject: [PATCH 17/28] Update to open in append mode if file already exists --- tests/vdsgen_test.py | 11 ++++++++--- vdsgen/vdsgen.py | 7 ++++++- 2 files changed, 14 insertions(+), 4 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index f0ae3df..a6b070f 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -253,6 +253,7 @@ class MainTest(unittest.TestCase): file_mock_2 = MagicMock() @patch('os.path.isfile', side_effect=[False, True, True, True]) + @patch(vdsgen_patch_path + '.validate_node') @patch(h5py_patch_path + '.File', return_value=file_mock) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') @@ -263,7 +264,7 @@ class MainTest(unittest.TestCase): return_value=["stripe_1.hdf5", "stripe_2.hdf5", "stripe_3.hdf5"]) def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, construct_mock, create_mock, h5file_mock, - isfile_mock): + 
validate_mock, isfile_mock): self.file_mock.reset_mock() vds_file_mock = self.file_mock.__enter__.return_value @@ -278,17 +279,20 @@ def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, create_mock.assert_called_once_with(process_mock.return_value, construct_mock.return_value, "data", "full_frame") + validate_mock.assert_called_once_with(vds_file_mock, "full_frame") h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) @patch('os.path.isfile', return_value=True) + @patch(vdsgen_patch_path + '.validate_node') @patch(h5py_patch_path + '.File', side_effect=[file_mock, file_mock_2]) @patch(vdsgen_patch_path + '.create_vds_maps') @patch(vdsgen_patch_path + '.construct_vds_metadata') def test_generate_vds_given_args(self, metadata_mock, - create_mock, h5file_mock, isfile_mock): + create_mock, h5file_mock, validate_mock, + isfile_mock): self.file_mock.reset_mock() self.file_mock_2.reset_mock() self.file_mock.__enter__.return_value.get.return_value = None @@ -310,9 +314,10 @@ def test_generate_vds_given_args(self, metadata_mock, create_mock.assert_called_once_with(source, metadata_mock.return_value, "data", "full_frame") + validate_mock.assert_called_once_with(vds_file_mock, "full_frame") h5file_mock.assert_has_calls([ call("/test/path/vds.h5", "r", libver="latest"), - call("/test/path/vds.h5", "w", libver="latest")]) + call("/test/path/vds.h5", "a", libver="latest")]) vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index fb6aa3e..6b47b5a 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -21,6 +21,8 @@ MODULE_SPACING = 10 # Pixel spacing between modules SOURCE_DATA_NODE = "data" # Data node in source HDF5 files TARGET_DATA_NODE = "full_frame" # Data node in VDS file +APPEND = "a" +OVERWRITE = "w" 
def parse_args(): @@ -280,6 +282,7 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, module_spacing(int): Spacing between modules """ + write_mode = OVERWRITE if (prefix is None and files is None) or \ (prefix is not None and files is not None): raise ValueError("One, and only one, of prefix or files required.") @@ -309,6 +312,8 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, raise IOError("VDS {file} already has an entry for node " "{node}".format(file=output_file, node=target_node)) + else: + write_mode = APPEND file_names = [file_.split('/')[-1] for file_ in file_paths] logging.info("Combining datasets %s into %s", @@ -332,7 +337,7 @@ def generate_vds(path, prefix=None, files=None, output=None, source=None, source_node, target_node) logging.info("Creating VDS at %s", output_file) - with h5.File(output_file, "w", libver="latest") as vds_file: + with h5.File(output_file, write_mode, libver="latest") as vds_file: validate_node(vds_file, target_node) vds_file.create_virtual_dataset(VMlist=map_list, fill_value=0x1) From 8447220f9fc3d80117d040b26de92889d9ca3720 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 13 Mar 2017 11:54:24 +0000 Subject: [PATCH 18/28] Create VDSGenerator class --- tests/vdsgen_test.py | 371 +++++++++++++++++++++------------ vdsgen/__init__.py | 6 +- vdsgen/vdsgen.py | 485 ++++++++++++++++++++++--------------------- 3 files changed, 482 insertions(+), 380 deletions(-) diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index a6b070f..21933c4 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -1,17 +1,34 @@ +import os +import sys import unittest from pkg_resources import require require("mock") -from mock import MagicMock, patch, ANY, call +from mock import MagicMock, patch, call + +from vdsgen import vdsgen +from vdsgen.vdsgen import VDSGenerator + vdsgen_patch_path = "vdsgen.vdsgen" +VDSGenerator_patch_path = vdsgen_patch_path + ".VDSGenerator" parser_patch_path = 
"argparse.ArgumentParser" h5py_patch_path = "h5py" -import os -import sys sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "h5py")) -from vdsgen import vdsgen + +class VDSGeneratorTester(VDSGenerator): + + """A version of VDSGenerator without initialisation. + + For testing single methods of the class. Must have required attributes + passed before calling testee function. + + """ + + def __init__(self, **kwargs): + for attribute, value in kwargs.items(): + self.__setattr__(attribute, value) class ParseArgsTest(unittest.TestCase): @@ -60,8 +77,7 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, call("--width", type=int, default=1024, dest="width", help="Width of raw datasets."), call("--data_type", type=str, default="uint16", dest="data_type", - help="Data type of raw datasets.")] - ) + help="Data type of raw datasets.")]) add_exclusive_group_mock.assert_called_with(required=True) exclusive_group_mock.add_argument.assert_has_calls( @@ -71,8 +87,7 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, "located at ."), call("-f", "--files", nargs="*", type=str, default=None, dest="files", - help="Manually define files to combine.")] - ) + help="Manually define files to combine.")]) parse_mock.assert_called_once_with() self.assertEqual(parse_mock.return_value, args) @@ -98,8 +113,94 @@ def test_only_one_file_then_error(self, parse_mock, error_mock): "Must define at least two files to combine.") +class VDSGeneratorInitTest(unittest.TestCase): + + @patch('os.path.isfile', return_value=True) + @patch(VDSGenerator_patch_path + '.process_source_datasets') + @patch(VDSGenerator_patch_path + '.construct_vds_name', + return_value="stripe_vds.hdf5") + @patch(VDSGenerator_patch_path + '.find_files', + return_value=["/test/path/stripe_1.hdf5", + "/test/path/stripe_2.hdf5", + "/test/path/stripe_3.hdf5"]) + def test_generate_vds_defaults(self, find_mock, construct_mock, + process_mock, isfile_mock): + expected_files = ["stripe_1.hdf5", 
"stripe_2.hdf5", "stripe_3.hdf5"] + + gen = VDSGenerator("/test/path", prefix="stripe_") + + find_mock.assert_called_once_with() + construct_mock.assert_called_once_with(expected_files) + process_mock.assert_called_once_with() + + self.assertEqual("/test/path", gen.path) + self.assertEqual("stripe_", gen.prefix) + self.assertEqual("stripe_vds.hdf5", gen.name) + self.assertEqual(find_mock.return_value, gen.datasets) + self.assertEqual(process_mock.return_value, gen.source_metadata) + self.assertEqual("data", gen.source_node) + self.assertEqual("full_frame", gen.target_node) + self.assertEqual(10, gen.stripe_spacing) + self.assertEqual(10, gen.module_spacing) + self.assertEqual(gen.CREATE, gen.mode) + + def test_generate_vds_given_args(self): + files = ["stripe_1.h5", "stripe_2.h5"] + file_paths = ["/test/path/" + file_ for file_ in files] + source_dict = dict(frames=3, height=256, width=2048, dtype="int16") + source = vdsgen.Source(frames=3, height=256, width=2048, dtype="int16") + + gen = VDSGenerator("/test/path", + files=files, + output="vds.hdf5", + source=source_dict, + source_node="entry/data/data", + target_node="entry/detector/detector1", + stripe_spacing=3, module_spacing=127) + + self.assertEqual("/test/path", gen.path) + self.assertEqual("stripe_", gen.prefix) + self.assertEqual("vds.hdf5", gen.name) + self.assertEqual(file_paths, gen.datasets) + self.assertEqual(source, gen.source_metadata) + self.assertEqual("entry/data/data", gen.source_node) + self.assertEqual("entry/detector/detector1", gen.target_node) + self.assertEqual(3, gen.stripe_spacing) + self.assertEqual(127, gen.module_spacing) + self.assertEqual(gen.CREATE, gen.mode) + + def test_generate_vds_prefix_and_files_then_error(self): + files = ["stripe_1.h5", "stripe_2.h5"] + source_dict = dict(frames=3, height=256, width=2048, dtype="int16") + + with self.assertRaises(ValueError): + VDSGenerator("/test/path", + prefix="stripe_", files=files, + output="vds.hdf5", + source=source_dict, + 
source_node="entry/data/data", + target_node="entry/detector/detector1", + stripe_spacing=3, module_spacing=127) + + @patch('os.path.isfile', return_value=False) + def test_generate_vds_no_source_or_files_then_error(self, _): + + with self.assertRaises(IOError) as e: + VDSGenerator("/test/path", + files=["file1", "file2"], + output="vds.hdf5") + + self.assertEqual("File /test/path/file1 does not exist. To create VDS " + "from raw files that haven't been created yet, " + "source must be provided.", + e.exception.message) + + class FindFilesTest(unittest.TestCase): + def setUp(self): + self.gen = VDSGeneratorTester(path="/test/path", prefix="stripe_") + @patch('os.listdir', return_value=["stripe_1.h5", "stripe_2.h5", "stripe_3.h5", "stripe_4.h5", "stripe_5.h5", "stripe_6.h5"]) @@ -108,7 +209,7 @@ def test_given_files_then_return(self, _): "/test/path/stripe_3.h5", "/test/path/stripe_4.h5", "/test/path/stripe_5.h5", "/test/path/stripe_6.h5"] - files = vdsgen.find_files("/test/path", "stripe_") + files = self.gen.find_files() self.assertEqual(expected_files, files) @@ -120,7 +221,7 @@ def test_given_files_out_of_order_then_return(self, _): "/test/path/stripe_3.h5", "/test/path/stripe_4.h5", "/test/path/stripe_5.h5", "/test/path/stripe_6.h5"] - files = vdsgen.find_files("/test/path", "stripe_") + files = self.gen.find_files() self.assertEqual(expected_files, files) @@ -128,23 +229,24 @@ def test_given_files_out_of_order_then_return(self, _): def test_given_one_file_then_error(self, _): with self.assertRaises(IOError): - vdsgen.find_files("/test/path", "stripe_") + self.gen.find_files() @patch('os.listdir', return_value=[]) def test_given_no_files_then_error(self, _): with self.assertRaises(IOError): - vdsgen.find_files("/test/path", "stripe_") + self.gen.find_files() class SimpleFunctionsTest(unittest.TestCase): def test_generate_vds_name(self): + gen = VDSGeneratorTester(prefix="stripe_") expected_name = "stripe_vds.h5" files = ["stripe_1.h5", "stripe_2.h5", 
"stripe_3.h5", "stripe_4.h5", "stripe_5.h5", "stripe_6.h5"] - vds_name = vdsgen.construct_vds_name("stripe_", files) + vds_name = gen.construct_vds_name(files) self.assertEqual(expected_name, vds_name) @@ -152,47 +254,47 @@ def test_generate_vds_name(self): @patch(h5py_patch_path + '.File', return_value=mock_data) def test_grab_metadata(self, h5file_mock): + gen = VDSGeneratorTester(source_node="data") expected_data = dict(frames=3, height=256, width=2048, dtype="uint16") - meta_data = vdsgen.grab_metadata("/test/path", "data") + meta_data = gen.grab_metadata("/test/path/stripe.hdf5") - h5file_mock.assert_called_once_with("/test/path", "r") + h5file_mock.assert_called_once_with("/test/path/stripe.hdf5", "r") self.assertEqual(expected_data, meta_data) - @patch(vdsgen_patch_path + '.grab_metadata', + @patch(VDSGenerator_patch_path + '.grab_metadata', return_value=dict(frames=3, height=256, width=2048, dtype="uint16")) def test_process_source_datasets_given_valid_data(self, grab_mock): - files = ["stripe_1.h5", "stripe_2.h5"] + gen = VDSGeneratorTester(datasets=["stripe_1.h5", "stripe_2.h5"]) expected_source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16", datasets=files) + dtype="uint16") - source = vdsgen.process_source_datasets(files, "data") + source = gen.process_source_datasets() - grab_mock.assert_has_calls([call("stripe_1.h5", "data"), - call("stripe_2.h5", "data")]) + grab_mock.assert_has_calls([call("stripe_1.h5"), call("stripe_2.h5")]) self.assertEqual(expected_source, source) - @patch(vdsgen_patch_path + '.grab_metadata', + @patch(VDSGenerator_patch_path + '.grab_metadata', side_effect=[dict(frames=3, height=256, width=2048, dtype="uint16"), dict(frames=4, height=256, width=2048, dtype="uint16")]) def test_process_source_datasets_given_mismatched_data(self, grab_mock): - files = ["stripe_1.h5", "stripe_2.h5"] + gen = VDSGeneratorTester(datasets=["stripe_1.h5", "stripe_2.h5"]) with self.assertRaises(ValueError): - 
vdsgen.process_source_datasets(files, "data") + gen.process_source_datasets() - grab_mock.assert_has_calls([call("stripe_1.h5", "data"), - call("stripe_2.h5", "data")]) + grab_mock.assert_has_calls([call("stripe_1.h5"), call("stripe_2.h5")]) def test_construct_vds_metadata(self): + gen = VDSGeneratorTester(datasets=[""] * 6, stripe_spacing=10, + module_spacing=100) source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16", datasets=[""]*6) - expected_vds = vdsgen.VDS(shape=(3, 1586, 2048), - spacing=[10] * 5 + [0], - path="/test/path") + dtype="uint16") + expected_vds = vdsgen.VDS(shape=(3, 1766, 2048), + spacing=[10, 100, 10, 100, 10, 0]) - vds = vdsgen.construct_vds_metadata(source, "/test/path") + vds = gen.construct_vds_metadata(source) self.assertEqual(expected_vds, vds) @@ -200,21 +302,26 @@ def test_construct_vds_metadata(self): @patch(h5py_patch_path + '.VirtualSource') @patch(h5py_patch_path + '.VirtualTarget') def test_create_vds_maps(self, target_mock, source_mock, map_mock): + gen = VDSGeneratorTester(output_file= "/test/path/vds.hdf5", + stripe_spacing=10, module_spacing=100, + target_node="full_frame", source_node="data", + datasets=["source"] * 6) source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16", datasets=["source"]*6) - vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0], - path="/test/path") + dtype="uint16") + vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0]) - map_list = vdsgen.create_vds_maps(source, vds, "data", "full_frame") + map_list = gen.create_vds_maps(source, vds) - target_mock.assert_called_once_with("/test/path", "full_frame", + target_mock.assert_called_once_with("/test/path/vds.hdf5", + "full_frame", shape=(3, 1586, 2048)) source_mock.assert_has_calls([call("source", "data", - shape=(3, 256, 2048))]*6) + shape=(3, 256, 2048))] * 6) # TODO: Improve this assert by passing numpy arrays to check slicing - map_mock.assert_has_calls([call(source_mock.return_value, - 
target_mock.return_value.__getitem__.return_value, - dtype="uint16")]*6) + map_mock.assert_has_calls([ + call(source_mock.return_value, + target_mock.return_value.__getitem__.return_value, + dtype="uint16")]*6) self.assertEqual([map_mock.return_value]*6, map_list) @@ -224,136 +331,126 @@ def setUp(self): self.file_mock = MagicMock() def test_validate_node_creates(self): + gen = VDSGeneratorTester(target_node="entry/detector/detector1") self.file_mock.get.return_value = None - vdsgen.validate_node(self.file_mock, "entry/detector/detector1") + gen.validate_node(self.file_mock) self.file_mock.create_group.assert_called_once_with("entry/detector") def test_validate_node_exists_then_no_op(self): + gen = VDSGeneratorTester(target_node="entry/detector/detector1") self.file_mock.get.return_value = "Group" - vdsgen.validate_node(self.file_mock, "entry/detector/detector1") + gen.validate_node(self.file_mock) self.file_mock.create_group.assert_not_called() def test_validate_node_invalid_then_error(self): + gen = VDSGeneratorTester(target_node="/entry/detector/detector1") with self.assertRaises(ValueError): - vdsgen.validate_node(self.file_mock, "/entry/detector/detector1") + gen.validate_node(self.file_mock) + + gen = VDSGeneratorTester(target_node="entry/detector/detector1/") with self.assertRaises(ValueError): - vdsgen.validate_node(self.file_mock, "entry/detector/detector1/") + gen.validate_node(self.file_mock) + + gen = VDSGeneratorTester(target_node="/entry/detector/detector1/") with self.assertRaises(ValueError): - vdsgen.validate_node(self.file_mock, "/entry/detector/detector1/") + gen.validate_node(self.file_mock) -class MainTest(unittest.TestCase): +class GenerateVDSTest(unittest.TestCase): file_mock = MagicMock() - file_mock_2 = MagicMock() - @patch('os.path.isfile', side_effect=[False, True, True, True]) - @patch(vdsgen_patch_path + '.validate_node') + @patch('os.path.isfile', return_value=False) + @patch(VDSGenerator_patch_path + '.validate_node') 
@patch(h5py_patch_path + '.File', return_value=file_mock) - @patch(vdsgen_patch_path + '.create_vds_maps') - @patch(vdsgen_patch_path + '.construct_vds_metadata') - @patch(vdsgen_patch_path + '.process_source_datasets') - @patch(vdsgen_patch_path + '.construct_vds_name', - return_value="stripe_vds.h5") - @patch(vdsgen_patch_path + '.find_files', - return_value=["stripe_1.hdf5", "stripe_2.hdf5", "stripe_3.hdf5"]) - def test_generate_vds_defaults(self, find_mock, gen_mock, process_mock, - construct_mock, create_mock, h5file_mock, - validate_mock, isfile_mock): + @patch(VDSGenerator_patch_path + '.create_vds_maps') + @patch(VDSGenerator_patch_path + '.construct_vds_metadata') + def test_generate_vds_create(self, construct_mock, create_mock, + h5file_mock, validate_mock, isfile_mock): + source_mock = MagicMock() + gen = VDSGeneratorTester(path="/test/path", prefix="stripe_", + output_file="/test/path/vds.hdf5", + name="vds.hdf5", + target_node="full_frame", source_node="data", + datasets=["stripe_1.hdf5", "stripe_2.hdf5", + "stripe_3.hdf5"], + source_metadata=source_mock) self.file_mock.reset_mock() vds_file_mock = self.file_mock.__enter__.return_value + vds_file_mock.get.return_value = None + + gen.generate_vds() - vdsgen.generate_vds("/test/path", prefix="stripe_") - - find_mock.assert_called_once_with("/test/path", "stripe_") - gen_mock.assert_called_once_with("stripe_", find_mock.return_value) - process_mock.assert_called_once_with(find_mock.return_value, "data") - construct_mock.assert_called_once_with(process_mock.return_value, - "/test/path/stripe_vds.h5", - None, None) - create_mock.assert_called_once_with(process_mock.return_value, - construct_mock.return_value, - "data", "full_frame") - validate_mock.assert_called_once_with(vds_file_mock, "full_frame") - h5file_mock.assert_called_once_with("/test/path/stripe_vds.h5", "w", - libver="latest") + isfile_mock.assert_called_once_with("/test/path/vds.hdf5") + construct_mock.assert_called_once_with(source_mock) + 
create_mock.assert_called_once_with(source_mock, + construct_mock.return_value) + validate_mock.assert_called_once_with(vds_file_mock) + h5file_mock.assert_called_once_with( + "/test/path/vds.hdf5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) @patch('os.path.isfile', return_value=True) - @patch(vdsgen_patch_path + '.validate_node') - @patch(h5py_patch_path + '.File', side_effect=[file_mock, file_mock_2]) - @patch(vdsgen_patch_path + '.create_vds_maps') - @patch(vdsgen_patch_path + '.construct_vds_metadata') - def test_generate_vds_given_args(self, metadata_mock, - create_mock, h5file_mock, validate_mock, - isfile_mock): + @patch(VDSGenerator_patch_path + '.validate_node') + @patch(h5py_patch_path + '.File', return_value=file_mock) + @patch(VDSGenerator_patch_path + '.create_vds_maps') + @patch(VDSGenerator_patch_path + '.construct_vds_metadata') + def test_generate_vds_append(self, construct_mock, create_mock, + h5file_mock, validate_mock, isfile_mock): + source_mock = MagicMock() + gen = VDSGeneratorTester(path="/test/path", prefix="stripe_", + output_file="/test/path/vds.hdf5", + name="vds.hdf5", + target_node="full_frame", source_node="data", + datasets=["stripe_1.hdf5", "stripe_2.hdf5", + "stripe_3.hdf5"], + source_metadata=source_mock) self.file_mock.reset_mock() - self.file_mock_2.reset_mock() - self.file_mock.__enter__.return_value.get.return_value = None - vds_file_mock = self.file_mock_2.__enter__.return_value - files = ["stripe_1.h5", "stripe_2.h5"] - file_paths = ["/test/path/" + file_ for file_ in files] - source_dict = dict(frames=3, height=256, width=1024, dtype="int16") - source = vdsgen.Source(frames=3, height=256, width=1024, dtype="int16", - datasets=file_paths) - - vdsgen.generate_vds("/test/path", files=files, output="vds.h5", - source=source_dict, - source_node="data", target_node="full_frame", - stripe_spacing=3, module_spacing=127) - - 
metadata_mock.assert_called_once_with(source, - "/test/path/vds.h5", - 3, 127) - create_mock.assert_called_once_with(source, - metadata_mock.return_value, - "data", "full_frame") - validate_mock.assert_called_once_with(vds_file_mock, "full_frame") + vds_file_mock = self.file_mock.__enter__.return_value + vds_file_mock.get.return_value = None + + gen.generate_vds() + + isfile_mock.assert_called_once_with("/test/path/vds.hdf5") + construct_mock.assert_called_once_with(source_mock) + create_mock.assert_called_once_with(source_mock, + construct_mock.return_value) + validate_mock.assert_called_once_with(vds_file_mock) h5file_mock.assert_has_calls([ - call("/test/path/vds.h5", "r", libver="latest"), - call("/test/path/vds.h5", "a", libver="latest")]) + call("/test/path/vds.hdf5", "r", libver="latest"), + call("/test/path/vds.hdf5", "a", libver="latest")]) vds_file_mock.create_virtual_dataset.assert_called_once_with( VMlist=create_mock.return_value, fill_value=0x1) - def test_generate_vds_prefix_and_files_then_error(self): - - with self.assertRaises(ValueError): - vdsgen.generate_vds("/test/path", "stripe_", ["file1", "file2"]) - - @patch('os.path.isfile', return_value=False) - @patch(vdsgen_patch_path + '.construct_vds_name', - return_value="stripe_vds.h5") - def test_generate_vds_no_source_or_files_then_error(self, construct_mock, - isfile_mock): - - with self.assertRaises(IOError) as e: - vdsgen.generate_vds("/test/path", files=["file1", "file2"]) - self.assertEqual("File /test/path/file1 does not exist. 
To create VDS " - "from raw files that haven't been created yet, " - "source must be provided.", - e.exception.message) - @patch('os.path.isfile', return_value=True) @patch(h5py_patch_path + '.File', return_value=file_mock) - def test_generate_vds_target_node_exists_then_error(self, h5_file_mock, - isfile_mock): + def test_generate_vds_node_exists_then_error(self, h5file_mock, + isfile_mock): + source_mock = MagicMock() + gen = VDSGeneratorTester(path="/test/path", prefix="stripe_", + output_file="/test/path/vds.hdf5", + name="vds.hdf5", + target_node="full_frame", source_node="data", + datasets=["stripe_1.hdf5", "stripe_2.hdf5", + "stripe_3.hdf5"], + source_metadata=source_mock) self.file_mock.reset_mock() - self.file_mock.__enter__.return_value.get.return_value = MagicMock() + vds_file_mock = self.file_mock.__enter__.return_value + vds_file_mock.get.return_value = "Group" - with self.assertRaises(IOError) as e: - vdsgen.generate_vds("/test/path", files=["file1", "file2"], - output="vds") - self.assertEqual("VDS /test/path/vds already has an entry for node " - "full_frame", - e.exception.message) + with self.assertRaises(IOError): + gen.generate_vds() + + +class MainTest(unittest.TestCase): - @patch(vdsgen_patch_path + '.generate_vds') + @patch(VDSGenerator_patch_path) @patch(vdsgen_patch_path + '.parse_args', return_value=MagicMock( path="/test/path", prefix="stripe_", empty=True, @@ -361,15 +458,17 @@ def test_generate_vds_target_node_exists_then_error(self, h5_file_mock, frames=3, height=256, width=2048, data_type="int16", source_node="data", target_node="full_frame", stripe_spacing=3, module_spacing=127)) - def test_main_empty(self, parse_mock, generate_mock): + def test_main_empty(self, parse_mock, init_mock): + gen_mock = init_mock.return_value args_mock = parse_mock.return_value vdsgen.main() parse_mock.assert_called_once_with() - generate_mock.assert_called_once_with( + init_mock.assert_called_once_with( args_mock.path, - prefix=args_mock.prefix, 
output="vds", files=args_mock.files, + prefix=args_mock.prefix, files=args_mock.files, + output=args_mock.output, source=dict(frames=args_mock.frames, height=args_mock.height, width=args_mock.width, dtype=args_mock.data_type), source_node=args_mock.source_node, @@ -377,7 +476,9 @@ def test_main_empty(self, parse_mock, generate_mock): stripe_spacing=args_mock.stripe_spacing, module_spacing=args_mock.module_spacing) - @patch(vdsgen_patch_path + '.generate_vds') + gen_mock.generate_vds.assert_called_once_with() + + @patch(VDSGenerator_patch_path) @patch(vdsgen_patch_path + '.parse_args', return_value=MagicMock( path="/test/path", prefix="stripe_", empty=False, diff --git a/vdsgen/__init__.py b/vdsgen/__init__.py index b47eda2..7b7928f 100644 --- a/vdsgen/__init__.py +++ b/vdsgen/__init__.py @@ -1,4 +1,4 @@ -"""Make 'generate_vds' easy to import.""" -from vdsgen import generate_vds +"""Make VDSGenerator easy to import.""" +from vdsgen import VDSGenerator -__all__ = ["generate_vds"] +__all__ = ["VDSGenerator"] diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 6b47b5a..4e1ab7b 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -13,16 +13,8 @@ logging.basicConfig(level=logging.INFO) -Source = namedtuple("Source", - ["datasets", "frames", "height", "width", "dtype"]) -VDS = namedtuple("VDS", ["shape", "spacing", "path"]) - -STRIPE_SPACING = 10 # Pixel spacing between stripes in a module -MODULE_SPACING = 10 # Pixel spacing between modules -SOURCE_DATA_NODE = "data" # Data node in source HDF5 files -TARGET_DATA_NODE = "full_frame" # Data node in VDS file -APPEND = "a" -OVERWRITE = "w" +Source = namedtuple("Source", ["frames", "height", "width", "dtype"]) +VDS = namedtuple("VDS", ["shape", "spacing"]) def parse_args(): @@ -90,258 +82,263 @@ def parse_args(): return args -def find_files(path, prefix): - """Find HDF5 files in given folder with given prefix. 
- - Args: - path(str): Path to folder containing files - prefix(str): Root name of image files - - Returns: - list: HDF5 files in folder that have the given prefix - - """ - regex = re.compile(prefix + r"\d+\.(hdf5|hdf|h5)") - - files = [] - for file_ in sorted(os.listdir(path)): - if re.match(regex, file_): - files.append(os.path.abspath(os.path.join(path, file_))) - - if len(files) == 0: - raise IOError("No files matching pattern found. Got path: {path}, " - "prefix: {prefix}".format(path=path, prefix=prefix)) - elif len(files) < 2: - raise IOError("Folder must contain more than one matching HDF5 file.") - else: - return files - - -def construct_vds_name(prefix, files): - """Generate the file name for the VDS from the sub files. - - Args: - prefix(str): Root name of image files - files(list(str)): HDF5 files being combined - - Returns: - str: Name of VDS file - - """ - _, ext = os.path.splitext(files[0]) - vds_name = "{prefix}vds{ext}".format(prefix=prefix, ext=ext) - - return vds_name - - -def grab_metadata(file_path, source_node): - """Grab data from given HDF5 file. - - Args: - file_path(str): Path to HDF5 file - source_node(str): Location of raw data in HDF5 file - - Returns: - dict: Number of frames, height, width and data type of datasets - - """ - h5_data = h5.File(file_path, 'r')[source_node] - frames, height, width = h5_data.shape - data_type = h5_data.dtype - - return dict(frames=frames, height=height, width=width, dtype=data_type) +class VDSGenerator(object): + """A class to generate Virtual Datasets from raw HDF5 files.""" -def process_source_datasets(datasets, source_node): - """Grab data from the given HDF5 files and check for consistency. 
+ # Constants + CREATE = "w" # Will overwrite any existing file + APPEND = "a" - Args: - datasets(list(str)): Datasets to grab data from - source_node(str): Location of raw data in HDF5 file + # Default Values + stripe_spacing = 10 # Pixel spacing between stripes in a module + module_spacing = 10 # Pixel spacing between modules + source_node = "data" # Data node in source HDF5 files + target_node = "full_frame" # Data node in VDS file + mode = CREATE # Write mode for vds file - Returns: - Source: Number of datasets and the attributes of them (frames, height - width and data type) - - """ - data = grab_metadata(datasets[0], source_node) - for path in datasets[1:]: - temp_data = grab_metadata(path, source_node) - for attribute, value in data.items(): - if temp_data[attribute] != value: - raise ValueError("Files have mismatched {}".format(attribute)) - - return Source(frames=data['frames'], height=data['height'], - width=data['width'], dtype=data['dtype'], datasets=datasets) - - -def construct_vds_metadata(source, output_file, - stripe_spacing=None, module_spacing=None): - """Construct VDS data attributes from source attributes. 
- - Args: - source(Source): Attributes of data sets - output_file(str): File path of new VDS - stripe_spacing(int): Spacing between stripes in module - module_spacing(int): Spacing between modules - - Returns: - VDS: Shape, dataset spacing and output path of virtual data set - - """ - if stripe_spacing is None: - stripe_spacing = STRIPE_SPACING - if module_spacing is None: - module_spacing = MODULE_SPACING - - stripes = len(source.datasets) - spacing = [0] * stripes - for idx in range(0, stripes - 1, 2): - spacing[idx] = stripe_spacing - for idx in range(1, stripes, 2): - spacing[idx] = module_spacing - spacing[-1] = 0 # We don't want the final stripe to have a gap afterwards - - height = (source.height * stripes) + sum(spacing) - shape = (source.frames, height, source.width) + def __init__(self, path, prefix=None, files=None, output=None, source=None, + source_node=None, target_node=None, + stripe_spacing=None, module_spacing=None): + """ + Args: + path(str): Root folder to find raw files and create VDS + prefix(str): Prefix of HDF5 files to generate from + e.g. image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 + files(list(str)): List of HDF5 files to generate from + output(str): Name of VDS file. 
+ source(dict): Height, width, data_type and frames for source data + Provide this to create a VDS for raw files that don't exist yet + source_node(str): Data node in source HDF5 files + target_node(str): Data node in VDS file + stripe_spacing(int): Spacing between stripes in module + module_spacing(int): Spacing between modules + + """ + if (prefix is None and files is None) or \ + (prefix is not None and files is not None): + raise ValueError("One, and only one, of prefix or files required.") + + self.path = path + + # Overwrite default values with arguments, if given + if source_node is not None: + self.source_node = source_node + if target_node is not None: + self.target_node = target_node + if stripe_spacing is not None: + self.stripe_spacing = stripe_spacing + if module_spacing is not None: + self.module_spacing = module_spacing + + # If Files not given, find files using path and prefix. + if files is None: + self.prefix = prefix + self.datasets = self.find_files() + files = [path_.split("/")[-1] for path_ in self.datasets] + # Else, get common prefix of given files and store full path + else: + self.prefix = os.path.commonprefix(files) + self.datasets = [os.path.join(path, file_) for file_ in files] + + # If output vds file name given, use, otherwise generate a default + if output is None: + self.name = self.construct_vds_name(files) + else: + self.name = output + + # If source not given, check files exist and get metadata. + if source is None: + for file_ in self.datasets: + if not os.path.isfile(file_): + raise IOError( + "File {} does not exist. 
To create VDS from raw " + "files that haven't been created yet, source " + "must be provided.".format(file_)) + self.source_metadata = self.process_source_datasets() + # Else, store given source metadata + else: + self.source_metadata = Source( + frames=source['frames'], height=source['height'], + width=source['width'], dtype=source['dtype']) + + self.output_file = os.path.abspath(os.path.join(self.path, self.name)) + + def generate_vds(self): + """Generate a virtual dataset.""" + if os.path.isfile(self.output_file): + with h5.File(self.output_file, "r", libver="latest") as vds: + node = vds.get(self.target_node) + if node is not None: + raise IOError("VDS {file} already has an entry for node " + "{node}".format(file=self.output_file, + node=self.target_node)) + else: + self.mode = self.APPEND + + file_names = [file_.split('/')[-1] for file_ in self.datasets] + logging.info("Combining datasets %s into %s", + ", ".join(file_names), self.name) + + vds_data = self.construct_vds_metadata(self.source_metadata) + map_list = self.create_vds_maps(self.source_metadata, vds_data) + + logging.info("Creating VDS at %s", self.output_file) + with h5.File(self.output_file, self.mode, libver="latest") as vds: + self.validate_node(vds) + vds.create_virtual_dataset(VMlist=map_list, fill_value=0x1) + + logging.info("Creation successful!") + + def find_files(self): + """Find HDF5 files in given folder with given prefix. + + Returns: + list: HDF5 files in folder that have the given prefix + + """ + regex = re.compile(self.prefix + r"\d+\.(hdf5|hdf|h5)") + + files = [] + for file_ in sorted(os.listdir(self.path)): + if re.match(regex, file_): + files.append(os.path.abspath(os.path.join(self.path, file_))) + + if len(files) == 0: + raise IOError("No files matching pattern found. 
Got path: {path}, " + "prefix: {prefix}".format(path=self.path, + prefix=self.prefix)) + elif len(files) < 2: + raise IOError("Folder must contain more than one matching HDF5 " + "file.") + else: + return files + + def construct_vds_name(self, files): + """Generate the file name for the VDS from the sub files. + + Args: + files(list(str)): HDF5 files being combined + + Returns: + str: Name of VDS file + + """ + _, ext = os.path.splitext(files[0]) + vds_name = "{prefix}vds{ext}".format(prefix=self.prefix, ext=ext) + + return vds_name + + def grab_metadata(self, file_path): + """Grab data from given HDF5 file. + + Args: + file_path(str): Path to HDF5 file + + Returns: + dict: Number of frames, height, width and data type of datasets + + """ + h5_data = h5.File(file_path, 'r')[self.source_node] + frames, height, width = h5_data.shape + data_type = h5_data.dtype + + return dict(frames=frames, height=height, width=width, dtype=data_type) + + def process_source_datasets(self): + """Grab data from the given HDF5 files and check for consistency. + + Returns: + Source: Number of datasets and the attributes of them (frames, + height width and data type) - return VDS(shape=shape, spacing=spacing, path=output_file) + """ + data = self.grab_metadata(self.datasets[0]) + for dataset in self.datasets[1:]: + temp_data = self.grab_metadata(dataset) + for attribute, value in data.items(): + if temp_data[attribute] != value: + raise ValueError("Files have mismatched " + "{}".format(attribute)) + return Source(frames=data['frames'], height=data['height'], + width=data['width'], dtype=data['dtype']) -def create_vds_maps(source, vds_data, source_node, target_node): - """Create a list of VirtualMaps of raw data to the VDS. + def construct_vds_metadata(self, source): + """Construct VDS data attributes from source attributes. 
- Args: - source(Source): Source attributes - vds_data(VDS): VDS attributes - source_node(str): Data node in source HDF5 files - target_node(str): Data node in VDS file + Args: + source(Source): Attributes of data sets - Returns: - list(VirtualMap): Maps describing links between raw data and VDS + Returns: + VDS: Shape, dataset spacing and output path of virtual data set - """ - source_shape = (source.frames, source.height, source.width) - vds = h5.VirtualTarget(vds_data.path, target_node, shape=vds_data.shape) + """ + stripes = len(self.datasets) + spacing = [0] * stripes + for idx in range(0, stripes - 1, 2): + spacing[idx] = self.stripe_spacing + for idx in range(1, stripes, 2): + spacing[idx] = self.module_spacing + # We don't want the final stripe to have a gap afterwards + spacing[-1] = 0 - map_list = [] - current_position = 0 - for idx, dataset in enumerate(source.datasets): - logging.info("Processing dataset %s", idx + 1) + height = (source.height * stripes) + sum(spacing) + shape = (source.frames, height, source.width) - v_source = h5.VirtualSource(dataset, source_node, shape=source_shape) + return VDS(shape=shape, spacing=spacing) - start = current_position - stop = start + source.height + vds_data.spacing[idx] - current_position = stop + def create_vds_maps(self, source, vds_data): + """Create a list of VirtualMaps of raw data to the VDS. - v_target = vds[:, start:stop, :] - v_map = h5.VirtualMap(v_source, v_target, dtype=source.dtype) - map_list.append(v_map) + Args: + source(Source): Source attributes + vds_data(VDS): VDS attributes - return map_list + Returns: + list(VirtualMap): Maps describing links between raw data and VDS + """ + source_shape = (source.frames, source.height, source.width) + vds = h5.VirtualTarget(self.output_file, self.target_node, + shape=vds_data.shape) -def validate_node(vds_file, target_node): - """Check if it is possible to create the given node. 
+ map_list = [] + current_position = 0 + for idx, dataset in enumerate(self.datasets): + logging.info("Processing dataset %s", idx + 1) - Check the target node is valid (no leading or trailing slashes) - Create any sub-group of the target node if it doesn't exist. + v_source = h5.VirtualSource(dataset, self.source_node, + shape=source_shape) - Args: - vds_file(h5py.File): File to check for node - target_node(str): Full path to node + start = current_position + stop = start + source.height + vds_data.spacing[idx] + current_position = stop - """ - if target_node.startswith("/") or target_node.endswith("/"): - raise ValueError("Target node should have no leading or trailing " - "slashes, got {}".format(target_node)) + v_target = vds[:, start:stop, :] + v_map = h5.VirtualMap(v_source, v_target, dtype=source.dtype) + map_list.append(v_map) - if "/" in target_node: - sub_group = target_node.rsplit("/", 1)[0] - if vds_file.get(sub_group) is None: - vds_file.create_group(sub_group) + return map_list + def validate_node(self, vds_file): + """Check if it is possible to create the given node. -def generate_vds(path, prefix=None, files=None, output=None, source=None, - source_node=None, target_node=None, - stripe_spacing=None, module_spacing=None): - """Generate a virtual dataset. - - Args: - path(str): Path to folder containing HDF5 files - prefix(str): Prefix of HDF5 files to generate from (in folder) - e.g. image_ for image_1.hdf5, image_2.hdf5, image_3.hdf5 - files(list(str)): List of files to combine. - output(str): Name of VDS file. - source(dict): Height, width, data_type and frames for source data. - Provide this to create a VDS for raw files that don't exist yet. 
- source_node(str): Data node in source HDF5 files - target_node(str): Data node in VDS file - stripe_spacing(int): Spacing between stripes in module - module_spacing(int): Spacing between modules - - """ - write_mode = OVERWRITE - if (prefix is None and files is None) or \ - (prefix is not None and files is not None): - raise ValueError("One, and only one, of prefix or files required.") - - if source_node is None: - source_node = SOURCE_DATA_NODE - if target_node is None: - target_node = TARGET_DATA_NODE - - if files is None: - file_paths = find_files(path, prefix) - files = [path_.split("/")[-1] for path_ in file_paths] - else: - file_paths = [os.path.join(path, file_) for file_ in files] - prefix = os.path.commonprefix(files) + Check the target node is valid (no leading or trailing slashes) + Create any sub-group of the target node if it doesn't exist. - if output is None: - vds_name = construct_vds_name(prefix, files) - else: - vds_name = output - - output_file = os.path.abspath(os.path.join(path, vds_name)) - if os.path.isfile(output_file): - with h5.File(output_file, "r", libver="latest") as vds_file: - node = vds_file.get(target_node) - if node is not None: - raise IOError("VDS {file} already has an entry for node " - "{node}".format(file=output_file, - node=target_node)) - else: - write_mode = APPEND - - file_names = [file_.split('/')[-1] for file_ in file_paths] - logging.info("Combining datasets %s into %s", - ", ".join(file_names), vds_name) - - if source is None: - for file_ in file_paths: - if not os.path.isfile(file_): - raise IOError("File {} does not exist. 
To create VDS from raw " - "files that haven't been created yet, source " - "must be provided.".format(file_)) - source_metadata = process_source_datasets(file_paths, source_node) - else: - source_metadata = Source( - frames=source['frames'], height=source['height'], - width=source['width'], dtype=source['dtype'], datasets=file_paths) + Args: + vds_file(h5py.File): File to check for node - vds_data = construct_vds_metadata(source_metadata, output_file, - stripe_spacing, module_spacing) - map_list = create_vds_maps(source_metadata, vds_data, - source_node, target_node) + """ + if self.target_node.startswith("/") or self.target_node.endswith("/"): + raise ValueError("Target node should have no leading or trailing " + "slashes, got {}".format(self.target_node)) - logging.info("Creating VDS at %s", output_file) - with h5.File(output_file, write_mode, libver="latest") as vds_file: - validate_node(vds_file, target_node) - vds_file.create_virtual_dataset(VMlist=map_list, fill_value=0x1) - - logging.info("Creation successful!") + if "/" in self.target_node: + sub_group = self.target_node.rsplit("/", 1)[0] + if vds_file.get(sub_group) is None: + vds_file.create_group(sub_group) def main(): @@ -354,12 +351,16 @@ def main(): else: source_metadata = None - generate_vds(args.path, - prefix=args.prefix, files=args.files, output=args.output, - source=source_metadata, - source_node=args.source_node, target_node=args.target_node, - stripe_spacing=args.stripe_spacing, - module_spacing=args.module_spacing) + gen = VDSGenerator(args.path, + prefix=args.prefix, files=args.files, + output=args.output, + source=source_metadata, + source_node=args.source_node, + target_node=args.target_node, + stripe_spacing=args.stripe_spacing, + module_spacing=args.module_spacing) + + gen.generate_vds() if __name__ == "__main__": sys.exit(main()) From 2eb97903601a99d9141581ff248e5db333d00120 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 13 Mar 2017 12:03:58 +0000 Subject: [PATCH 19/28] Seperate 
CLI app and class into two files --- tests/app_test.py | 147 +++++++++++++++++++++++++++++++++++++++++++ tests/vdsgen_test.py | 137 ---------------------------------------- vdsgen/app.py | 95 ++++++++++++++++++++++++++++ vdsgen/vdsgen.py | 91 --------------------------- 4 files changed, 242 insertions(+), 228 deletions(-) create mode 100644 tests/app_test.py create mode 100644 vdsgen/app.py diff --git a/tests/app_test.py b/tests/app_test.py new file mode 100644 index 0000000..c53aa67 --- /dev/null +++ b/tests/app_test.py @@ -0,0 +1,147 @@ +import unittest +from pkg_resources import require + +require("mock") +from mock import MagicMock, patch, call + +from vdsgen import app + +parser_patch_path = "argparse.ArgumentParser" +app_patch_path = "vdsgen.app" +VDSGenerator_patch_path = app_patch_path + ".VDSGenerator" + + +class ParseArgsTest(unittest.TestCase): + + @patch(parser_patch_path + '.add_mutually_exclusive_group') + @patch(parser_patch_path + '.add_argument_group') + @patch(parser_patch_path + '.add_argument') + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=False, files=None)) + def test_parser(self, parse_mock, add_mock, add_group_mock, + add_exclusive_group_mock): + group_mock = add_group_mock.return_value + exclusive_group_mock = add_exclusive_group_mock.return_value + + args = app.parse_args() + + add_mock.assert_has_calls( + [call("path", type=str, + help="Root folder to create VDS in. Also where source " + "files are searched for if --prefix given."), + call("-o", "--output", type=str, default=None, dest="output", + help="Output file name. 
Default is input file prefix with " + "vds suffix."), + call("-e", "--empty", action="store_true", dest="empty", + help="Make empty VDS pointing to datasets " + "that don't exist, yet."), + call("-s", "--stripe_spacing", nargs="?", type=int, default=None, + dest="stripe_spacing", + help="Spacing between two stripes in a module."), + call("-m", "--module_spacing", nargs="?", type=int, default=None, + dest="module_spacing", + help="Spacing between two modules."), + call("--source_node", nargs="?", type=str, default=None, + dest="source_node", + help="Data node in source HDF5 files."), + call("--target_node", nargs="?", type=str, default=None, + dest="target_node", + help="Data node in VDS file.")]) + + add_group_mock.assert_called_with() + group_mock.add_argument.assert_has_calls( + [call("--frames", type=int, default=1, dest="frames", + help="Number of frames to combine into VDS."), + call("--height", type=int, default=256, dest="height", + help="Height of raw datasets."), + call("--width", type=int, default=1024, dest="width", + help="Width of raw datasets."), + call("--data_type", type=str, default="uint16", dest="data_type", + help="Data type of raw datasets.")]) + + add_exclusive_group_mock.assert_called_with(required=True) + exclusive_group_mock.add_argument.assert_has_calls( + [call("-p", "--prefix", type=str, default=None, dest="prefix", + help="Prefix of files - e.g 'stripe_' to combine the images " + "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' " + "located at ."), + call("-f", "--files", nargs="*", type=str, default=None, + dest="files", + help="Manually define files to combine.")]) + + parse_mock.assert_called_once_with() + self.assertEqual(parse_mock.return_value, args) + + @patch(parser_patch_path + '.error') + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=True, files=None)) + def test_empty_and_not_files_then_error(self, parse_mock, error_mock): + + app.parse_args() + + error_mock.assert_called_once_with( + "To make 
an empty VDS you must explicitly define --files for the " + "eventual raw datasets.") + + @patch(parser_patch_path + '.error') + @patch(parser_patch_path + '.parse_args', + return_value=MagicMock(empty=True, files=["file"])) + def test_only_one_file_then_error(self, parse_mock, error_mock): + + app.parse_args() + + error_mock.assert_called_once_with( + "Must define at least two files to combine.") + + +class MainTest(unittest.TestCase): + @patch(VDSGenerator_patch_path) + @patch(app_patch_path + '.parse_args', + return_value=MagicMock( + path="/test/path", prefix="stripe_", empty=True, + files=["file1.hdf5", "file2.hdf5"], output="vds", + frames=3, height=256, width=2048, data_type="int16", + source_node="data", target_node="full_frame", + stripe_spacing=3, module_spacing=127)) + def test_main_empty(self, parse_mock, init_mock): + gen_mock = init_mock.return_value + args_mock = parse_mock.return_value + + app.main() + + parse_mock.assert_called_once_with() + init_mock.assert_called_once_with( + args_mock.path, + prefix=args_mock.prefix, files=args_mock.files, + output=args_mock.output, + source=dict(frames=args_mock.frames, height=args_mock.height, + width=args_mock.width, dtype=args_mock.data_type), + source_node=args_mock.source_node, + target_node=args_mock.target_node, + stripe_spacing=args_mock.stripe_spacing, + module_spacing=args_mock.module_spacing) + + gen_mock.generate_vds.assert_called_once_with() + + @patch(VDSGenerator_patch_path) + @patch(app_patch_path + '.parse_args', + return_value=MagicMock( + path="/test/path", prefix="stripe_", empty=False, + files=["file1.hdf5", "file2.hdf5"], output="vds", + frames=3, height=256, width=2048, data_type="int16", + source_node="data", target_node="full_frame", + stripe_spacing=3, module_spacing=127)) + def test_main_not_empty(self, parse_mock, generate_mock): + args_mock = parse_mock.return_value + + app.main() + + parse_mock.assert_called_once_with() + generate_mock.assert_called_once_with( + args_mock.path, + 
prefix=args_mock.prefix, output="vds", files=args_mock.files, + source=None, + source_node=args_mock.source_node, + stripe_spacing=args_mock.stripe_spacing, + target_node=args_mock.target_node, + module_spacing=args_mock.module_spacing) \ No newline at end of file diff --git a/tests/vdsgen_test.py b/tests/vdsgen_test.py index 21933c4..c6b0321 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgen_test.py @@ -11,7 +11,6 @@ vdsgen_patch_path = "vdsgen.vdsgen" VDSGenerator_patch_path = vdsgen_patch_path + ".VDSGenerator" -parser_patch_path = "argparse.ArgumentParser" h5py_patch_path = "h5py" sys.path.append(os.path.join(os.path.dirname(__file__), "..", "..", "h5py")) @@ -31,88 +30,6 @@ def __init__(self, **kwargs): self.__setattr__(attribute, value) -class ParseArgsTest(unittest.TestCase): - - @patch(parser_patch_path + '.add_mutually_exclusive_group') - @patch(parser_patch_path + '.add_argument_group') - @patch(parser_patch_path + '.add_argument') - @patch(parser_patch_path + '.parse_args', - return_value=MagicMock(empty=False, files=None)) - def test_parser(self, parse_mock, add_mock, add_group_mock, - add_exclusive_group_mock): - group_mock = add_group_mock.return_value - exclusive_group_mock = add_exclusive_group_mock.return_value - - args = vdsgen.parse_args() - - add_mock.assert_has_calls( - [call("path", type=str, - help="Root folder to create VDS in. Also where source " - "files are searched for if --prefix given."), - call("-o", "--output", type=str, default=None, dest="output", - help="Output file name. 
Default is input file prefix with " - "vds suffix."), - call("-e", "--empty", action="store_true", dest="empty", - help="Make empty VDS pointing to datasets " - "that don't exist, yet."), - call("-s", "--stripe_spacing", nargs="?", type=int, default=None, - dest="stripe_spacing", - help="Spacing between two stripes in a module."), - call("-m", "--module_spacing", nargs="?", type=int, default=None, - dest="module_spacing", - help="Spacing between two modules."), - call("--source_node", nargs="?", type=str, default=None, - dest="source_node", - help="Data node in source HDF5 files."), - call("--target_node", nargs="?", type=str, default=None, - dest="target_node", - help="Data node in VDS file.")]) - - add_group_mock.assert_called_with() - group_mock.add_argument.assert_has_calls( - [call("--frames", type=int, default=1, dest="frames", - help="Number of frames to combine into VDS."), - call("--height", type=int, default=256, dest="height", - help="Height of raw datasets."), - call("--width", type=int, default=1024, dest="width", - help="Width of raw datasets."), - call("--data_type", type=str, default="uint16", dest="data_type", - help="Data type of raw datasets.")]) - - add_exclusive_group_mock.assert_called_with(required=True) - exclusive_group_mock.add_argument.assert_has_calls( - [call("-p", "--prefix", type=str, default=None, dest="prefix", - help="Prefix of files - e.g 'stripe_' to combine the images " - "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' " - "located at ."), - call("-f", "--files", nargs="*", type=str, default=None, - dest="files", - help="Manually define files to combine.")]) - - parse_mock.assert_called_once_with() - self.assertEqual(parse_mock.return_value, args) - - @patch(parser_patch_path + '.error') - @patch(parser_patch_path + '.parse_args', - return_value=MagicMock(empty=True, files=None)) - def test_empty_and_not_files_then_error(self, parse_mock, error_mock): - - vdsgen.parse_args() - - error_mock.assert_called_once_with( - "To 
make an empty VDS you must explicitly define --files for the " - "eventual raw datasets.") - - @patch(parser_patch_path + '.error') - @patch(parser_patch_path + '.parse_args', - return_value=MagicMock(empty=True, files=["file"])) - def test_only_one_file_then_error(self, parse_mock, error_mock): - vdsgen.parse_args() - - error_mock.assert_called_once_with( - "Must define at least two files to combine.") - - class VDSGeneratorInitTest(unittest.TestCase): @patch('os.path.isfile', return_value=True) @@ -446,57 +363,3 @@ def test_generate_vds_node_exists_then_error(self, h5file_mock, with self.assertRaises(IOError): gen.generate_vds() - - -class MainTest(unittest.TestCase): - - @patch(VDSGenerator_patch_path) - @patch(vdsgen_patch_path + '.parse_args', - return_value=MagicMock( - path="/test/path", prefix="stripe_", empty=True, - files=["file1.hdf5", "file2.hdf5"], output="vds", - frames=3, height=256, width=2048, data_type="int16", - source_node="data", target_node="full_frame", - stripe_spacing=3, module_spacing=127)) - def test_main_empty(self, parse_mock, init_mock): - gen_mock = init_mock.return_value - args_mock = parse_mock.return_value - - vdsgen.main() - - parse_mock.assert_called_once_with() - init_mock.assert_called_once_with( - args_mock.path, - prefix=args_mock.prefix, files=args_mock.files, - output=args_mock.output, - source=dict(frames=args_mock.frames, height=args_mock.height, - width=args_mock.width, dtype=args_mock.data_type), - source_node=args_mock.source_node, - target_node=args_mock.target_node, - stripe_spacing=args_mock.stripe_spacing, - module_spacing=args_mock.module_spacing) - - gen_mock.generate_vds.assert_called_once_with() - - @patch(VDSGenerator_patch_path) - @patch(vdsgen_patch_path + '.parse_args', - return_value=MagicMock( - path="/test/path", prefix="stripe_", empty=False, - files=["file1.hdf5", "file2.hdf5"], output="vds", - frames=3, height=256, width=2048, data_type="int16", - source_node="data", target_node="full_frame", - 
stripe_spacing=3, module_spacing=127)) - def test_main_not_empty(self, parse_mock, generate_mock): - args_mock = parse_mock.return_value - - vdsgen.main() - - parse_mock.assert_called_once_with() - generate_mock.assert_called_once_with( - args_mock.path, - prefix=args_mock.prefix, output="vds", files=args_mock.files, - source=None, - source_node=args_mock.source_node, - stripe_spacing=args_mock.stripe_spacing, - target_node=args_mock.target_node, - module_spacing=args_mock.module_spacing) diff --git a/vdsgen/app.py b/vdsgen/app.py new file mode 100644 index 0000000..3a6426e --- /dev/null +++ b/vdsgen/app.py @@ -0,0 +1,95 @@ +import sys +from argparse import ArgumentParser + +from vdsgen import VDSGenerator + + +def parse_args(): + """Parse command line arguments.""" + parser = ArgumentParser() + parser.add_argument("path", type=str, + help="Root folder to create VDS in. Also where source " + "files are searched for if --prefix given.") + + # Definition of file names in - Common prefix or explicit list + file_definition = parser.add_mutually_exclusive_group(required=True) + file_definition.add_argument( + "-p", "--prefix", type=str, default=None, dest="prefix", + help="Prefix of files - e.g 'stripe_' to combine the images " + "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' located " + "at .") + file_definition.add_argument( + "-f", "--files", nargs="*", type=str, default=None, dest="files", + help="Manually define files to combine.") + parser.add_argument( + "-o", "--output", type=str, default=None, dest="output", + help="Output file name. 
Default is input file prefix with vds suffix.") + + # Arguments required to allow VDS to be created before raw files exist + parser.add_argument( + "-e", "--empty", action="store_true", dest="empty", + help="Make empty VDS pointing to datasets that don't exist, yet.") + source_metadata = parser.add_argument_group() + source_metadata.add_argument( + "--frames", type=int, default=1, dest="frames", + help="Number of frames to combine into VDS.") + source_metadata.add_argument( + "--height", type=int, default=256, dest="height", + help="Height of raw datasets.") + source_metadata.add_argument( + "--width", type=int, default=1024, dest="width", + help="Width of raw datasets.") + source_metadata.add_argument( + "--data_type", type=str, default="uint16", dest="data_type", + help="Data type of raw datasets.") + + # Arguments to override defaults + parser.add_argument("-s", "--stripe_spacing", nargs="?", type=int, + default=None, dest="stripe_spacing", + help="Spacing between two stripes in a module.") + parser.add_argument("-m", "--module_spacing", nargs="?", type=int, + default=None, dest="module_spacing", + help="Spacing between two modules.") + parser.add_argument("--source_node", nargs="?", type=str, default=None, + dest="source_node", + help="Data node in source HDF5 files.") + parser.add_argument("--target_node", nargs="?", type=str, default=None, + dest="target_node", + help="Data node in VDS file.") + + args = parser.parse_args() + + if args.empty and args.files is None: + parser.error( + "To make an empty VDS you must explicitly define --files for the " + "eventual raw datasets.") + if args.files is not None and len(args.files) < 2: + parser.error("Must define at least two files to combine.") + + return args + + +def main(): + """Run program.""" + args = parse_args() + + if args.empty: + source_metadata = dict(frames=args.frames, height=args.height, + width=args.width, dtype=args.data_type) + else: + source_metadata = None + + gen = VDSGenerator(args.path, + 
prefix=args.prefix, files=args.files, + output=args.output, + source=source_metadata, + source_node=args.source_node, + target_node=args.target_node, + stripe_spacing=args.stripe_spacing, + module_spacing=args.module_spacing) + + gen.generate_vds() + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgen.py index 4e1ab7b..a2807c3 100644 --- a/vdsgen/vdsgen.py +++ b/vdsgen/vdsgen.py @@ -3,7 +3,6 @@ import os import sys -from argparse import ArgumentParser import re import logging @@ -17,71 +16,6 @@ VDS = namedtuple("VDS", ["shape", "spacing"]) -def parse_args(): - """Parse command line arguments.""" - parser = ArgumentParser() - parser.add_argument("path", type=str, - help="Root folder to create VDS in. Also where source " - "files are searched for if --prefix given.") - - # Definition of file names in - Common prefix or explicit list - file_definition = parser.add_mutually_exclusive_group(required=True) - file_definition.add_argument( - "-p", "--prefix", type=str, default=None, dest="prefix", - help="Prefix of files - e.g 'stripe_' to combine the images " - "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' located " - "at .") - file_definition.add_argument( - "-f", "--files", nargs="*", type=str, default=None, dest="files", - help="Manually define files to combine.") - parser.add_argument( - "-o", "--output", type=str, default=None, dest="output", - help="Output file name. 
Default is input file prefix with vds suffix.") - - # Arguments required to allow VDS to be created before raw files exist - parser.add_argument( - "-e", "--empty", action="store_true", dest="empty", - help="Make empty VDS pointing to datasets that don't exist, yet.") - source_metadata = parser.add_argument_group() - source_metadata.add_argument( - "--frames", type=int, default=1, dest="frames", - help="Number of frames to combine into VDS.") - source_metadata.add_argument( - "--height", type=int, default=256, dest="height", - help="Height of raw datasets.") - source_metadata.add_argument( - "--width", type=int, default=1024, dest="width", - help="Width of raw datasets.") - source_metadata.add_argument( - "--data_type", type=str, default="uint16", dest="data_type", - help="Data type of raw datasets.") - - # Arguments to override defaults - parser.add_argument("-s", "--stripe_spacing", nargs="?", type=int, - default=None, dest="stripe_spacing", - help="Spacing between two stripes in a module.") - parser.add_argument("-m", "--module_spacing", nargs="?", type=int, - default=None, dest="module_spacing", - help="Spacing between two modules.") - parser.add_argument("--source_node", nargs="?", type=str, default=None, - dest="source_node", - help="Data node in source HDF5 files.") - parser.add_argument("--target_node", nargs="?", type=str, default=None, - dest="target_node", - help="Data node in VDS file.") - - args = parser.parse_args() - - if args.empty and args.files is None: - parser.error( - "To make an empty VDS you must explicitly define --files for the " - "eventual raw datasets.") - if args.files is not None and len(args.files) < 2: - parser.error("Must define at least two files to combine.") - - return args - - class VDSGenerator(object): """A class to generate Virtual Datasets from raw HDF5 files.""" @@ -339,28 +273,3 @@ def validate_node(self, vds_file): sub_group = self.target_node.rsplit("/", 1)[0] if vds_file.get(sub_group) is None: 
vds_file.create_group(sub_group) - - -def main(): - """Run program.""" - args = parse_args() - - if args.empty: - source_metadata = dict(frames=args.frames, height=args.height, - width=args.width, dtype=args.data_type) - else: - source_metadata = None - - gen = VDSGenerator(args.path, - prefix=args.prefix, files=args.files, - output=args.output, - source=source_metadata, - source_node=args.source_node, - target_node=args.target_node, - stripe_spacing=args.stripe_spacing, - module_spacing=args.module_spacing) - - gen.generate_vds() - -if __name__ == "__main__": - sys.exit(main()) From b19f9c03ebfc6a5f118dcc60482bfeef416b9b4c Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 13 Mar 2017 13:02:33 +0000 Subject: [PATCH 20/28] Rename file to match contained class --- .../{vdsgen_test.py => vdsgenerator_test.py} | 26 +++++++++---------- vdsgen/__init__.py | 2 +- vdsgen/app.py | 2 +- vdsgen/{vdsgen.py => vdsgenerator.py} | 0 4 files changed, 15 insertions(+), 15 deletions(-) rename tests/{vdsgen_test.py => vdsgenerator_test.py} (94%) rename vdsgen/{vdsgen.py => vdsgenerator.py} (100%) diff --git a/tests/vdsgen_test.py b/tests/vdsgenerator_test.py similarity index 94% rename from tests/vdsgen_test.py rename to tests/vdsgenerator_test.py index c6b0321..90d43f3 100644 --- a/tests/vdsgen_test.py +++ b/tests/vdsgenerator_test.py @@ -6,10 +6,10 @@ require("mock") from mock import MagicMock, patch, call -from vdsgen import vdsgen -from vdsgen.vdsgen import VDSGenerator +from vdsgen import vdsgenerator +from vdsgen.vdsgenerator import VDSGenerator -vdsgen_patch_path = "vdsgen.vdsgen" +vdsgen_patch_path = "vdsgen.vdsgenerator" VDSGenerator_patch_path = vdsgen_patch_path + ".VDSGenerator" h5py_patch_path = "h5py" @@ -65,7 +65,7 @@ def test_generate_vds_given_args(self): files = ["stripe_1.h5", "stripe_2.h5"] file_paths = ["/test/path/" + file_ for file_ in files] source_dict = dict(frames=3, height=256, width=2048, dtype="int16") - source = vdsgen.Source(frames=3, height=256, 
width=2048, dtype="int16") + source = vdsgenerator.Source(frames=3, height=256, width=2048, dtype="int16") gen = VDSGenerator("/test/path", files=files, @@ -183,8 +183,8 @@ def test_grab_metadata(self, h5file_mock): return_value=dict(frames=3, height=256, width=2048, dtype="uint16")) def test_process_source_datasets_given_valid_data(self, grab_mock): gen = VDSGeneratorTester(datasets=["stripe_1.h5", "stripe_2.h5"]) - expected_source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16") + expected_source = vdsgenerator.Source(frames=3, height=256, width=2048, + dtype="uint16") source = gen.process_source_datasets() @@ -206,10 +206,10 @@ def test_process_source_datasets_given_mismatched_data(self, grab_mock): def test_construct_vds_metadata(self): gen = VDSGeneratorTester(datasets=[""] * 6, stripe_spacing=10, module_spacing=100) - source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16") - expected_vds = vdsgen.VDS(shape=(3, 1766, 2048), - spacing=[10, 100, 10, 100, 10, 0]) + source = vdsgenerator.Source(frames=3, height=256, width=2048, + dtype="uint16") + expected_vds = vdsgenerator.VDS(shape=(3, 1766, 2048), + spacing=[10, 100, 10, 100, 10, 0]) vds = gen.construct_vds_metadata(source) @@ -223,9 +223,9 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): stripe_spacing=10, module_spacing=100, target_node="full_frame", source_node="data", datasets=["source"] * 6) - source = vdsgen.Source(frames=3, height=256, width=2048, - dtype="uint16") - vds = vdsgen.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0]) + source = vdsgenerator.Source(frames=3, height=256, width=2048, + dtype="uint16") + vds = vdsgenerator.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0]) map_list = gen.create_vds_maps(source, vds) diff --git a/vdsgen/__init__.py b/vdsgen/__init__.py index 7b7928f..a2f2240 100644 --- a/vdsgen/__init__.py +++ b/vdsgen/__init__.py @@ -1,4 +1,4 @@ """Make VDSGenerator easy to import.""" -from vdsgen import VDSGenerator 
+from vdsgenerator import VDSGenerator __all__ = ["VDSGenerator"] diff --git a/vdsgen/app.py b/vdsgen/app.py index 3a6426e..881561b 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -1,7 +1,7 @@ import sys from argparse import ArgumentParser -from vdsgen import VDSGenerator +from vdsgenerator import VDSGenerator def parse_args(): diff --git a/vdsgen/vdsgen.py b/vdsgen/vdsgenerator.py similarity index 100% rename from vdsgen/vdsgen.py rename to vdsgen/vdsgenerator.py From 60f51adda0af89065259e3d67513b658e65a1194 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Mon, 20 Mar 2017 14:17:02 +0000 Subject: [PATCH 21/28] Update to allow shape as an argument, not frames, height, width This allows shape to be ND frames plus height and width --- tests/app_test.py | 15 ++++++--------- tests/vdsgenerator_test.py | 16 +++++++++------- vdsgen/app.py | 15 +++++---------- vdsgen/vdsgenerator.py | 35 ++++++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 33 deletions(-) diff --git a/tests/app_test.py b/tests/app_test.py index c53aa67..33bdc03 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -50,12 +50,10 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, add_group_mock.assert_called_with() group_mock.add_argument.assert_has_calls( - [call("--frames", type=int, default=1, dest="frames", - help="Number of frames to combine into VDS."), - call("--height", type=int, default=256, dest="height", - help="Height of raw datasets."), - call("--width", type=int, default=1024, dest="width", - help="Width of raw datasets."), + [call("--shape", type=int, nargs="*", default=[1, 256, 2048], + dest="shape", + help="Shape of dataset - 'frames height width', where " + "frames is N dimensional."), call("--data_type", type=str, default="uint16", dest="data_type", help="Data type of raw datasets.")]) @@ -100,7 +98,7 @@ class MainTest(unittest.TestCase): return_value=MagicMock( path="/test/path", prefix="stripe_", empty=True, files=["file1.hdf5", "file2.hdf5"], 
output="vds", - frames=3, height=256, width=2048, data_type="int16", + shape=[3, 256, 2048], data_type="int16", source_node="data", target_node="full_frame", stripe_spacing=3, module_spacing=127)) def test_main_empty(self, parse_mock, init_mock): @@ -114,8 +112,7 @@ def test_main_empty(self, parse_mock, init_mock): args_mock.path, prefix=args_mock.prefix, files=args_mock.files, output=args_mock.output, - source=dict(frames=args_mock.frames, height=args_mock.height, - width=args_mock.width, dtype=args_mock.data_type), + source=dict(shape=args_mock.shape, dtype=args_mock.data_type), source_node=args_mock.source_node, target_node=args_mock.target_node, stripe_spacing=args_mock.stripe_spacing, diff --git a/tests/vdsgenerator_test.py b/tests/vdsgenerator_test.py index 90d43f3..a7d7a51 100644 --- a/tests/vdsgenerator_test.py +++ b/tests/vdsgenerator_test.py @@ -64,8 +64,9 @@ def test_generate_vds_defaults(self, find_mock, construct_mock, def test_generate_vds_given_args(self): files = ["stripe_1.h5", "stripe_2.h5"] file_paths = ["/test/path/" + file_ for file_ in files] - source_dict = dict(frames=3, height=256, width=2048, dtype="int16") - source = vdsgenerator.Source(frames=3, height=256, width=2048, dtype="int16") + source_dict = dict(shape=(3, 256, 2048), dtype="int16") + source = vdsgenerator.Source(frames=(3,), height=256, width=2048, + dtype="int16") gen = VDSGenerator("/test/path", files=files, @@ -172,7 +173,7 @@ def test_generate_vds_name(self): @patch(h5py_patch_path + '.File', return_value=mock_data) def test_grab_metadata(self, h5file_mock): gen = VDSGeneratorTester(source_node="data") - expected_data = dict(frames=3, height=256, width=2048, dtype="uint16") + expected_data = dict(frames=(3,), height=256, width=2048, dtype="uint16") meta_data = gen.grab_metadata("/test/path/stripe.hdf5") @@ -180,10 +181,11 @@ def test_grab_metadata(self, h5file_mock): self.assertEqual(expected_data, meta_data) @patch(VDSGenerator_patch_path + '.grab_metadata', - 
return_value=dict(frames=3, height=256, width=2048, dtype="uint16")) + return_value=dict(frames=(3,), height=256, width=2048, dtype="uint16")) def test_process_source_datasets_given_valid_data(self, grab_mock): gen = VDSGeneratorTester(datasets=["stripe_1.h5", "stripe_2.h5"]) - expected_source = vdsgenerator.Source(frames=3, height=256, width=2048, + expected_source = vdsgenerator.Source(frames=(3,), height=256, + width=2048, dtype="uint16") source = gen.process_source_datasets() @@ -206,7 +208,7 @@ def test_process_source_datasets_given_mismatched_data(self, grab_mock): def test_construct_vds_metadata(self): gen = VDSGeneratorTester(datasets=[""] * 6, stripe_spacing=10, module_spacing=100) - source = vdsgenerator.Source(frames=3, height=256, width=2048, + source = vdsgenerator.Source(frames=(3,), height=256, width=2048, dtype="uint16") expected_vds = vdsgenerator.VDS(shape=(3, 1766, 2048), spacing=[10, 100, 10, 100, 10, 0]) @@ -223,7 +225,7 @@ def test_create_vds_maps(self, target_mock, source_mock, map_mock): stripe_spacing=10, module_spacing=100, target_node="full_frame", source_node="data", datasets=["source"] * 6) - source = vdsgenerator.Source(frames=3, height=256, width=2048, + source = vdsgenerator.Source(frames=(3,), height=256, width=2048, dtype="uint16") vds = vdsgenerator.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0]) diff --git a/vdsgen/app.py b/vdsgen/app.py index 881561b..ea7ce62 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -31,14 +31,9 @@ def parse_args(): help="Make empty VDS pointing to datasets that don't exist, yet.") source_metadata = parser.add_argument_group() source_metadata.add_argument( - "--frames", type=int, default=1, dest="frames", - help="Number of frames to combine into VDS.") - source_metadata.add_argument( - "--height", type=int, default=256, dest="height", - help="Height of raw datasets.") - source_metadata.add_argument( - "--width", type=int, default=1024, dest="width", - help="Width of raw datasets.") + "--shape", 
type=int, nargs="*", default=[1, 256, 2048], dest="shape", + help="Shape of dataset - 'frames height width', where frames is N " + "dimensional.") source_metadata.add_argument( "--data_type", type=str, default="uint16", dest="data_type", help="Data type of raw datasets.") @@ -58,6 +53,7 @@ def parse_args(): help="Data node in VDS file.") args = parser.parse_args() + args.shape = tuple(args.shape) if args.empty and args.files is None: parser.error( @@ -74,8 +70,7 @@ def main(): args = parse_args() if args.empty: - source_metadata = dict(frames=args.frames, height=args.height, - width=args.width, dtype=args.data_type) + source_metadata = dict(shape=args.shape, dtype=args.data_type) else: source_metadata = None diff --git a/vdsgen/vdsgenerator.py b/vdsgen/vdsgenerator.py index a2807c3..a464b1b 100644 --- a/vdsgen/vdsgenerator.py +++ b/vdsgen/vdsgenerator.py @@ -2,7 +2,6 @@ """A CLI tool for generating virtual datasets from individual HDF5 files.""" import os -import sys import re import logging @@ -23,6 +22,7 @@ class VDSGenerator(object): # Constants CREATE = "w" # Will overwrite any existing file APPEND = "a" + FULL_SLICE = slice(None) # Default Values stripe_spacing = 10 # Pixel spacing between stripes in a module @@ -92,12 +92,31 @@ def __init__(self, path, prefix=None, files=None, output=None, source=None, self.source_metadata = self.process_source_datasets() # Else, store given source metadata else: + frames, height, width = self.parse_shape(source['shape']) self.source_metadata = Source( - frames=source['frames'], height=source['height'], - width=source['width'], dtype=source['dtype']) + frames=frames, height=height, width=width, + dtype=source['dtype']) self.output_file = os.path.abspath(os.path.join(self.path, self.name)) + @staticmethod + def parse_shape(shape): + """Split shape into height, width and frames. 
+ + Args: + shape(tuple): Shape of dataset + + Returns: + frames, height, width + + """ + # The last two elements of shape are the height and width of the image + height, width = shape[-2:] + # Everything before that is the frames for each axis + frames = shape[:-2] + + return frames, height, width + def generate_vds(self): """Generate a virtual dataset.""" if os.path.isfile(self.output_file): @@ -174,7 +193,7 @@ def grab_metadata(self, file_path): """ h5_data = h5.File(file_path, 'r')[self.source_node] - frames, height, width = h5_data.shape + frames, height, width = self.parse_shape(h5_data.shape) data_type = h5_data.dtype return dict(frames=frames, height=height, width=width, dtype=data_type) @@ -218,7 +237,7 @@ def construct_vds_metadata(self, source): spacing[-1] = 0 height = (source.height * stripes) + sum(spacing) - shape = (source.frames, height, source.width) + shape = source.frames + (height, source.width) return VDS(shape=shape, spacing=spacing) @@ -233,7 +252,7 @@ def create_vds_maps(self, source, vds_data): list(VirtualMap): Maps describing links between raw data and VDS """ - source_shape = (source.frames, source.height, source.width) + source_shape = source.frames + (source.height, source.width) vds = h5.VirtualTarget(self.output_file, self.target_node, shape=vds_data.shape) @@ -249,7 +268,9 @@ def create_vds_maps(self, source, vds_data): stop = start + source.height + vds_data.spacing[idx] current_position = stop - v_target = vds[:, start:stop, :] + index = tuple([self.FULL_SLICE] * len(source.frames) + + [slice(start, stop)] + [self.FULL_SLICE]) + v_target = vds[index] v_map = h5.VirtualMap(v_source, v_target, dtype=source.dtype) map_list.append(v_map) From d5802a08b81422d365e9d7da6b89d1b32668240c Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 21 Mar 2017 10:09:37 +0000 Subject: [PATCH 22/28] Make command line arguments more clear --- tests/app_test.py | 56 +++++++++++++++++++++++------------------------ vdsgen/app.py | 55 
+++++++++++++++++++++++----------------------- 2 files changed, 55 insertions(+), 56 deletions(-) diff --git a/tests/app_test.py b/tests/app_test.py index 33bdc03..258c594 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -20,21 +20,40 @@ class ParseArgsTest(unittest.TestCase): return_value=MagicMock(empty=False, files=None)) def test_parser(self, parse_mock, add_mock, add_group_mock, add_exclusive_group_mock): - group_mock = add_group_mock.return_value + empty_mock = MagicMock() + other_mock = MagicMock() + add_group_mock.side_effect = [None, None, empty_mock, other_mock] exclusive_group_mock = add_exclusive_group_mock.return_value args = app.parse_args() - add_mock.assert_has_calls( - [call("path", type=str, - help="Root folder to create VDS in. Also where source " - "files are searched for if --prefix given."), - call("-o", "--output", type=str, default=None, dest="output", + add_exclusive_group_mock.assert_called_with(required=True) + exclusive_group_mock.add_argument.assert_has_calls( + [call("-p", "--prefix", type=str, default=None, dest="prefix", + help="Prefix of files to search for - e.g 'stripe_' " + "to combine 'stripe_1.hdf5' and 'stripe_2.hdf5'."), + call("-f", "--files", nargs="*", type=str, default=None, + dest="files", + help="Explicit names of raw files in .")]) + + add_mock.assert_called_with( + "path", type=str, help="Root folder of source files and VDS.") + + add_group_mock.assert_has_calls([call()] * 2) + empty_mock.add_argument.assert_has_calls( + [call("-e", "--empty", action="store_true", dest="empty", + help="Make empty VDS pointing to datasets " + "that don't exist yet."), + call("--shape", type=int, nargs="*", default=[1, 256, 2048], + dest="shape", + help="Shape of dataset - 'frames height width', where " + "frames is N dimensional."), + call("--data_type", type=str, default="uint16", dest="data_type", + help="Data type of raw datasets.")]) + other_mock.add_argument.assert_has_calls( + [call("-o", "--output", type=str, 
default=None, dest="output", help="Output file name. Default is input file prefix with " "vds suffix."), - call("-e", "--empty", action="store_true", dest="empty", - help="Make empty VDS pointing to datasets " - "that don't exist, yet."), call("-s", "--stripe_spacing", nargs="?", type=int, default=None, dest="stripe_spacing", help="Spacing between two stripes in a module."), @@ -48,25 +67,6 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, dest="target_node", help="Data node in VDS file.")]) - add_group_mock.assert_called_with() - group_mock.add_argument.assert_has_calls( - [call("--shape", type=int, nargs="*", default=[1, 256, 2048], - dest="shape", - help="Shape of dataset - 'frames height width', where " - "frames is N dimensional."), - call("--data_type", type=str, default="uint16", dest="data_type", - help="Data type of raw datasets.")]) - - add_exclusive_group_mock.assert_called_with(required=True) - exclusive_group_mock.add_argument.assert_has_calls( - [call("-p", "--prefix", type=str, default=None, dest="prefix", - help="Prefix of files - e.g 'stripe_' to combine the images " - "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' " - "located at ."), - call("-f", "--files", nargs="*", type=str, default=None, - dest="files", - help="Manually define files to combine.")]) - parse_mock.assert_called_once_with() self.assertEqual(parse_mock.return_value, args) diff --git a/vdsgen/app.py b/vdsgen/app.py index ea7ce62..3d769ee 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -7,50 +7,49 @@ def parse_args(): """Parse command line arguments.""" parser = ArgumentParser() - parser.add_argument("path", type=str, - help="Root folder to create VDS in. 
Also where source " - "files are searched for if --prefix given.") + parser.add_argument( + "path", type=str, help="Root folder of source files and VDS.") # Definition of file names in - Common prefix or explicit list file_definition = parser.add_mutually_exclusive_group(required=True) file_definition.add_argument( "-p", "--prefix", type=str, default=None, dest="prefix", - help="Prefix of files - e.g 'stripe_' to combine the images " - "'stripe_1.hdf5', 'stripe_2.hdf5' and 'stripe_3.hdf5' located " - "at .") + help="Prefix of files to search for - e.g 'stripe_' to combine " + "'stripe_1.hdf5' and 'stripe_2.hdf5'.") file_definition.add_argument( "-f", "--files", nargs="*", type=str, default=None, dest="files", - help="Manually define files to combine.") - parser.add_argument( - "-o", "--output", type=str, default=None, dest="output", - help="Output file name. Default is input file prefix with vds suffix.") + help="Explicit names of raw files in .") # Arguments required to allow VDS to be created before raw files exist - parser.add_argument( + empty_vds = parser.add_argument_group() + empty_vds.add_argument( "-e", "--empty", action="store_true", dest="empty", - help="Make empty VDS pointing to datasets that don't exist, yet.") - source_metadata = parser.add_argument_group() - source_metadata.add_argument( + help="Make empty VDS pointing to datasets that don't exist yet.") + empty_vds.add_argument( "--shape", type=int, nargs="*", default=[1, 256, 2048], dest="shape", help="Shape of dataset - 'frames height width', where frames is N " "dimensional.") - source_metadata.add_argument( + empty_vds.add_argument( "--data_type", type=str, default="uint16", dest="data_type", help="Data type of raw datasets.") - # Arguments to override defaults - parser.add_argument("-s", "--stripe_spacing", nargs="?", type=int, - default=None, dest="stripe_spacing", - help="Spacing between two stripes in a module.") - parser.add_argument("-m", "--module_spacing", nargs="?", type=int, - 
default=None, dest="module_spacing", - help="Spacing between two modules.") - parser.add_argument("--source_node", nargs="?", type=str, default=None, - dest="source_node", - help="Data node in source HDF5 files.") - parser.add_argument("--target_node", nargs="?", type=str, default=None, - dest="target_node", - help="Data node in VDS file.") + # Arguments to override defaults - each is atomic + other_args = parser.add_argument_group() + other_args.add_argument( + "-o", "--output", type=str, default=None, dest="output", + help="Output file name. Default is input file prefix with vds suffix.") + other_args.add_argument( + "-s", "--stripe_spacing", nargs="?", type=int, default=None, + dest="stripe_spacing", help="Spacing between two stripes in a module.") + other_args.add_argument( + "-m", "--module_spacing", nargs="?", type=int, default=None, + dest="module_spacing", help="Spacing between two modules.") + other_args.add_argument( + "--source_node", nargs="?", type=str, default=None, dest="source_node", + help="Data node in source HDF5 files.") + other_args.add_argument( + "--target_node", nargs="?", type=str, default=None, dest="target_node", + help="Data node in VDS file.") args = parser.parse_args() args.shape = tuple(args.shape) From dd02106ad1168b39615890873486428354718e61 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 21 Mar 2017 10:10:53 +0000 Subject: [PATCH 23/28] Update create_virtual_dataset call for latest version Change fill_value -> fillvalue --- tests/vdsgenerator_test.py | 4 ++-- vdsgen/vdsgenerator.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/vdsgenerator_test.py b/tests/vdsgenerator_test.py index a7d7a51..ec17a66 100644 --- a/tests/vdsgenerator_test.py +++ b/tests/vdsgenerator_test.py @@ -313,7 +313,7 @@ def test_generate_vds_create(self, construct_mock, create_mock, h5file_mock.assert_called_once_with( "/test/path/vds.hdf5", "w", libver="latest") vds_file_mock.create_virtual_dataset.assert_called_once_with( - 
VMlist=create_mock.return_value, fill_value=0x1) + VMlist=create_mock.return_value, fillvalue=0x1) @patch('os.path.isfile', return_value=True) @patch(VDSGenerator_patch_path + '.validate_node') @@ -345,7 +345,7 @@ def test_generate_vds_append(self, construct_mock, create_mock, call("/test/path/vds.hdf5", "r", libver="latest"), call("/test/path/vds.hdf5", "a", libver="latest")]) vds_file_mock.create_virtual_dataset.assert_called_once_with( - VMlist=create_mock.return_value, fill_value=0x1) + VMlist=create_mock.return_value, fillvalue=0x1) @patch('os.path.isfile', return_value=True) @patch(h5py_patch_path + '.File', return_value=file_mock) diff --git a/vdsgen/vdsgenerator.py b/vdsgen/vdsgenerator.py index a464b1b..7091ee0 100644 --- a/vdsgen/vdsgenerator.py +++ b/vdsgen/vdsgenerator.py @@ -139,7 +139,7 @@ def generate_vds(self): logging.info("Creating VDS at %s", self.output_file) with h5.File(self.output_file, self.mode, libver="latest") as vds: self.validate_node(vds) - vds.create_virtual_dataset(VMlist=map_list, fill_value=0x1) + vds.create_virtual_dataset(VMlist=map_list, fillvalue=0x1) logging.info("Creation successful!") From f1bad84d4c48d6e1a343e5a795a15ef8f6c87a5a Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 21 Mar 2017 10:28:56 +0000 Subject: [PATCH 24/28] Tidy up existing vds node check --- vdsgen/vdsgenerator.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/vdsgen/vdsgenerator.py b/vdsgen/vdsgenerator.py index 7091ee0..216e1af 100644 --- a/vdsgen/vdsgenerator.py +++ b/vdsgen/vdsgenerator.py @@ -22,6 +22,7 @@ class VDSGenerator(object): # Constants CREATE = "w" # Will overwrite any existing file APPEND = "a" + READ = "r" FULL_SLICE = slice(None) # Default Values @@ -120,14 +121,14 @@ def parse_shape(shape): def generate_vds(self): """Generate a virtual dataset.""" if os.path.isfile(self.output_file): - with h5.File(self.output_file, "r", libver="latest") as vds: + with h5.File(self.output_file, self.READ, 
libver="latest") as vds: node = vds.get(self.target_node) - if node is not None: - raise IOError("VDS {file} already has an entry for node " - "{node}".format(file=self.output_file, - node=self.target_node)) - else: - self.mode = self.APPEND + if node is not None: + raise IOError("VDS {file} already has an entry for node " + "{node}".format(file=self.output_file, + node=self.target_node)) + else: + self.mode = self.APPEND file_names = [file_.split('/')[-1] for file_ in self.datasets] logging.info("Combining datasets %s into %s", From fcf4d2db4fe188f058f7cffa6c6bccbd244749ae Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Tue, 21 Mar 2017 11:02:21 +0000 Subject: [PATCH 25/28] Update to display defaults in CLI help message --- tests/app_test.py | 47 ++++++++++++++++++++++++++--------------------- vdsgen/app.py | 25 ++++++++++++++----------- 2 files changed, 40 insertions(+), 32 deletions(-) diff --git a/tests/app_test.py b/tests/app_test.py index 258c594..84fb6c7 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -6,27 +6,32 @@ from vdsgen import app -parser_patch_path = "argparse.ArgumentParser" app_patch_path = "vdsgen.app" +parser_patch_path = app_patch_path + ".ArgumentParser" VDSGenerator_patch_path = app_patch_path + ".VDSGenerator" class ParseArgsTest(unittest.TestCase): - @patch(parser_patch_path + '.add_mutually_exclusive_group') - @patch(parser_patch_path + '.add_argument_group') - @patch(parser_patch_path + '.add_argument') - @patch(parser_patch_path + '.parse_args', - return_value=MagicMock(empty=False, files=None)) - def test_parser(self, parse_mock, add_mock, add_group_mock, - add_exclusive_group_mock): + @patch(VDSGenerator_patch_path) + @patch(app_patch_path + '.ArgumentDefaultsHelpFormatter') + @patch(parser_patch_path) + def test_parser(self, parser_init_mock, formatter_mock, gen_mock): + parser_mock = parser_init_mock.return_value + add_mock = parser_mock.add_argument + add_group_mock = parser_mock.add_argument_group + 
add_exclusive_group_mock = parser_mock.add_mutually_exclusive_group + parse_mock = parser_mock.parse_args + parse_mock.return_value = MagicMock(empty=False, files=None) empty_mock = MagicMock() other_mock = MagicMock() - add_group_mock.side_effect = [None, None, empty_mock, other_mock] + add_group_mock.side_effect = [empty_mock, other_mock] exclusive_group_mock = add_exclusive_group_mock.return_value args = app.parse_args() + parser_init_mock.assert_called_once_with( + formatter_class=formatter_mock) add_exclusive_group_mock.assert_called_with(required=True) exclusive_group_mock.add_argument.assert_has_calls( [call("-p", "--prefix", type=str, default=None, dest="prefix", @@ -52,20 +57,20 @@ def test_parser(self, parse_mock, add_mock, add_group_mock, help="Data type of raw datasets.")]) other_mock.add_argument.assert_has_calls( [call("-o", "--output", type=str, default=None, dest="output", - help="Output file name. Default is input file prefix with " - "vds suffix."), - call("-s", "--stripe_spacing", nargs="?", type=int, default=None, - dest="stripe_spacing", + help="Output file name. 
If None then generated as input " + "file prefix with vds suffix."), + call("-s", "--stripe_spacing", nargs="?", type=int, + default=gen_mock.stripe_spacing, dest="stripe_spacing", help="Spacing between two stripes in a module."), - call("-m", "--module_spacing", nargs="?", type=int, default=None, - dest="module_spacing", + call("-m", "--module_spacing", nargs="?", type=int, + default=gen_mock.module_spacing, dest="module_spacing", help="Spacing between two modules."), - call("--source_node", nargs="?", type=str, default=None, - dest="source_node", - help="Data node in source HDF5 files."), - call("--target_node", nargs="?", type=str, default=None, - dest="target_node", - help="Data node in VDS file.")]) + call("--source_node", nargs="?", type=str, + default=gen_mock.source_node, + dest="source_node", help="Data node in source HDF5 files."), + call("--target_node", nargs="?", type=str, + default=gen_mock.target_node, + dest="target_node", help="Data node in VDS file.")]) parse_mock.assert_called_once_with() self.assertEqual(parse_mock.return_value, args) diff --git a/vdsgen/app.py b/vdsgen/app.py index 3d769ee..7742080 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -1,12 +1,12 @@ import sys -from argparse import ArgumentParser +from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter from vdsgenerator import VDSGenerator def parse_args(): """Parse command line arguments.""" - parser = ArgumentParser() + parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument( "path", type=str, help="Root folder of source files and VDS.") @@ -37,19 +37,22 @@ def parse_args(): other_args = parser.add_argument_group() other_args.add_argument( "-o", "--output", type=str, default=None, dest="output", - help="Output file name. Default is input file prefix with vds suffix.") + help="Output file name. 
If None then generated as input file prefix " + "with vds suffix.") other_args.add_argument( - "-s", "--stripe_spacing", nargs="?", type=int, default=None, - dest="stripe_spacing", help="Spacing between two stripes in a module.") + "-s", "--stripe_spacing", nargs="?", type=int, + default=VDSGenerator.stripe_spacing, dest="stripe_spacing", + help="Spacing between two stripes in a module.") other_args.add_argument( - "-m", "--module_spacing", nargs="?", type=int, default=None, - dest="module_spacing", help="Spacing between two modules.") + "-m", "--module_spacing", nargs="?", type=int, + default=VDSGenerator.module_spacing, dest="module_spacing", + help="Spacing between two modules.") other_args.add_argument( - "--source_node", nargs="?", type=str, default=None, dest="source_node", - help="Data node in source HDF5 files.") + "--source_node", nargs="?", type=str, default=VDSGenerator.source_node, + dest="source_node", help="Data node in source HDF5 files.") other_args.add_argument( - "--target_node", nargs="?", type=str, default=None, dest="target_node", - help="Data node in VDS file.") + "--target_node", nargs="?", type=str, default=VDSGenerator.target_node, + dest="target_node", help="Data node in VDS file.") args = parser.parse_args() args.shape = tuple(args.shape) From 73bb8b7ac9ed7fa95c219e2de4b68bfa8c97f428 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 22 Mar 2017 14:29:28 +0000 Subject: [PATCH 26/28] Improve logging * Add more debug logging * Use named logger * Add flag to set log level --- tests/app_test.py | 12 ++++++++---- tests/vdsgenerator_test.py | 4 ++-- vdsgen/app.py | 6 +++++- vdsgen/vdsgenerator.py | 40 +++++++++++++++++++++++++------------- 4 files changed, 41 insertions(+), 21 deletions(-) diff --git a/tests/app_test.py b/tests/app_test.py index 84fb6c7..5eebc71 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -105,7 +105,8 @@ class MainTest(unittest.TestCase): files=["file1.hdf5", "file2.hdf5"], output="vds", shape=[3, 256, 2048], 
data_type="int16", source_node="data", target_node="full_frame", - stripe_spacing=3, module_spacing=127)) + stripe_spacing=3, module_spacing=127, + log_level=2)) def test_main_empty(self, parse_mock, init_mock): gen_mock = init_mock.return_value args_mock = parse_mock.return_value @@ -121,7 +122,8 @@ def test_main_empty(self, parse_mock, init_mock): source_node=args_mock.source_node, target_node=args_mock.target_node, stripe_spacing=args_mock.stripe_spacing, - module_spacing=args_mock.module_spacing) + module_spacing=args_mock.module_spacing, + log_level=args_mock.log_level) gen_mock.generate_vds.assert_called_once_with() @@ -132,7 +134,8 @@ def test_main_empty(self, parse_mock, init_mock): files=["file1.hdf5", "file2.hdf5"], output="vds", frames=3, height=256, width=2048, data_type="int16", source_node="data", target_node="full_frame", - stripe_spacing=3, module_spacing=127)) + stripe_spacing=3, module_spacing=127, + log_level=2)) def test_main_not_empty(self, parse_mock, generate_mock): args_mock = parse_mock.return_value @@ -146,4 +149,5 @@ def test_main_not_empty(self, parse_mock, generate_mock): source_node=args_mock.source_node, stripe_spacing=args_mock.stripe_spacing, target_node=args_mock.target_node, - module_spacing=args_mock.module_spacing) \ No newline at end of file + module_spacing=args_mock.module_spacing, + log_level=args_mock.log_level) diff --git a/tests/vdsgenerator_test.py b/tests/vdsgenerator_test.py index ec17a66..c58f9af 100644 --- a/tests/vdsgenerator_test.py +++ b/tests/vdsgenerator_test.py @@ -221,10 +221,10 @@ def test_construct_vds_metadata(self): @patch(h5py_patch_path + '.VirtualSource') @patch(h5py_patch_path + '.VirtualTarget') def test_create_vds_maps(self, target_mock, source_mock, map_mock): - gen = VDSGeneratorTester(output_file= "/test/path/vds.hdf5", + gen = VDSGeneratorTester(output_file="/test/path/vds.hdf5", stripe_spacing=10, module_spacing=100, target_node="full_frame", source_node="data", - datasets=["source"] * 6) + 
datasets=["source"] * 6, name="vds.hdf5") source = vdsgenerator.Source(frames=(3,), height=256, width=2048, dtype="uint16") vds = vdsgenerator.VDS(shape=(3, 1586, 2048), spacing=[10] * 5 + [0]) diff --git a/vdsgen/app.py b/vdsgen/app.py index 7742080..5cd015e 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -53,6 +53,9 @@ def parse_args(): other_args.add_argument( "--target_node", nargs="?", type=str, default=VDSGenerator.target_node, dest="target_node", help="Data node in VDS file.") + other_args.add_argument( + "-l", "--log_level", type=int, default=VDSGenerator.log_level, + dest="log_level", help="Logging level (off=3, info=2, debug=1).") args = parser.parse_args() args.shape = tuple(args.shape) @@ -83,7 +86,8 @@ def main(): source_node=args.source_node, target_node=args.target_node, stripe_spacing=args.stripe_spacing, - module_spacing=args.module_spacing) + module_spacing=args.module_spacing, + log_level=args.log_level) gen.generate_vds() diff --git a/vdsgen/vdsgenerator.py b/vdsgen/vdsgenerator.py index 216e1af..5472055 100644 --- a/vdsgen/vdsgenerator.py +++ b/vdsgen/vdsgenerator.py @@ -9,8 +9,6 @@ import h5py as h5 -logging.basicConfig(level=logging.INFO) - Source = namedtuple("Source", ["frames", "height", "width", "dtype"]) VDS = namedtuple("VDS", ["shape", "spacing"]) @@ -31,10 +29,16 @@ class VDSGenerator(object): source_node = "data" # Data node in source HDF5 files target_node = "full_frame" # Data node in VDS file mode = CREATE # Write mode for vds file + log_level = 2 + + logger = logging.getLogger("VDSGenerator") + logger.addHandler(logging.StreamHandler()) + logger.setLevel(log_level * 10) def __init__(self, path, prefix=None, files=None, output=None, source=None, source_node=None, target_node=None, - stripe_spacing=None, module_spacing=None): + stripe_spacing=None, module_spacing=None, + log_level=None): """ Args: path(str): Root folder to find raw files and create VDS @@ -48,6 +52,8 @@ def __init__(self, path, prefix=None, files=None, 
output=None, source=None, target_node(str): Data node in VDS file stripe_spacing(int): Spacing between stripes in module module_spacing(int): Spacing between modules + log_level(int): Logging level (off=3, info=2, debug=1) - + Default is info """ if (prefix is None and files is None) or \ @@ -65,6 +71,8 @@ def __init__(self, path, prefix=None, files=None, output=None, source=None, self.stripe_spacing = stripe_spacing if module_spacing is not None: self.module_spacing = module_spacing + if log_level is not None: + self.logger.setLevel(log_level * 10) # If Files not given, find files using path and prefix. if files is None: @@ -130,20 +138,14 @@ def generate_vds(self): else: self.mode = self.APPEND - file_names = [file_.split('/')[-1] for file_ in self.datasets] - logging.info("Combining datasets %s into %s", - ", ".join(file_names), self.name) - vds_data = self.construct_vds_metadata(self.source_metadata) map_list = self.create_vds_maps(self.source_metadata, vds_data) - logging.info("Creating VDS at %s", self.output_file) + self.logger.info("Creating VDS at %s", self.output_file) with h5.File(self.output_file, self.mode, libver="latest") as vds: self.validate_node(vds) vds.create_virtual_dataset(VMlist=map_list, fillvalue=0x1) - logging.info("Creation successful!") - def find_files(self): """Find HDF5 files in given folder with given prefix. 
@@ -166,6 +168,8 @@ def find_files(self): raise IOError("Folder must contain more than one matching HDF5 " "file.") else: + self.logger.debug("Found datasets %s", + ", ".join([f.split("/")[-1] for f in files])) return files def construct_vds_name(self, files): @@ -181,6 +185,7 @@ def construct_vds_name(self, files): _, ext = os.path.splitext(files[0]) vds_name = "{prefix}vds{ext}".format(prefix=self.prefix, ext=ext) + self.logger.debug("Generated VDS name: %s", vds_name) return vds_name def grab_metadata(self, file_path): @@ -215,8 +220,11 @@ def process_source_datasets(self): raise ValueError("Files have mismatched " "{}".format(attribute)) - return Source(frames=data['frames'], height=data['height'], - width=data['width'], dtype=data['dtype']) + source = Source(frames=data['frames'], height=data['height'], + width=data['width'], dtype=data['dtype']) + + self.logger.debug("Source metadata retrieved: %s", source) + return source def construct_vds_metadata(self, source): """Construct VDS data attributes from source attributes. @@ -240,7 +248,9 @@ def construct_vds_metadata(self, source): height = (source.height * stripes) + sum(spacing) shape = source.frames + (height, source.width) - return VDS(shape=shape, spacing=spacing) + vds = VDS(shape=shape, spacing=spacing) + self.logger.debug("VDS metadata constructed: %s", vds) + return vds def create_vds_maps(self, source, vds_data): """Create a list of VirtualMaps of raw data to the VDS. 
@@ -260,7 +270,6 @@ def create_vds_maps(self, source, vds_data): map_list = [] current_position = 0 for idx, dataset in enumerate(self.datasets): - logging.info("Processing dataset %s", idx + 1) v_source = h5.VirtualSource(dataset, self.source_node, shape=source_shape) @@ -273,6 +282,9 @@ def create_vds_maps(self, source, vds_data): [slice(start, stop)] + [self.FULL_SLICE]) v_target = vds[index] v_map = h5.VirtualMap(v_source, v_target, dtype=source.dtype) + + self.logger.debug("Mapping dataset %s to %s of %s.", + dataset.split("/")[-1], index, self.name) map_list.append(v_map) return map_list From d11df125eeb5d1cfc379bc93e4cbb4e26642bd70 Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Wed, 22 Mar 2017 15:41:09 +0000 Subject: [PATCH 27/28] Improve validate_node function --- tests/vdsgenerator_test.py | 20 +++++++------------- vdsgen/vdsgenerator.py | 6 ++---- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/tests/vdsgenerator_test.py b/tests/vdsgenerator_test.py index c58f9af..4aaf63e 100644 --- a/tests/vdsgenerator_test.py +++ b/tests/vdsgenerator_test.py @@ -250,12 +250,12 @@ def setUp(self): self.file_mock = MagicMock() def test_validate_node_creates(self): - gen = VDSGeneratorTester(target_node="entry/detector/detector1") + gen = VDSGeneratorTester(target_node="/entry/detector/detector1") self.file_mock.get.return_value = None gen.validate_node(self.file_mock) - self.file_mock.create_group.assert_called_once_with("entry/detector") + self.file_mock.create_group.assert_called_once_with("/entry/detector") def test_validate_node_exists_then_no_op(self): gen = VDSGeneratorTester(target_node="entry/detector/detector1") @@ -265,19 +265,13 @@ def test_validate_node_exists_then_no_op(self): self.file_mock.create_group.assert_not_called() - def test_validate_node_invalid_then_error(self): - - gen = VDSGeneratorTester(target_node="/entry/detector/detector1") - with self.assertRaises(ValueError): - gen.validate_node(self.file_mock) + def 
test_validate_node_trailing_slash_then_removed(self): + gen = VDSGeneratorTester(target_node="/entry/detector/detector1//") + self.file_mock.get.return_value = None - gen = VDSGeneratorTester(target_node="entry/detector/detector1/") - with self.assertRaises(ValueError): - gen.validate_node(self.file_mock) + gen.validate_node(self.file_mock) - gen = VDSGeneratorTester(target_node="/entry/detector/detector1/") - with self.assertRaises(ValueError): - gen.validate_node(self.file_mock) + self.file_mock.create_group.assert_called_once_with("/entry/detector") class GenerateVDSTest(unittest.TestCase): diff --git a/vdsgen/vdsgenerator.py b/vdsgen/vdsgenerator.py index 5472055..2b08985 100644 --- a/vdsgen/vdsgenerator.py +++ b/vdsgen/vdsgenerator.py @@ -292,16 +292,14 @@ def create_vds_maps(self, source, vds_data): def validate_node(self, vds_file): """Check if it is possible to create the given node. - Check the target node is valid (no leading or trailing slashes) Create any sub-group of the target node if it doesn't exist. 
Args: vds_file(h5py.File): File to check for node """ - if self.target_node.startswith("/") or self.target_node.endswith("/"): - raise ValueError("Target node should have no leading or trailing " - "slashes, got {}".format(self.target_node)) + while self.target_node.endswith("/"): + self.target_node = self.target_node[:-1] if "/" in self.target_node: sub_group = self.target_node.rsplit("/", 1)[0] From f75705c6951763ab3028d35550bcd05deb6c245c Mon Sep 17 00:00:00 2001 From: Gary Yendell Date: Fri, 24 Mar 2017 15:24:02 +0000 Subject: [PATCH 28/28] Update CLI help message and flags --- tests/app_test.py | 42 +++++++++++++++++++++++++++++++----------- vdsgen/app.py | 44 ++++++++++++++++++++++++++++++++------------ 2 files changed, 63 insertions(+), 23 deletions(-) diff --git a/tests/app_test.py b/tests/app_test.py index 5eebc71..2a173a0 100644 --- a/tests/app_test.py +++ b/tests/app_test.py @@ -27,10 +27,27 @@ def test_parser(self, parser_init_mock, formatter_mock, gen_mock): other_mock = MagicMock() add_group_mock.side_effect = [empty_mock, other_mock] exclusive_group_mock = add_exclusive_group_mock.return_value + expected_message = """ +------------------------------------------------------------------------------- +A script to create a virtual dataset composed of multiple raw HDF5 files. + +The minimum required arguments are and either -p or -f . + +For example: + + > ../vdsgen/app.py /scratch/images -p stripe_ + > ../vdsgen/app.py /scratch/images -f stripe_1.hdf5 stripe_2.hdf5 + +You can create an empty VDS, for raw files that don't exist yet, with the -e +flag; you will then need to provide --shape and --data_type, though defaults +are provided for these. 
+------------------------------------------------------------------------------- +""" args = app.parse_args() parser_init_mock.assert_called_once_with( + usage=expected_message, formatter_class=formatter_mock) add_exclusive_group_mock.assert_called_with(required=True) exclusive_group_mock.add_argument.assert_has_calls( @@ -53,24 +70,27 @@ def test_parser(self, parser_init_mock, formatter_mock, gen_mock): dest="shape", help="Shape of dataset - 'frames height width', where " "frames is N dimensional."), - call("--data_type", type=str, default="uint16", dest="data_type", - help="Data type of raw datasets.")]) + call("-t", "--data_type", type=str, default="uint16", + dest="data_type", help="Data type of raw datasets.")]) other_mock.add_argument.assert_has_calls( [call("-o", "--output", type=str, default=None, dest="output", help="Output file name. If None then generated as input " "file prefix with vds suffix."), - call("-s", "--stripe_spacing", nargs="?", type=int, - default=gen_mock.stripe_spacing, dest="stripe_spacing", + call("-s", "--stripe_spacing", type=int, dest="stripe_spacing", + default=gen_mock.stripe_spacing, help="Spacing between two stripes in a module."), - call("-m", "--module_spacing", nargs="?", type=int, - default=gen_mock.module_spacing, dest="module_spacing", + call("-m", "--module_spacing", type=int, dest="module_spacing", + default=gen_mock.module_spacing, help="Spacing between two modules."), - call("--source_node", nargs="?", type=str, + call("--source_node", type=str, dest="source_node", default=gen_mock.source_node, - dest="source_node", help="Data node in source HDF5 files."), - call("--target_node", nargs="?", type=str, - default=gen_mock.target_node, - dest="target_node", help="Data node in VDS file.")]) + help="Data node in source HDF5 files."), + call("--target_node", type=str, + default=gen_mock.target_node, dest="target_node", + help="Data node in VDS file."), + call("-l", "--log_level", type=int, dest="log_level", + 
default=gen_mock.log_level, + help="Logging level (off=3, info=2, debug=1).")]) parse_mock.assert_called_once_with() self.assertEqual(parse_mock.return_value, args) diff --git a/vdsgen/app.py b/vdsgen/app.py index 5cd015e..4eeb52b 100644 --- a/vdsgen/app.py +++ b/vdsgen/app.py @@ -3,10 +3,28 @@ from vdsgenerator import VDSGenerator +help_message = """ +------------------------------------------------------------------------------- +A script to create a virtual dataset composed of multiple raw HDF5 files. + +The minimum required arguments are and either -p or -f . + +For example: + + > ../vdsgen/app.py /scratch/images -p stripe_ + > ../vdsgen/app.py /scratch/images -f stripe_1.hdf5 stripe_2.hdf5 + +You can create an empty VDS, for raw files that don't exist yet, with the -e +flag; you will then need to provide --shape and --data_type, though defaults +are provided for these. +------------------------------------------------------------------------------- +""" + def parse_args(): """Parse command line arguments.""" - parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) + parser = ArgumentParser(usage=help_message, + formatter_class=ArgumentDefaultsHelpFormatter) parser.add_argument( "path", type=str, help="Root folder of source files and VDS.") @@ -30,7 +48,7 @@ def parse_args(): help="Shape of dataset - 'frames height width', where frames is N " "dimensional.") empty_vds.add_argument( - "--data_type", type=str, default="uint16", dest="data_type", + "-t", "--data_type", type=str, default="uint16", dest="data_type", help="Data type of raw datasets.") # Arguments to override defaults - each is atomic @@ -40,22 +58,24 @@ def parse_args(): help="Output file name. 
If None then generated as input file prefix " "with vds suffix.") other_args.add_argument( - "-s", "--stripe_spacing", nargs="?", type=int, - default=VDSGenerator.stripe_spacing, dest="stripe_spacing", + "-s", "--stripe_spacing", type=int, dest="stripe_spacing", + default=VDSGenerator.stripe_spacing, help="Spacing between two stripes in a module.") other_args.add_argument( - "-m", "--module_spacing", nargs="?", type=int, - default=VDSGenerator.module_spacing, dest="module_spacing", + "-m", "--module_spacing", type=int, dest="module_spacing", + default=VDSGenerator.module_spacing, help="Spacing between two modules.") other_args.add_argument( - "--source_node", nargs="?", type=str, default=VDSGenerator.source_node, - dest="source_node", help="Data node in source HDF5 files.") + "--source_node", type=str, dest="source_node", + default=VDSGenerator.source_node, + help="Data node in source HDF5 files.") other_args.add_argument( - "--target_node", nargs="?", type=str, default=VDSGenerator.target_node, - dest="target_node", help="Data node in VDS file.") + "--target_node", type=str, dest="target_node", + default=VDSGenerator.target_node, help="Data node in VDS file.") other_args.add_argument( - "-l", "--log_level", type=int, default=VDSGenerator.log_level, - dest="log_level", help="Logging level (off=3, info=2, debug=1).") + "-l", "--log_level", type=int, dest="log_level", + default=VDSGenerator.log_level, + help="Logging level (off=3, info=2, debug=1).") args = parser.parse_args() args.shape = tuple(args.shape)