Skip to content

Commit

Permalink
Merge pull request #228 from texadactyl/master
Browse files Browse the repository at this point in the history
Fix hdf_reader.py and create utility h5diag - issue #226
  • Loading branch information
texadactyl authored Aug 10, 2021
2 parents 7c4373f + 7e2fb41 commit c310179
Show file tree
Hide file tree
Showing 5 changed files with 157 additions and 62 deletions.
1 change: 1 addition & 0 deletions VERSION-HISTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ This file is a version history of turbo_seti amendments, beginning with version
<br>
| Date | Version | Contents |
| :--: | :--: | :-- |
| 2021-08-10 | 2.0.22 | Non/broken HDF5 input files need better diagnosis (Issue #226). |
| 2021-08-07 | 2.0.21 | New signal_processing source file, "dedoppler.py" (discussion in PR #220). |
| 2021-08-06 | 2.0.20 | New utility, "stix" (Issue #221). |
| 2021-07-28 | 2.0.19 | Update fil2h5 to handle YUGE data matrixes. |
Expand Down
1 change: 1 addition & 0 deletions blimpy/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from . import utils
from . import fil2h5
from . import h52fil
from . import h5diag
from . import bl_scrunch
from . import calcload
from . import rawhdr
Expand Down
78 changes: 78 additions & 0 deletions blimpy/h5diag.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
r""" h5diag """

import os
import sys
from argparse import ArgumentParser
import h5py
from astropy.coordinates import Angle


def oops(msg):
print("\n*** h5diag: {}\n".format(msg))
sys.exit(86)


def read_header(h5):
""" Read header and return a Python dictionary of key:value pairs
"""

header = {} # Initialise as a nil dictionary.

for key, val in h5["data"].attrs.items():
#if six.PY3:
# key = bytes(key, "ascii")
if isinstance(val, bytes):
val = val.decode("ascii")
if key == "src_raj":
header[key] = Angle(val, unit="hr")
elif key == "src_dej":
header[key] = Angle(val, unit="deg")
else:
header[key] = val

return header


def examine(filename):
r""" Diagnose the given HDF5 file"""
h5 = h5py.File(filename, mode="r")
if "CLASS" in h5.attrs:
classstr = h5.attrs["CLASS"]
else:
oops("CLASS attribute missing")
if classstr != "FILTERBANK":
oops("Expected CLASS attribute to be 'FILTERBANK' but saw '{}'".format(classstr))
if "VERSION" in h5.attrs:
versionstr = h5.attrs["VERSION"]
else:
oops("VERSION attribute missing")
print("VERSION is ", versionstr)
header = read_header(h5)
print("Header:", header)
if not "data" in h5:
oops("data attribute missing")
if h5["data"].ndim != 3:
oops("Expected data.ndim to be 3 but saw '{}'".format(h5["data"].ndim))
print("h5diag: data shape:", h5["data"].shape)


def cmd_tool(args=None):
""" Command line tool h5diag """
parser = ArgumentParser(description="Command line utility for diagnosing HDF5 files.")
parser.add_argument("filename", type=str, help="Path of file to read")
if args is None:
args = sys.argv[1:]
parse_args = parser.parse_args(args)

if not os.path.isfile(parse_args.filename):
oops("Not a file: {}".format(parse_args.filename))
if not h5py.is_hdf5(parse_args.filename):
oops("Not an HDF5 file: {}".format(parse_args.filename))

print("h5diag: Begin")
examine(parse_args.filename)
print("h5diag: No errors detected")


if __name__ == "__main__":
cmd_tool()
136 changes: 75 additions & 61 deletions blimpy/io/hdf_reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ def __init__(self, filename, f_start=None, f_stop=None, t_start=None, t_stop=Non
""" Constructor.
Args:
filename (str): filename of blimpy file.
filename (str): file path of HDF5 file.
f_start (float): start frequency, in MHz
f_stop (float): stop frequency, in MHz
t_start (int): start time bin
Expand All @@ -25,74 +25,88 @@ def __init__(self, filename, f_start=None, f_stop=None, t_start=None, t_stop=Non
"""
super(H5Reader, self).__init__()

if filename and os.path.isfile(filename) and h5py.is_hdf5(filename):

#These values may be modified once code for multi_beam and multi_stokes observations are possible.
self.freq_axis = 2
self.time_axis = 0
self.beam_axis = 1 # Place holder
self.stokes_axis = 4 # Place holder

self.filename = filename
self.filestat = os.stat(filename)
self.filesize = self.filestat.st_size/(1024.0**2)
self.load_data = load_data
self.h5 = h5py.File(self.filename, mode='r')
self.read_header()
self.file_size_bytes = os.path.getsize(self.filename) # In bytes
self.n_ints_in_file = self.h5["data"].shape[self.time_axis] #
self.n_channels_in_file = self.h5["data"].shape[self.freq_axis] #
self.n_beams_in_file = self.header['nifs'] #Placeholder for future development.
self.n_pols_in_file = 1 #Placeholder for future development.
self._n_bytes = int(self.header['nbits'] / 8) #number of bytes per digit.
self._d_type = self._setup_dtype()
self.file_shape = (self.n_ints_in_file,self.n_beams_in_file,self.n_channels_in_file)

if self.header['foff'] < 0:
self.f_end = self.header['fch1']
self.f_begin = self.f_end + self.n_channels_in_file*self.header['foff']
else:
self.f_begin = self.header['fch1']
self.f_end = self.f_begin + self.n_channels_in_file*self.header['foff']
if not os.path.isfile(filename):
raise IOError("Not a file: {}".format(filename))
if not h5py.is_hdf5(filename):
raise IOError("Not an HDF5 file: {}".format(filename))

#These values may be modified once code for multi_beam and multi_stokes observations are possible.
self.freq_axis = 2
self.time_axis = 0
self.beam_axis = 1 # Place holder
self.stokes_axis = 4 # Place holder

self.filename = filename
self.filestat = os.stat(filename)
self.filesize = self.filestat.st_size/(1024.0**2)
self.load_data = load_data
self.h5 = h5py.File(self.filename, mode='r')
if "CLASS" in self.h5.attrs:
classstr = self.h5.attrs["CLASS"]
else:
raise ValueError("CLASS attribute missing")
if classstr != "FILTERBANK":
raise ValueError("Expected CLASS attribute to be 'FILTERBANK' but saw '{}'".format(classstr))
if not "VERSION" in self.h5.attrs:
raise ValueError("VERSION attribute missing")
if not "data" in self.h5:
raise ValueError("data attribute missing")
if self.h5["data"].ndim != 3:
raise ValueError("Expected data.ndim to be 3 but saw '{}'".format(self.h5["data"].ndim))
self.read_header()
self.file_size_bytes = os.path.getsize(self.filename) # In bytes
self.n_ints_in_file = self.h5["data"].shape[self.time_axis] #
self.n_channels_in_file = self.h5["data"].shape[self.freq_axis] #
self.n_beams_in_file = self.header['nifs'] #Placeholder for future development.
self.n_pols_in_file = 1 #Placeholder for future development.
self._n_bytes = int(self.header['nbits'] / 8) #number of bytes per digit.
self._d_type = self._setup_dtype()
self.file_shape = (self.n_ints_in_file,self.n_beams_in_file,self.n_channels_in_file)

if self.header['foff'] < 0:
self.f_end = self.header['fch1']
self.f_begin = self.f_end + self.n_channels_in_file*self.header['foff']
else:
self.f_begin = self.header['fch1']
self.f_end = self.f_begin + self.n_channels_in_file*self.header['foff']

self.t_begin = 0
self.t_end = self.n_ints_in_file
self.t_begin = 0
self.t_end = self.n_ints_in_file

#Taking care all the frequencies are assigned correctly.
self._setup_selection_range(f_start=f_start, f_stop=f_stop, t_start=t_start, t_stop=t_stop, init=True)
#Convert input frequencies into what their corresponding channel number would be.
self._setup_chans()
#Update frequencies ranges from channel number.
self._setup_freqs()
#Taking care all the frequencies are assigned correctly.
self._setup_selection_range(f_start=f_start, f_stop=f_stop, t_start=t_start, t_stop=t_stop, init=True)
#Convert input frequencies into what their corresponding channel number would be.
self._setup_chans()
#Update frequencies ranges from channel number.
self._setup_freqs()

#Applying data size limit to load.
if max_load is not None and max_load > 0:
self.max_data_array_size = max_load * GIGA
#Applying data size limit to load.
if max_load is not None and max_load > 0:
self.max_data_array_size = max_load * GIGA

if self.file_size_bytes > self.max_data_array_size:
self.large_file = True
else:
self.large_file = False

if self.load_data:
if self.large_file:
# Only checking the selection, if the file is too large.
if self.f_start or self.f_stop or self.t_start or self.t_stop:
if self.isheavy():
self.warn_memory("Selection", self._calc_selection_size())
self._init_empty_selection()
else:
self.read_data()
else:
self.warn_memory("File", self.file_size_bytes)
if self.file_size_bytes > self.max_data_array_size:
self.large_file = True
else:
self.large_file = False

if self.load_data:
if self.large_file:
# Only checking the selection, if the file is too large.
if self.f_start or self.f_stop or self.t_start or self.t_stop:
if self.isheavy():
self.warn_memory("Selection", self._calc_selection_size())
self._init_empty_selection()
else:
self.read_data()
else:
self.read_data()
self.warn_memory("File", self.file_size_bytes)
self._init_empty_selection()
else:
logger.debug("Skipping loading data ...")
self._init_empty_selection()
self.read_data()
else:
raise IOError("Need a file to open, please give me one!")
logger.debug("Skipping loading data ...")
self._init_empty_selection()


def read_header(self):
""" Read header and return a Python dictionary of key:value pairs
Expand Down
3 changes: 2 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
"""
from setuptools import setup, find_packages

__version__ = '2.0.21'
__version__ = '2.0.22'

with open("README.md", "r") as fh:
long_description = fh.read()
Expand All @@ -16,6 +16,7 @@
'rawutil = blimpy.guppi:cmd_tool',
'fil2h5 = blimpy.fil2h5:cmd_tool',
'h52fil = blimpy.h52fil:cmd_tool',
'h5diag = blimpy.h5diag:cmd_tool',
'bl_scrunch = blimpy.bl_scrunch:cmd_tool',
'matchfils = blimpy.match_fils:cmd_tool',
'bldice = blimpy.dice:cmd_tool',
Expand Down

0 comments on commit c310179

Please sign in to comment.