diff --git a/src/nectarchain/data/container/__init__.py b/src/nectarchain/data/container/__init__.py index 464a8848..5b525634 100644 --- a/src/nectarchain/data/container/__init__.py +++ b/src/nectarchain/data/container/__init__.py @@ -9,3 +9,4 @@ from .eventSource import * from .gainContainer import * from .waveformsContainer import * +from .pedestalContainer import * diff --git a/src/nectarchain/data/container/pedestalContainer.py b/src/nectarchain/data/container/pedestalContainer.py new file mode 100644 index 00000000..10c1ec05 --- /dev/null +++ b/src/nectarchain/data/container/pedestalContainer.py @@ -0,0 +1,66 @@ +import logging + +logging.basicConfig(format="%(asctime)s %(name)s %(levelname)s %(message)s") +log = logging.getLogger(__name__) +log.handlers = logging.getLogger("__main__").handlers + +import numpy as np +from ctapipe.containers import Field + +from .core import NectarCAMContainer + + +class NectarCAMPedestalContainer(NectarCAMContainer): + """ + A container that holds estimated pedestals + + Fields: + nsamples (int): The number of samples in the waveforms. + nevents (np.ndarray): The number of events used to estimate the pedestals for each pixel. + pixels_id (np.ndarray): An array of pixel IDs. + ucts_timestamp_min (int): The minimum of the input events UCTS timestamps. + ucts_timestamp_max (int): The maximum of the input events UCTS timestamps. + pedestal_mean_hg (np.ndarray): An array of high gain mean pedestals. + pedestal_mean_lg (np.ndarray): An array of low gain mean pedestals. + pedestal_std_hg (np.ndarray): An array of standard deviations of high gain pedestals. + pedestal_std_lg (np.ndarray): An array of standard deviations of low gain pedestals. + """ + + nsamples = Field( + type=np.uint8, + description="number of samples in the waveforms", + ) + + nevents = Field( + type=np.ndarray, dtype=np.float64, ndim=1, + description="number of events used to estimate the pedestals for each pixel", + ) + + pixels_id = Field(type=np.ndarray, dtype=np.uint16, ndim=1, description="pixel ids") + + ucts_timestamp_min = Field( + type=np.uint64, + description="minimum of the input events UCTS timestamps", + ) + + ucts_timestamp_max = Field( + type=np.uint64, + description="maximum of the input events UCTS timestamps", + ) + + pedestal_mean_hg = Field( + type=np.ndarray, dtype=np.float64, ndim=2, description="high gain mean pedestals" + ) + + pedestal_mean_lg = Field( + type=np.ndarray, dtype=np.float64, ndim=2, description="low gain mean pedestals" + ) + + pedestal_std_hg = Field( + type=np.ndarray, dtype=np.float64, ndim=2, + description="high gain pedestals standard deviations" + ) + pedestal_std_lg = Field( + type=np.ndarray, dtype=np.float64, ndim=2, + description="low gain pedestals standard deviations" + ) diff --git a/src/nectarchain/data/container/tests/test_pedestal.py b/src/nectarchain/data/container/tests/test_pedestal.py new file mode 100644 index 00000000..c3ad920e --- /dev/null +++ b/src/nectarchain/data/container/tests/test_pedestal.py @@ -0,0 +1,99 @@ +import numpy as np +import tempfile +from ctapipe.io import HDF5TableWriter +from nectarchain.data.container import NectarCAMPedestalContainer + +def generate_mock_pedestal_container(): + # fixed values + npixels = 10 + nevents_max = 100 + nevents_min = 10 + nsamples = np.uint8(60) + # random values for other fields + nevents = np.float64(np.random.randint(nevents_min, nevents_max, size=(npixels,))) + pixels_id = np.array([2, 4, 3, 8, 6, 9, 7, 1, 5, 10], dtype=np.uint16) + ucts_timestamp_min = np.uint64(np.random.randint(1e8)) + ucts_timestamp_max = np.uint64(np.random.randint(1e8 + 100)) + pedestal_mean_hg = np.float64(np.random.uniform(240, 260, size=(npixels, nsamples))) + pedestal_mean_lg = np.float64(np.random.uniform(240, 260, size=(npixels, nsamples))) + pedestal_std_hg = np.float64(np.random.normal(size=(npixels, nsamples))) + pedestal_std_lg = np.float64(np.random.normal(size=(npixels, nsamples))) + + # create pedestal container + pedestal_container = NectarCAMPedestalContainer( + nsamples=nsamples, + nevents=nevents, + pixels_id=pixels_id, + ucts_timestamp_min=ucts_timestamp_min, + ucts_timestamp_max=ucts_timestamp_max, + pedestal_mean_hg=pedestal_mean_hg, + pedestal_mean_lg=pedestal_mean_lg, + pedestal_std_hg=pedestal_std_hg, + pedestal_std_lg=pedestal_std_lg + ) + pedestal_container.validate() + + # create dictionary that duplicates content + dict = {'nsamples': nsamples, + 'nevents': nevents, + 'pixels_id': pixels_id, + 'ucts_timestamp_min': ucts_timestamp_min, + 'ucts_timestamp_max': ucts_timestamp_max, + 'pedestal_mean_hg': pedestal_mean_hg, + 'pedestal_mean_lg': pedestal_mean_lg, + 'pedestal_std_hg': pedestal_std_hg, + 'pedestal_std_lg': pedestal_std_lg, + } + + # return both container and input content + return pedestal_container, dict + +class TestNectarCAMPedestalContainer: + + def test_create_pedestal_container_mem(self): + # create mock pedestal container + pedestal_container, dict = generate_mock_pedestal_container() + + # check that all fields are filled correctly with input values + assert pedestal_container.nsamples == dict['nsamples'] + assert pedestal_container.nevents.tolist() == dict['nevents'].tolist() + assert pedestal_container.ucts_timestamp_min == dict['ucts_timestamp_min'] + assert pedestal_container.ucts_timestamp_max == dict['ucts_timestamp_max'] + assert np.allclose(pedestal_container.pedestal_mean_hg, dict['pedestal_mean_hg']) + assert np.allclose(pedestal_container.pedestal_mean_lg, dict['pedestal_mean_lg']) + assert np.allclose(pedestal_container.pedestal_std_hg, dict['pedestal_std_hg']) + assert np.allclose(pedestal_container.pedestal_std_lg, dict['pedestal_std_lg']) + + # FIXME + # Guillaume is working on generalizing the fromhdf5 method to all containers + # This test should work once it's done + # def test_pedestal_container_io(self): + # input_pedestal_container, dict = generate_mock_pedestal_container() + # with tempfile.TemporaryDirectory() as tmpdirname: + # outpath = tmpdirname+"/pedestal_container_0.h5" + # writer = HDF5TableWriter( + # filename=outpath, + # mode="w", + # group_name="data", + # ) + # writer.write(table_name="NectarCAMPedestalContainer", + # containers=input_pedestal_container) + # writer.close() + # + # pedestal_container = next(NectarCAMPedestalContainer.from_hdf5(outpath)) + # + # #check that container is an instance of the proper class + # assert isinstance(pedestal_container,NectarCAMPedestalContainer) + # + # #check content + # assert pedestal_container.nsamples == dict['nsamples'] + # assert pedestal_container.nevents.tolist() == dict['nevents'].tolist() + # assert pedestal_container.ucts_timestamp_min == dict['ucts_timestamp_min'] + # assert pedestal_container.ucts_timestamp_max == dict['ucts_timestamp_max'] + # assert np.allclose(pedestal_container.pedestal_mean_hg, dict['pedestal_mean_hg']) + # assert np.allclose(pedestal_container.pedestal_mean_lg, dict['pedestal_mean_lg']) + # assert np.allclose(pedestal_container.pedestal_std_hg, dict['pedestal_std_hg']) + # assert np.allclose(pedestal_container.pedestal_std_lg, dict['pedestal_std_lg']) + +if __name__ == "__main__": + pass diff --git a/src/nectarchain/makers/calibration/pedestalMakers.py b/src/nectarchain/makers/calibration/pedestalMakers.py index ec0be630..add05a1e 100644 --- a/src/nectarchain/makers/calibration/pedestalMakers.py +++ b/src/nectarchain/makers/calibration/pedestalMakers.py @@ -1,16 +1,148 @@ import logging +import os +import numpy as np + logging.basicConfig(format="%(asctime)s %(name)s %(levelname)s %(message)s") log = logging.getLogger(__name__) log.handlers = logging.getLogger("__main__").handlers +import pathlib +import tables + +from ctapipe.core.traits import ComponentNameList + from .core import NectarCAMCalibrationTool +from ..component import NectarCAMComponent +from ...data.container import NectarCAMPedestalContainer __all__ = ["PedestalNectarCAMCalibrationTool"] class PedestalNectarCAMCalibrationTool(NectarCAMCalibrationTool): - def start(self): - raise NotImplementedError( - "The computation of the pedestal calibration is not yet implemented, feel free to contribute !:)" - ) + name = "PedestalNectarCAMCalibrationTool" + + componentsList = ComponentNameList(NectarCAMComponent, + default_value=["PedestalEstimationComponent"], + help="List of Component names to be applied, the order will be respected", ).tag( + config=True) + + def __init__(self, *args, **kwargs): + super().__init__(*args, **kwargs) + + def _init_output_path(self): + """ + Initialize output path + """ + + if self.events_per_slice is None: + ext = ".h5" + else: + ext = f"_sliced{self.events_per_slice}.h5" + if self.max_events is None: + filename = f"{self.name}_run{self.run_number}{ext}" + else: + filename = f"{self.name}_run{self.run_number}_maxevents{self.max_events}{ext}" + + self.output_path = pathlib.Path( + f"{os.environ.get('NECTARCAMDATA', '/tmp')}/PedestalEstimation/{filename}") + + def _combine_results(self): + """ + Method that combines sliced results to reduce memory load + Can only be called after the file with the sliced results has been saved to disk + """ + + # re-open results + # TODO: update to use nectarchain generators when available + with tables.open_file(self.output_path) as h5file: + # Loop over sliced results to fill the combined results + self.log.info("Combine sliced results") + for i, result in enumerate(h5file.root.__members__): + if result == 'data_combined': + log.error('Trying to combine results that already contain combined data') + table = h5file.root[result][NectarCAMPedestalContainer.__name__][0] + if i == 0: + # fill/initialize fields for the combined results based on first slice + nsamples = table['nsamples'] + nevents = table['nevents'] + pixels_id = table['pixels_id'] + ucts_timestamp_min = table['ucts_timestamp_min'] + ucts_timestamp_max = table['ucts_timestamp_max'] + pedestal_mean_hg = table['pedestal_mean_hg'] * table['nevents'][:, + np.newaxis] + pedestal_mean_lg = table['pedestal_mean_lg'] * table['nevents'][:, + np.newaxis] + pedestal_std_hg = table['pedestal_std_hg'] ** 2 * table['nevents'][:, + np.newaxis] + pedestal_std_lg = table['pedestal_std_lg'] ** 2 * table['nevents'][:, + np.newaxis] + else: + # cumulated number of events + nevents += table['nevents'] + # min/max of time interval + ucts_timestamp_min = np.minimum(ucts_timestamp_min, + table['ucts_timestamp_min']) + ucts_timestamp_max = np.maximum(ucts_timestamp_max, + table['ucts_timestamp_max']) + # add mean, std sum elements + pedestal_mean_hg += table['pedestal_mean_hg'] * table['nevents'][:, + np.newaxis] + pedestal_mean_lg += table['pedestal_mean_lg'] * table['nevents'][:, + np.newaxis] + pedestal_std_hg += table['pedestal_std_hg'] ** 2 * table['nevents'][:, + np.newaxis] + pedestal_std_lg += table['pedestal_std_lg'] ** 2 * table['nevents'][:, + np.newaxis] + + # calculate final values of mean and std + pedestal_mean_hg /= nevents[:, np.newaxis] + pedestal_mean_lg /= nevents[:, np.newaxis] + pedestal_std_hg /= nevents[:, np.newaxis] + pedestal_std_hg = np.sqrt(pedestal_std_hg) + pedestal_std_lg /= nevents[:, np.newaxis] + pedestal_std_lg = np.sqrt(pedestal_std_lg) + + output = NectarCAMPedestalContainer( + nsamples=nsamples, + nevents=nevents, + pixels_id=pixels_id, + ucts_timestamp_min=ucts_timestamp_min, + ucts_timestamp_max=ucts_timestamp_max, + pedestal_mean_hg=pedestal_mean_hg, + pedestal_mean_lg=pedestal_mean_lg, + pedestal_std_hg=pedestal_std_hg, + pedestal_std_lg=pedestal_std_lg, ) + + return output + + def finish(self, return_output_component=False, *args, **kwargs): + """ + Redefines finish method to combine sliced results + """ + + self.log.info("finishing Tool") + + # finish components + output = self._finish_components(*args, **kwargs) + + # close writer + self.writer.close() + + # Check if there are slices + if self.events_per_slice is None: + # If not nothing to do + pass + else: + # combine results + output = self._combine_results() + # add combined results to output + # re-initialise writer to store combined results + self._init_writer(sliced=True, group_name='data_combined') + # add combined results to writer + self._write_container(output) + self.writer.close() + + self.log.info("Shutting down.") + if return_output_component: + return output diff --git a/src/nectarchain/makers/calibration/tests/test_pedestal_tool.py b/src/nectarchain/makers/calibration/tests/test_pedestal_tool.py new file mode 100644 index 00000000..2172f864 --- /dev/null +++ b/src/nectarchain/makers/calibration/tests/test_pedestal_tool.py @@ -0,0 +1,232 @@ +import tempfile + +import numpy as np +import tables +from ctapipe.utils import get_dataset_path +from ctapipe_io_nectarcam.constants import N_SAMPLES +from nectarchain.data.container import NectarCAMPedestalContainer +from nectarchain.makers.calibration import PedestalNectarCAMCalibrationTool + +run_number = 3938 +run_file = get_dataset_path('NectarCAM.Run3938.30events.fits.fz') +# number of working pixels in run +n_pixels = 1834 + + +class TestPedestalCalibrationTool: + + def test_base(self): + """ + Test basic functionality, including IO on disk + """ + + # setup + n_slices = 3 + events_per_slice = 10 + max_events = n_slices * events_per_slice + with tempfile.TemporaryDirectory() as tmpdirname: + outfile = tmpdirname + "/pedestal.h5" + + # run tool + tool = PedestalNectarCAMCalibrationTool( + run_number=run_number, + run_file=run_file, + max_events=max_events, + events_per_slice=events_per_slice, + log_level=0, + output_path=outfile, + overwrite=True, + filter_method=None, + ) + + tool.initialize() + tool.setup() + + tool.start() + output = tool.finish(return_output_component=True) + + # Check output in memory + assert output.nsamples == N_SAMPLES + assert np.all(output.nevents == max_events) + assert np.shape(output.pixels_id) == (n_pixels,) + assert output.ucts_timestamp_min == np.uint64(1674462932637854793) + assert output.ucts_timestamp_max == np.uint64(1674462932695877994) + assert np.shape(output.pedestal_mean_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_mean_lg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_lg) == (n_pixels, N_SAMPLES) + assert np.allclose(output.pedestal_mean_hg, 245., atol=20.) + assert np.allclose(output.pedestal_mean_lg, 245., atol=20.) + assert np.allclose(output.pedestal_std_hg, 10, atol=10) + assert np.allclose(output.pedestal_std_lg, 2.5, atol=2.) + + # Check output on disk + # FIXME: use tables for the moment, update when h5 reader in nectarchain is working + with tables.open_file(outfile) as h5file: + for s in range(n_slices): + # Check individual groups + group_name = "data_{}".format(s + 1) + assert group_name in h5file.root.__members__ + table = h5file.root[group_name][NectarCAMPedestalContainer.__name__][0] + assert table['nsamples'] == N_SAMPLES + assert np.all(table['nevents'] == events_per_slice) + assert np.shape(table['pixels_id']) == (n_pixels,) + assert np.shape(table['pedestal_mean_hg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_mean_lg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_std_hg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_std_lg']) == (n_pixels, N_SAMPLES) + # Check combined results + group_name = "data_combined" + table = h5file.root[group_name][NectarCAMPedestalContainer.__name__][0] + assert table['nsamples'] == N_SAMPLES + assert np.all(table['nevents'] == max_events) + assert np.shape(table['pixels_id']) == (n_pixels,) + assert table['ucts_timestamp_min'] == np.uint64(1674462932637854793) + assert table['ucts_timestamp_max'] == np.uint64(1674462932695877994) + assert np.shape(table['pedestal_mean_hg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_mean_lg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_std_hg']) == (n_pixels, N_SAMPLES) + assert np.shape(table['pedestal_std_lg']) == (n_pixels, N_SAMPLES) + assert np.allclose(table['pedestal_mean_hg'], 245., atol=20.) + assert np.allclose(table['pedestal_mean_lg'], 245., atol=20.) + assert np.allclose(table['pedestal_std_hg'], 10, atol=10) + assert np.allclose(table['pedestal_std_lg'], 2.5, atol=2.) + + def test_timesel(self): + """ + Test time selection + """ + + # setup + n_slices = 3 + events_per_slice = 10 + max_events = n_slices * events_per_slice + tmin = 1674462932637860000 + tmax = 1674462932695700000 + with tempfile.TemporaryDirectory() as tmpdirname: + outfile = tmpdirname + "/pedestal.h5" + + # run tool + tool = PedestalNectarCAMCalibrationTool( + run_number=run_number, + run_file=run_file, + max_events=max_events, + events_per_slice=events_per_slice, + log_level=0, + output_path=outfile, + ucts_tmin=tmin, + ucts_tmax=tmax, + overwrite=True, + filter_method=None, + ) + + tool.initialize() + tool.setup() + + tool.start() + output = tool.finish(return_output_component=True) + + # check output + assert output.nsamples == N_SAMPLES + assert np.all(output.nevents <= max_events) + assert np.shape(output.pixels_id) == (n_pixels,) + assert output.ucts_timestamp_min >= tmin + assert output.ucts_timestamp_max <= tmax + assert np.shape(output.pedestal_mean_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_mean_lg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_lg) == (n_pixels, N_SAMPLES) + assert np.allclose(output.pedestal_mean_hg, 245., atol=20.) + assert np.allclose(output.pedestal_mean_lg, 245., atol=20.) + assert np.allclose(output.pedestal_std_hg, 10, atol=10) + assert np.allclose(output.pedestal_std_lg, 2.5, atol=2.) + + def test_WaveformsStdFilter(self): + """ + Test filtering based on waveforms standard dev + """ + + # setup + n_slices = 3 + events_per_slice = 10 + max_events = n_slices * events_per_slice + with tempfile.TemporaryDirectory() as tmpdirname: + outfile = tmpdirname + "/pedestal.h5" + + # run tool + tool = PedestalNectarCAMCalibrationTool( + run_number=run_number, + run_file=run_file, + max_events=max_events, + events_per_slice=events_per_slice, + log_level=0, + output_path=outfile, + overwrite=True, + filter_method='WaveformsStdFilter', + wfs_std_threshold=4., + ) + + tool.initialize() + tool.setup() + + tool.start() + output = tool.finish(return_output_component=True) + + # check output + assert output.nsamples == N_SAMPLES + assert np.all(output.nevents <= max_events) + assert np.shape(output.pixels_id) == (n_pixels,) + assert np.shape(output.pedestal_mean_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_mean_lg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_lg) == (n_pixels, N_SAMPLES) + assert np.allclose(output.pedestal_mean_hg, 245., atol=20.) + assert np.allclose(output.pedestal_mean_lg, 245., atol=20.) + # verify that fluctuations are reduced + assert np.allclose(output.pedestal_std_hg, 3., atol=2.) + assert np.allclose(output.pedestal_std_lg, 2.5, atol=2.) + + def test_ChargeDistributionFilter(self): + """ + Test filtering based on waveforms charge distribution + """ + + # setup + n_slices = 2 + events_per_slice = 10 + max_events = n_slices * events_per_slice - 1 + with tempfile.TemporaryDirectory() as tmpdirname: + outfile = tmpdirname + "/pedestal.h5" + + # run tool + tool = PedestalNectarCAMCalibrationTool( + run_number=run_number, + run_file=run_file, + max_events=max_events, + events_per_slice=events_per_slice, + log_level=0, + output_path=outfile, + overwrite=True, + filter_method='ChargeDistributionFilter', + charge_sigma_low_thr=1., + charge_sigma_high_thr=2., + ) + + tool.initialize() + tool.setup() + + tool.start() + output = tool.finish(return_output_component=True) + + # check output + assert output.nsamples == N_SAMPLES + assert np.all(output.nevents <= max_events) + assert np.shape(output.pixels_id) == (n_pixels,) + assert np.shape(output.pedestal_mean_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_mean_lg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_hg) == (n_pixels, N_SAMPLES) + assert np.shape(output.pedestal_std_lg) == (n_pixels, N_SAMPLES) + assert np.allclose(output.pedestal_mean_hg, 245., atol=20.) + assert np.allclose(output.pedestal_mean_lg, 245., atol=20.) + assert np.allclose(output.pedestal_std_hg, 10., atol=10.) + assert np.allclose(output.pedestal_std_lg, 2.5, atol=3.) diff --git a/src/nectarchain/makers/component/PedestalComponent.py b/src/nectarchain/makers/component/PedestalComponent.py new file mode 100644 index 00000000..bc879f40 --- /dev/null +++ b/src/nectarchain/makers/component/PedestalComponent.py @@ -0,0 +1,432 @@ +import logging + +logging.basicConfig(format="%(asctime)s %(name)s %(levelname)s %(message)s") +log = logging.getLogger(__name__) +log.handlers = logging.getLogger("__main__").handlers + +import copy +import numpy.ma as ma + +from .core import NectarCAMComponent +from ctapipe.containers import EventType +from ctapipe_io_nectarcam.containers import NectarCAMDataContainer +from ctapipe_io_nectarcam.constants import N_GAINS, HIGH_GAIN, LOW_GAIN +from ctapipe.core.traits import Integer, Unicode, Float, Dict +from ...data.container import NectarCAMPedestalContainer +from ...utils import ComponentUtils +from .waveformsComponent import WaveformsComponent +from .chargesComponent import ChargesComponent + +import numpy as np + +__all__ = ["PedestalEstimationComponent", ] + + +class PedestalEstimationComponent(NectarCAMComponent): + """ + Component that computes calibration pedestal coefficients from raw data. + Waveforms can be filtered based on time, stanndard deviation of the waveforms + or charge distribution within the sample. + Use the `events_per_slice' parameter of `NectarCAMComponent' to reduce memory load. + + Parameters + ---------- + ucts_tmin : int + Minimum UCTS timestamp for events used in pedestal estimation + ucts_tmax : int + Maximum UCTS timestamp for events used in pedestal estimation + filter_method : str + The waveforms filter method to be used. + Inplemented methods: WaveformsStdFilter (standard deviation of waveforms), + ChargeDistributionFilter (charge distribution). + wfs_std_threshold : float + Threshold of waveforms standard deviation in ADC counts above which a waveform is + excluded from pedestal computation. + charge_sigma_high_thr : float + Threshold in charge distribution (number of sigmas above mean) beyond which a waveform + is excluded from pedestal computation. + charge_sigma_low_thr : float + Threshold in charge distribution (number of sigmas below mean) beyond which a waveform + is excluded from pedestal computation. + """ + + ucts_tmin = Integer(None, + help="Minimum UCTS timestamp for events used in pedestal estimation", + allow_none=True, ).tag(config=True) + + ucts_tmax = Integer(None, + help="Maximum UCTS timestamp for events used in pedestal estimation", + allow_none=True, ).tag(config=True) + + filter_method = Unicode( + None, + help="The waveforms filter method to be used.\n" + "Inplemented methods: WaveformsStdFilter (standard deviation of waveforms),\n" + " ChargeDistributionFilter (charge distribution).", + read_only=False, + allow_none=True, + ).tag(config=True) + + wfs_std_threshold = Float( + 4., + help="Threshold of waveforms standard deviation in ADC counts above which a waveform is excluded from pedestal computation.", + ).tag(config=True) + + charge_sigma_high_thr = Float( + 3., + help="Threshold in charge distribution (number of sigmas above mean) beyond which a waveform is excluded from pedestal computation.", + ).tag(config=True) + + charge_sigma_low_thr = Float( + 3., + help="Threshold in charge distribution (number of sigmas below mean) beyond which a waveform is excluded from pedestal computation.", + ).tag(config=True) + + # I do not understand why but the ChargesComponents traits are not loaded + # FIXME + method = Unicode( + default_value="FullWaveformSum", + help="the charge extraction method", + ).tag(config=True) + + extractor_kwargs = Dict( + default_value={}, + help="The kwargs to be pass to the charge extractor method", + ).tag(config=True) + + SubComponents = copy.deepcopy(NectarCAMComponent.SubComponents) + SubComponents.default_value = ["WaveformsComponent", + "ChargesComponent"] + SubComponents.read_only = True + + def __init__(self, subarray, config=None, parent=None, *args, **kwargs): + """ + Component that computes calibration pedestal coefficients from raw data. + Waveforms can be filtered based on time, standard deviation of the waveforms + or charge distribution within the sample. + Use the `events_per_slice' parameter of `NectarCAMComponent' to reduce memory load. + + Parameters + ---------- + ucts_tmin : int + Minimum UCTS timestamp for events used in pedestal estimation + ucts_tmax : int + Maximum UCTS timestamp for events used in pedestal estimation + filter_method : str + The waveforms filter method to be used. + Inplemented methods: WaveformsStdFilter (standard deviation of waveforms), + ChargeDistributionFilter (charge distribution). + wfs_std_threshold : float + Threshold of waveforms standard deviation in ADC counts above which a waveform is + excluded from pedestal computation. + charge_sigma_high_thr : float + Threshold in charge distribution (number of sigmas above mean) beyond which a waveform + is excluded from pedestal computation. + charge_sigma_low_thr : float + Threshold in charge distribution (number of sigmas below mean) beyond which a waveform + is excluded from pedestal computation. + """ + + super().__init__(subarray=subarray, config=config, parent=parent, *args, **kwargs) + + # initialize members + self._waveformsContainers = None + self._chargesContainers = None + self._wfs_mask = None + self._ped_stats = {} + + # initialize waveforms component + waveformsComponent_kwargs = {} + waveformsComponent_configurable_traits = ComponentUtils.get_configurable_traits( + WaveformsComponent) + for key in kwargs.keys(): + if key in waveformsComponent_configurable_traits.keys(): + waveformsComponent_kwargs[key] = kwargs[key] + self.waveformsComponent = WaveformsComponent( + subarray=subarray, + config=config, + parent=parent, *args, + **waveformsComponent_kwargs, ) + + @staticmethod + def calculate_stats(waveformsContainers, wfs_mask, statistics): + """ + Calculate statistics for the pedestals from a waveforms container. + + Parameters + ---------- + waveformsContainers : `~nectarchain.data.container.WaveformsContainer` + Waveforms container + wfs_mask : `numpy.ndarray` + Mask to apply to exclude outliers with shape (n_pixels,n_samples) + statistics : `list` + Names of the statistics (numpy.ma attributes) to compute + + Returns + ---------- + ped_stats : `dict` + A dictionary containing 3D (n_chan,n_pixels,n_samples) arrays for each statistic + """ + + ped_stats = {} + + for stat in statistics: + # Calculate the statistic along axis = 0, that is over events + ped_stat_hg = getattr(ma, stat)( + ma.masked_array(waveformsContainers.wfs_hg, wfs_mask), axis=0) + ped_stat_lg = getattr(ma, stat)( + ma.masked_array(waveformsContainers.wfs_lg, wfs_mask), axis=0) + + # Create a 3D array for the statistic + array_shape = np.append([N_GAINS], np.shape(waveformsContainers.wfs_hg[0])) + ped_stat = np.zeros(array_shape) + ped_stat[HIGH_GAIN] = ped_stat_hg + ped_stat[LOW_GAIN] = ped_stat_lg + + # Store the result in the dictionary + ped_stats[stat] = ped_stat + + return ped_stats + + def __call__(self, event: NectarCAMDataContainer, *args, **kwargs): + """ + Fill the waveform container looping over the events of type SKY_PEDESTAL. + """ + + if event.trigger.event_type == EventType.SKY_PEDESTAL: + self.waveformsComponent(event=event, *args, **kwargs) + else: + pass + + def timestamp_mask(self, tmin, tmax): + """ + Generates a mask to filter waveforms outside the required time interval + + Parameters + ---------- + tmin : int + Minimum time of the required interval + tmax : int + Maximum time of the required interval + + Returns + ---------- + mask : `~numpy.ndarray` + A boolean array of shape (n_events,n_pixels,n_samples) that identifies waveforms to be masked + """ + # Check if time filtering affects the data + if tmin > self._waveformsContainers.ucts_timestamp.min() or tmax < self._waveformsContainers.ucts_timestamp.max(): + log.info( + f"Apply time interval selection: UCTS timestamps in interval {tmin}-{tmax}") + # Define the mask on UCTS timestamps + t_mask = ((self._waveformsContainers.ucts_timestamp < tmin) | ( + self._waveformsContainers.ucts_timestamp > tmax)) + # Log information + nonzero = np.count_nonzero(t_mask) + log.info(f"{len(t_mask) - nonzero}/{len(t_mask)} waveforms pass time selection.") + # Create waveforms mask to apply time selection + new_mask = np.logical_or(self._wfs_mask, t_mask[:, np.newaxis, np.newaxis]) + else: + log.info( + f"The entire time interval will be used: UCTS timestamps in {tmin}-{tmax}") + new_mask = self._wfs_mask + + # Return waveforms mask + return new_mask + + def waveformsStdFilter_mask(self, threshold): + """ + Generates a mask to filter waveforms that have a standard deviation above a threshold. + This option is effective for dark room verification data. + + Parameters + ---------- + threshold : float + Waveform standard deviation (in ADC counts) above which the waveform is filtered out + + Returns + ---------- + mask : `~numpy.ndarray` + A boolean array of shape (n_events,n_pixels,n_samples) that identifies waveforms to be masked + """ + + # Log + log.info(f"apply {self.filter_method} method with threshold = {threshold} ADC counts") + + # For each event and pixel calculate the waveform std + wfs_std = np.std(self._waveformsContainers.wfs_hg, axis=2) + + # Mask events/pixels that exceed the threshold + std_mask = (wfs_std > threshold) + # Log information + # number of masked waveforms + nonzero = np.count_nonzero(std_mask) + # number of total waveforms + tot_wfs = self._waveformsContainers.nevents * self._waveformsContainers.npixels + # fraction of waveforms masked (%) + frac = 100 * nonzero / tot_wfs + log.info(f"{frac:.2f}% of the waveforms filtered out based on standard deviation.") + + # Create waveforms mask to apply time selection + new_mask = np.logical_or(self._wfs_mask, std_mask[:, :, np.newaxis]) + + return new_mask + + def chargeDistributionFilter_mask(self, sigma_low, sigma_high): + """ + Generates a mask to filter waveforms that have a charge in the tails of the distribution. + This option is useful for data with NSB. + + Parameters + ---------- + sigma_low : float + Number of standard deviation below mean charge beyond which waveforms are filtered out + sigma_high : float + Number of standard deviation above mean charge beyond which waveforms are filtered out + + Returns + ---------- + mask : `~numpy.ndarray` + A boolean array of shape (n_events,n_pixels,n_samples) that identifies waveforms to be masked + """ + + # Log + log.info(f"apply {self.filter_method} method to filter waveforms with charge outside " + f"the interval -{sigma_low}-{sigma_high} standard deviations around the mean value") + + # Check if waveforms and charges have the same shape + wfs_shape = np.shape(self._waveformsContainers.wfs_hg) + charges_shape = np.shape(self._chargesContainers.charges_hg) + if wfs_shape[0] == charges_shape[0] and wfs_shape[1] == charges_shape[1]: + + # For each event and pixel calculate the charge mean and std over all events + # taking into account the mask already calculated + charge_array = ma.masked_array(self._chargesContainers.charges_hg, + np.any(self._wfs_mask, axis=2)) + charge_mean = ma.mean(charge_array, axis=0) + charge_std = ma.std(charge_array, axis=0) + + # Mask events/pixels that are outside the core of the distribution + low_threshold = charge_mean - sigma_low * charge_std + low_mask = (self._chargesContainers.charges_hg < low_threshold[np.newaxis, :]) + high_threshold = charge_mean + sigma_high * charge_std + high_mask = (self._chargesContainers.charges_hg > high_threshold[np.newaxis, :]) + charge_mask = np.logical_or(low_mask, high_mask) + # Log information + # number of masked waveforms + nonzero = np.count_nonzero(charge_mask) + # number of total waveforms + tot_wfs = self._waveformsContainers.nevents * self._waveformsContainers.npixels + # fraction of waveforms masked (%) + frac = 100 * nonzero / tot_wfs + log.info( + f"{frac:.2f}% of the waveforms filtered out based on charge distribution.") + + # Create waveforms mask to apply time selection + new_mask = np.logical_or(self._wfs_mask, charge_mask[:, :, np.newaxis]) + else: + log.warning( + "Waveforms and charges have incompatible shapes. No filtering applied.") + new_mask = self._wfs_mask + + return new_mask + + def finish(self,*args,**kwargs): + """ + Finish the component by filtering the waveforms and calculating the pedestal quantities + """ + + # Use only pedestal type events + waveformsContainers = self.waveformsComponent.finish() + self._waveformsContainers = waveformsContainers.containers[EventType.SKY_PEDESTAL] + + # If we want to filter based on charges distribution + # make sure that the charge distribution container is filled + if self.filter_method == "ChargeDistributionFilter" and \ + self._chargesContainers is None: + log.debug("Compute charges from waveforms") + chargesComponent_kwargs = {} + chargesComponent_configurable_traits = ComponentUtils.get_configurable_traits( + ChargesComponent) + for key in kwargs.keys(): + if key in chargesComponent_configurable_traits.keys(): + chargesComponent_kwargs[key] = kwargs[key] + self._chargesContainers = ChargesComponent.create_from_waveforms( + waveformsContainer=self._waveformsContainers, + subarray=self.subarray, + config=self.config, + parent=self.parent, *args, + **chargesComponent_kwargs, ) + + # Check if waveforms container is empty + if self._waveformsContainers is None: + log.warning("Waveforms container is none, pedestals cannot be evaluated") + # container with no results + return None + elif self._waveformsContainers.nevents is None or self._waveformsContainers.nevents == 0: + log.warning("Waveforms container is empty, pedestals cannot be evaluated") + # container with no results + return None + else: + # Build mask to filter the waveforms + # Mask based on the high gain channel that is most sensitive to signals + # Initialize empty mask + self._wfs_mask = np.zeros(np.shape(self._waveformsContainers.wfs_hg), + dtype=bool) + + # Time selection + # set the minimum time + tmin = np.maximum( + self.ucts_tmin or self._waveformsContainers.ucts_timestamp.min(), + self._waveformsContainers.ucts_timestamp.min()) + print('TMIN',self.ucts_tmin,self._waveformsContainers.ucts_timestamp.min(),tmin) + # set the maximum time + tmax = np.minimum( + self.ucts_tmax or self._waveformsContainers.ucts_timestamp.max(), + self._waveformsContainers.ucts_timestamp.max()) + print('TMAX', self.ucts_tmax, self._waveformsContainers.ucts_timestamp.max(), tmax) + # Add time selection to mask + self._wfs_mask = self.timestamp_mask(tmin, tmax) + + # Filter Waveforms + if self.filter_method is None: + log.info('no filtering applied to waveforms') + elif self.filter_method == 'WaveformsStdFilter': + self._wfs_mask = self.waveformsStdFilter_mask(self.wfs_std_threshold) + elif self.filter_method == 'ChargeDistributionFilter': + self._wfs_mask = self.chargeDistributionFilter_mask(self.charge_sigma_low_thr, + self.charge_sigma_high_thr) + else: + log.warning( + f"required filtering method {self.filter_method} not available") + log.warning("no filtering applied to waveforms") + + # compute statistics for the pedestals + # the statistic names must be valid numpy.ma attributes + statistics = ['mean', 'std'] + self._ped_stats = self.calculate_stats(self._waveformsContainers, + self._wfs_mask, + statistics) + + # calculate the number of events per pixel used to compute te quantitites + # start wit total number of events + nevents = np.ones(len(self._waveformsContainers.pixels_id)) + nevents *= self._waveformsContainers.nevents + # subtract masked events + # use the first sample for each event/pixel + # assumes that a waveform is either fully masked or not + nevents -= np.sum(self._wfs_mask[:, :, 0], axis=0) + + # Fill and return output container + output = NectarCAMPedestalContainer( + nsamples=self._waveformsContainers.nsamples, + nevents=nevents, + pixels_id=self._waveformsContainers.pixels_id, + ucts_timestamp_min=np.uint64(tmin), + ucts_timestamp_max=np.uint64(tmax), + pedestal_mean_hg=self._ped_stats['mean'][HIGH_GAIN], + pedestal_mean_lg=self._ped_stats['mean'][LOW_GAIN], + pedestal_std_hg=self._ped_stats['std'][HIGH_GAIN], + pedestal_std_lg=self._ped_stats['std'][LOW_GAIN], ) + + return output diff --git a/src/nectarchain/makers/component/__init__.py b/src/nectarchain/makers/component/__init__.py index 8ab6a9a6..f7b85ec9 100644 --- a/src/nectarchain/makers/component/__init__.py +++ b/src/nectarchain/makers/component/__init__.py @@ -1,6 +1,7 @@ from .chargesComponent import * from .core import * from .FlatFieldSPEComponent import * +from .PedestalComponent import * from .gainComponent import * from .photostatistic_algorithm import * from .photostatistic_component import * @@ -20,6 +21,7 @@ "FlatFieldCombinedSPEStdNectarCAMComponent", "ChargesComponent", "WaveformsComponent", + "PedestalEstimationComponent", "PhotoStatisticNectarCAMComponent", "PhotoStatisticAlgorithm", ] diff --git a/src/nectarchain/makers/core.py b/src/nectarchain/makers/core.py index 8759034d..08a18fe3 100644 --- a/src/nectarchain/makers/core.py +++ b/src/nectarchain/makers/core.py @@ -217,31 +217,31 @@ def _get_provided_component_kwargs(self, componentName: str): output_component_kwargs[key] = getattr(self, key) return output_component_kwargs - def _init_writer(self, sliced: bool = False, slice_index: int = 0): + def _init_writer(self, sliced: bool = False, slice_index: int = 0, group_name = None): if hasattr(self, "writer"): self.writer.close() if sliced: - if slice_index == 0: - if self.overwrite: - try: - log.info( - f"overwrite set to true, trying to remove file {self.output_path}" - ) - os.remove(self.output_path) - log.info(f"{self.output_path} removed") - except OSError: - pass - else: - if os.path.exists(self.output_path): - raise Exception( - f"file {self.output_path} does exist,\n set overwrite to True if you want to overwrite" - ) + if group_name is None: + if slice_index == 0: + if self.overwrite: + try: + log.info( + f"overwrite set to true, trying to remove file {self.output_path}" + ) + os.remove(self.output_path) + log.info(f"{self.output_path} removed") + except OSError: + pass + else: + if os.path.exists(self.output_path): + raise Exception( + f"file {self.output_path} does exist,\n set overwrite to True if you want to overwrite" + ) + group_name = f"data_{slice_index}" self.log.info( - f"initilization of writter in sliced mode (slice index = {slice_index})" + f"initilization of writer in sliced mode (output written to {group_name})" ) - # self.output_path = pathlib.Path(f"{self.output_path.parent)}/{self.output_path.stem}_1{self.output_path.suffix}" - group_name = f"data_{slice_index}" mode = "a" else: self.log.info("initilization of writter in full mode") @@ -259,7 +259,8 @@ def _init_writer(self, sliced: bool = False, slice_index: int = 0): raise Exception( f"file {self.output_path} does exist,\n set overwrite to True if you want to overwrite" ) - group_name = "data" + if group_name is None: + group_name = "data" mode = "w" try: os.makedirs(self.output_path.parent, exist_ok=True) diff --git a/src/nectarchain/user_scripts/ltibaldo/example_pedestal.py b/src/nectarchain/user_scripts/ltibaldo/example_pedestal.py new file mode 100644 index 00000000..ef276ea6 --- /dev/null +++ b/src/nectarchain/user_scripts/ltibaldo/example_pedestal.py @@ -0,0 +1,36 @@ +import logging +import os +import pathlib + +logging.basicConfig( + format="%(asctime)s %(name)s %(levelname)s %(message)s", level=logging.INFO +) +log = logging.getLogger(__name__) +log.handlers = logging.getLogger("__main__").handlers + +from nectarchain.makers.calibration import ( + PedestalNectarCAMCalibrationTool, +) + +run_number = 3938 +max_events= 9999 +events_per_slice = 1000 +outfile = "/Users/ltibaldo/tmp/test_pedestal/pedestal_{}.h5".format(run_number) + +tool = PedestalNectarCAMCalibrationTool( + progress_bar=True, + run_number=run_number, + max_events=max_events, + events_per_slice=events_per_slice, + log_level=20, + output_path=outfile, + overwrite = True, + filter_method = 'WaveformsStdFilter', + wfs_std_threshold = 4., +) + +tool.initialize() +tool.setup() + +tool.start() +output = tool.finish(return_output_component=True) \ No newline at end of file diff --git a/src/nectarchain/user_scripts/ltibaldo/extract_charges.py b/src/nectarchain/user_scripts/ltibaldo/extract_charges.py new file mode 100644 index 00000000..b942f5ce --- /dev/null +++ b/src/nectarchain/user_scripts/ltibaldo/extract_charges.py @@ -0,0 +1,27 @@ +import logging +import os +import pathlib + +logging.basicConfig( + format="%(asctime)s %(name)s %(levelname)s %(message)s", level=logging.INFO +) +log = logging.getLogger(__name__) +log.handlers = logging.getLogger("__main__").handlers + +from nectarchain.makers import ChargesNectarCAMCalibrationTool + +run_number = 3938 +max_events = 100 + +tool = ChargesNectarCAMCalibrationTool( + progress_bar=True, + method = "FullWaveformSum", + run_number=run_number, + max_events=max_events, + log_level=20, +) + +tool.initialize() +tool.setup() +tool.start() +tool.finish(return_output_component=False) \ No newline at end of file diff --git a/src/nectarchain/user_scripts/ltibaldo/extract_waveforms.py b/src/nectarchain/user_scripts/ltibaldo/extract_waveforms.py new file mode 100644 index 00000000..9767b39b --- /dev/null +++ b/src/nectarchain/user_scripts/ltibaldo/extract_waveforms.py @@ -0,0 +1,24 @@ +import logging + +logging.basicConfig( + format="%(asctime)s %(name)s %(levelname)s %(message)s", level=logging.INFO +) +log = logging.getLogger(__name__) +log.handlers = logging.getLogger("__main__").handlers + +from nectarchain.makers import WaveformsNectarCAMCalibrationTool + +run_number = 3938 +tool = WaveformsNectarCAMCalibrationTool( + progress_bar=True, + run_number=run_number, + max_events=499, + log_level=20, + events_per_slice = 100, +) + +tool.initialize() +tool.setup() + +tool.start() +output = tool.finish() \ No newline at end of file diff --git a/src/nectarchain/user_scripts/ltibaldo/plot_pedestal_output.py b/src/nectarchain/user_scripts/ltibaldo/plot_pedestal_output.py new file mode 100644 index 00000000..6b7c1409 --- /dev/null +++ b/src/nectarchain/user_scripts/ltibaldo/plot_pedestal_output.py @@ -0,0 +1,60 @@ +import numpy as np +import tables +import matplotlib.pyplot as plt + +filename = '/Users/ltibaldo/tmp/test_pedestal/pedestal_3938.h5' +pixel_id = 132 +sample = 13 +nsamples = 60 + +container_name = 'NectarCAMPedestalContainer' + +h5file = tables.open_file(filename) + +# arrays to store slice results +pedestals = np.zeros([len(h5file.root.__members__) - 1, nsamples]) +pedestals_std = np.zeros([len(h5file.root.__members__) - 1, nsamples]) +tmin = np.array([]) +tmax = np.array([]) + +# fill results for plotting +i = 0 +for result in h5file.root.__members__: + table = h5file.root[result]['NectarCAMPedestalContainer'][0] + wf = table['pedestal_mean_hg'][table['pixels_id'] == pixel_id][0] + std = table['pedestal_std_hg'][table['pixels_id'] == pixel_id][0] + if result == 'data_combined': + pedestal_combined = wf + pedestal_std = std + else: + pedestals[i] = wf + pedestals_std[i] = std + tmin = np.append(tmin,table['ucts_timestamp_min']) + tmax = np.append(tmax, table['ucts_timestamp_max']) + i += 1 + +tmean = 0.5 * (tmin + tmax) + +fig1 = plt.figure() +ax1 = plt.axes() +ax1.set_title(f"Pixel {pixel_id}") +ax1.set_xlabel('sample (ns)') +ax1.set_ylabel('pedestal (ADC counts)') +for s in range(len(pedestals)): + ax1.plot(pedestals[s],color='0.5',alpha=0.2) +ax1.plot(pedestal_combined,color='r') + +fig2 = plt.figure() +ax2 = plt.axes() +ax2.set_title(f"Pixel {pixel_id} Sample {sample}") +ax2.set_xlabel('UCTS timestamp') +ax2.set_ylabel('pedestal (ADC counts)') +ax2.errorbar(tmean,pedestals[:,sample], + xerr=[tmean-tmin,tmax-tmean], + yerr=[pedestals_std[:,sample]], + fmt = 'o', color='k',capsize=0.) +ax2.axhspan(pedestal_combined[sample]-pedestal_std[sample], + pedestal_combined[sample]+pedestal_std[sample], + facecolor='r', alpha=0.2, zorder=0) + +plt.show() \ No newline at end of file