From cd4ba71091b82f87f0497f4e25f54eee210c201c Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 7 Nov 2022 14:05:38 -0700 Subject: [PATCH 1/6] hdfio: If the initial write fails with a BlockIOError try again after 1 second. --- pyiron_base/database/filetable.py | 10 ++--- pyiron_base/jobs/job/generic.py | 10 ++--- pyiron_base/storage/hdfio.py | 61 +++++++++++++++++++++---------- 3 files changed, 52 insertions(+), 29 deletions(-) diff --git a/pyiron_base/database/filetable.py b/pyiron_base/database/filetable.py index 26fbbf287..1a813f552 100644 --- a/pyiron_base/database/filetable.py +++ b/pyiron_base/database/filetable.py @@ -2,10 +2,10 @@ import os import pandas import datetime -import h5io from pyfileindex import PyFileIndex from pyiron_base.interfaces.singleton import Singleton from pyiron_base.database.generic import IsDatabase +from pyiron_base.storage.hdfio import write_hdf5, read_hdf5 table_columns = { "id": None, @@ -322,7 +322,7 @@ def get_job_status(self, job_id): def set_job_status(self, job_id, status): db_entry = self.get_item_by_id(item_id=job_id) self._job_table.loc[self._job_table.id == job_id, "status"] = status - h5io.write_hdf5( + write_hdf5( db_entry["project"] + db_entry["subjob"] + ".h5", status, title=db_entry["subjob"][1:] + "/status", @@ -356,15 +356,15 @@ def get_extract(path, mtime): def get_hamilton_from_file(hdf5_file, job_name): - return h5io.read_hdf5(hdf5_file, job_name + "/TYPE").split(".")[-1].split("'")[0] + return read_hdf5(hdf5_file, job_name + "/TYPE").split(".")[-1].split("'")[0] def get_hamilton_version_from_file(hdf5_file, job_name): - return h5io.read_hdf5(hdf5_file, job_name + "/VERSION") + return read_hdf5(hdf5_file, job_name + "/VERSION") def get_job_status_from_file(hdf5_file, job_name): if os.path.exists(hdf5_file): - return h5io.read_hdf5(hdf5_file, job_name + "/status") + return read_hdf5(hdf5_file, job_name + "/status") else: return None diff --git a/pyiron_base/jobs/job/generic.py b/pyiron_base/jobs/job/generic.py index 8ba0dbdb3..bd2728891 100644 --- a/pyiron_base/jobs/job/generic.py +++ b/pyiron_base/jobs/job/generic.py @@ -8,7 +8,6 @@ from datetime import datetime import os import posixpath -import h5io import signal import warnings @@ -46,6 +45,7 @@ from pyiron_base.utils.deprecate import deprecate from pyiron_base.jobs.job.extension.server.generic import Server from pyiron_base.database.filetable import FileTable +from pyiron_base.storage.hdfio import write_hdf5, read_hdf5 __author__ = "Joerg Neugebauer, Jan Janssen" __copyright__ = ( @@ -182,12 +182,12 @@ def __init__(self, project, job_name): self._status = JobStatus(db=project.db, job_id=self.job_id) self.refresh_job_status() elif os.path.exists(self.project_hdf5.file_name): - initial_status = h5io.read_hdf5( + initial_status = read_hdf5( self.project_hdf5.file_name, job_name + "/status" ) self._status = JobStatus(initial_status=initial_status) if "job_id" in self.list_nodes(): - self._job_id = h5io.read_hdf5( + self._job_id = read_hdf5( self.project_hdf5.file_name, job_name + "/job_id" ) else: @@ -470,7 +470,7 @@ def refresh_job_status(self): ) elif state.database.database_is_disabled: self._status = JobStatus( - initial_status=h5io.read_hdf5( + initial_status=read_hdf5( self.project_hdf5.file_name, self.job_name + "/status" ) ) @@ -1132,7 +1132,7 @@ def save(self): if not state.database.database_is_disabled: job_id = self.project.db.add_item_dict(self.db_entry()) self._job_id = job_id - h5io.write_hdf5( + write_hdf5( self.project_hdf5.file_name, job_id, title=self.job_name + "/job_id", diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index 8cb5247f3..9895e9255 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -150,7 +150,7 @@ def __getitem__(self, item): # underlying file once, this reduces the number of file opens in the most-likely case from 2 to 1 (1 to # check whether the data is there and 1 to read it) and increases in the worst case from 1 to 2 (1 to # try to read it here and one more time to verify it's not a group below). - obj = h5io.read_hdf5(self.file_name, title=self._get_h5_path(item)) + obj = read_hdf5(self.file_name, title=self._get_h5_path(item)) if self._is_convertable_dtype_object_array(obj): obj = self._convert_dtype_obj_array(obj.copy()) return obj @@ -258,23 +258,13 @@ def __setitem__(self, key, value): use_json = False elif isinstance(value, tuple): value = list(value) - try: - h5io.write_hdf5( - self.file_name, - value, - title=self._get_h5_path(key), - overwrite="update", - use_json=use_json, - ) - except BlockingIOError: - time.sleep(1) - h5io.write_hdf5( - self.file_name, - value, - title=self._get_h5_path(key), - overwrite="update", - use_json=use_json, - ) + write_hdf5( + self.file_name, + value, + title=self._get_h5_path(key), + overwrite="update", + use_json=use_json, + ) def __delitem__(self, key): """ @@ -912,7 +902,7 @@ def _read(self, item): Returns: dict, list, float, int: data or data object """ - return h5io.read_hdf5(self.file_name, title=self._get_h5_path(item)) + return read_hdf5(self.file_name, title=self._get_h5_path(item)) # def _open_store(self, mode="r"): # """ @@ -1481,3 +1471,36 @@ def create_project_from_hdf5(self): Project: pyiron project object """ return self._project.__class__(path=self.file_path) + + +def read_hdf5(fname, title='h5io', slash='ignore') + try: + return h5io.read_hdf5(fname=fname, title=title, slash=slash) + except BlockingIOError: + time.sleep(1) + return h5io.read_hdf5(fname=fname, title=title, slash=slash) + + +def write_hdf5(fname, data, overwrite=False, compression=4, + title='h5io', slash='error', use_json=False): + try: + h5io.write_hdf5( + fname=fname, + data=data, + overwrite=overwrite, + compression=compression, + title=title, + slash=slash, + use_json=use_json + ) + except BlockingIOError: + time.sleep(1) + h5io.write_hdf5( + fname=fname, + data=data, + overwrite=overwrite, + compression=compression, + title=title, + slash=slash, + use_json=use_json + ) \ No newline at end of file From fc7c5aa448b98af0863685a20ed3226cdf39b34e Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 7 Nov 2022 14:08:14 -0700 Subject: [PATCH 2/6] fix function definition --- pyiron_base/storage/hdfio.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index 9895e9255..a4c9aa550 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -1473,7 +1473,7 @@ def create_project_from_hdf5(self): return self._project.__class__(path=self.file_path) -def read_hdf5(fname, title='h5io', slash='ignore') +def read_hdf5(fname, title='h5io', slash='ignore'): try: return h5io.read_hdf5(fname=fname, title=title, slash=slash) except BlockingIOError: From ab97f8e7eee2e81d245596f9267ffac901907da1 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Mon, 7 Nov 2022 21:10:15 +0000 Subject: [PATCH 3/6] Format black --- pyiron_base/storage/hdfio.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index a4c9aa550..e104cb972 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -1473,7 +1473,7 @@ def create_project_from_hdf5(self): return self._project.__class__(path=self.file_path) -def read_hdf5(fname, title='h5io', slash='ignore'): +def read_hdf5(fname, title="h5io", slash="ignore"): try: return h5io.read_hdf5(fname=fname, title=title, slash=slash) except BlockingIOError: @@ -1481,8 +1481,15 @@ def read_hdf5(fname, title='h5io', slash='ignore'): return h5io.read_hdf5(fname=fname, title=title, slash=slash) -def write_hdf5(fname, data, overwrite=False, compression=4, - title='h5io', slash='error', use_json=False): +def write_hdf5( + fname, + data, + overwrite=False, + compression=4, + title="h5io", + slash="error", + use_json=False, +): try: h5io.write_hdf5( fname=fname, @@ -1491,7 +1498,7 @@ def write_hdf5(fname, data, overwrite=False, compression=4, compression=compression, title=title, slash=slash, - use_json=use_json + use_json=use_json, ) except BlockingIOError: time.sleep(1) @@ -1502,5 +1509,5 @@ def write_hdf5(fname, data, overwrite=False, compression=4, compression=compression, title=title, slash=slash, - use_json=use_json - ) \ No newline at end of file + use_json=use_json, + ) From c1367ef74ea646e858ee937bbabc2d2bf124b099 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 7 Nov 2022 14:14:16 -0700 Subject: [PATCH 4/6] Call the function itself again --- pyiron_base/storage/hdfio.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index e104cb972..5cfb69ee4 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -1478,7 +1478,7 @@ def read_hdf5(fname, title="h5io", slash="ignore"): return h5io.read_hdf5(fname=fname, title=title, slash=slash) except BlockingIOError: time.sleep(1) - return h5io.read_hdf5(fname=fname, title=title, slash=slash) + return read_hdf5(fname=fname, title=title, slash=slash) def write_hdf5( @@ -1502,7 +1502,7 @@ def write_hdf5( ) except BlockingIOError: time.sleep(1) - h5io.write_hdf5( + write_hdf5( fname=fname, data=data, overwrite=overwrite, From d2d598b1ff28811fd3f7b1028ff55197b49d8d60 Mon Sep 17 00:00:00 2001 From: Jan Janssen Date: Mon, 21 Nov 2022 07:31:33 -0700 Subject: [PATCH 5/6] Add counter and warning to logger --- pyiron_base/storage/hdfio.py | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index 5cfb69ee4..a135b7dd6 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -1473,12 +1473,16 @@ def create_project_from_hdf5(self): return self._project.__class__(path=self.file_path) -def read_hdf5(fname, title="h5io", slash="ignore"): +def read_hdf5(fname, title="h5io", slash="ignore", _counter=0): try: return h5io.read_hdf5(fname=fname, title=title, slash=slash) except BlockingIOError: - time.sleep(1) - return read_hdf5(fname=fname, title=title, slash=slash) + state.logger.warn("Two or more processes tried to access the file " + fname + ". Try again in 1sec. It is the " + _counter + " time.") + if _counter < 10: + time.sleep(1) + return read_hdf5(fname=fname, title=title, slash=slash, _counter=_counter+1) + else: + raise BlockingIOError("Tried 10 times, but still get a BlockingIOError") def write_hdf5( @@ -1489,6 +1493,7 @@ def write_hdf5( title="h5io", slash="error", use_json=False, + _counter=0 ): try: h5io.write_hdf5( @@ -1501,13 +1506,18 @@ def write_hdf5( use_json=use_json, ) except BlockingIOError: - time.sleep(1) - write_hdf5( - fname=fname, - data=data, - overwrite=overwrite, - compression=compression, - title=title, - slash=slash, - use_json=use_json, - ) + state.logger.warn("Two or more processes tried to access the file " + fname + ". Try again in 1sec. It is the " + _counter + " time.") + if _counter < 10: + time.sleep(1) + write_hdf5( + fname=fname, + data=data, + overwrite=overwrite, + compression=compression, + title=title, + slash=slash, + use_json=use_json, + _counter=_counter+1 + ) + else: + raise BlockingIOError("Tried 10 times, but still get a BlockingIOError") From 1231944f8cad7b1262996ce72a67355ae9aeff19 Mon Sep 17 00:00:00 2001 From: pyiron-runner Date: Mon, 21 Nov 2022 14:34:00 +0000 Subject: [PATCH 6/6] Format black --- pyiron_base/storage/hdfio.py | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) diff --git a/pyiron_base/storage/hdfio.py b/pyiron_base/storage/hdfio.py index a135b7dd6..0de4f8910 100644 --- a/pyiron_base/storage/hdfio.py +++ b/pyiron_base/storage/hdfio.py @@ -1477,10 +1477,18 @@ def read_hdf5(fname, title="h5io", slash="ignore", _counter=0): try: return h5io.read_hdf5(fname=fname, title=title, slash=slash) except BlockingIOError: - state.logger.warn("Two or more processes tried to access the file " + fname + ". Try again in 1sec. It is the " + _counter + " time.") + state.logger.warn( + "Two or more processes tried to access the file " + + fname + + ". Try again in 1sec. It is the " + + _counter + + " time." + ) if _counter < 10: time.sleep(1) - return read_hdf5(fname=fname, title=title, slash=slash, _counter=_counter+1) + return read_hdf5( + fname=fname, title=title, slash=slash, _counter=_counter + 1 + ) else: raise BlockingIOError("Tried 10 times, but still get a BlockingIOError") @@ -1493,7 +1501,7 @@ def write_hdf5( title="h5io", slash="error", use_json=False, - _counter=0 + _counter=0, ): try: h5io.write_hdf5( @@ -1506,7 +1514,13 @@ def write_hdf5( use_json=use_json, ) except BlockingIOError: - state.logger.warn("Two or more processes tried to access the file " + fname + ". Try again in 1sec. It is the " + _counter + " time.") + state.logger.warn( + "Two or more processes tried to access the file " + + fname + + ". Try again in 1sec. It is the " + + _counter + + " time." + ) if _counter < 10: time.sleep(1) write_hdf5( @@ -1517,7 +1531,7 @@ def write_hdf5( title=title, slash=slash, use_json=use_json, - _counter=_counter+1 + _counter=_counter + 1, ) else: raise BlockingIOError("Tried 10 times, but still get a BlockingIOError")