Added support for NAVGEM files which include a text header (#115)

This fix ignores everything before the first GRIB2 message. It moves byte by byte within the first 2k bytes, testing for "GRIB", which indicates the beginning of the first GRIB2 message. Co-authored-by: Tim Cera <[email protected]>
NOAA-MDL · Jan 5, 2024 · d6365a8 · d6365a8
1 parent 52f1be3
commit d6365a8
Showing 1 changed file with 16 additions and 1 deletion.
diff --git a/grib2io/_grib2io.py b/grib2io/_grib2io.py
@@ -197,7 +197,22 @@ def _build_index(self, no_data=False):
             try:
                 # Read first 4 bytes and decode...looking for "GRIB"
                 pos = self._filehandle.tell()
-                header = struct.unpack('>i',self._filehandle.read(4))[0]
+
+                # Ignore headers (usually text) that are not part of the GRIB2
+                # file.  For example, NAVGEM files have an HTTP header at the
+                # beginning that needs to be ignored.
+
+                # Advance one byte at a time until "GRIB" is found.  Using
+                # "wgrib2" on a NAVGEM file, the header was 421 bytes, so
+                # the search limit was set to 2048 bytes to be safe. For
+                # normal GRIB2 files this should be quick, breaking out of
+                # the loop on the first pass, when "test_offset" is 0.
+                for test_offset in range(2048):
+                    self._filehandle.seek(pos + test_offset)
+                    header = struct.unpack(">i", self._filehandle.read(4))[0]
+                    if header.to_bytes(4, "big") == b"GRIB":
+                        pos = pos + test_offset
+                        break
 
                 # Test header. Then get information from GRIB2 Section 0: the discipline
                 # number, edition number (should always be 2), and GRIB2 message size.