From 7d0ac0182fd1da6e1a2ead5896c394a0fd316835 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=B8rgen=20Kvalsvik?= Date: Thu, 16 May 2019 23:32:48 +0200 Subject: [PATCH] Add interface for trace groups The group is an implementation of the core idea of [1], albeit slightly generalised. In effect, it allows users of unstructured data to group traces with a matching subset of header words, adding structured access to a large set of previously unsupported [2] files. [1] https://github.com/equinor/segyio/issues/316 [2] Unsupported in the sense that only "a bunch of traces" was usable --- python/CMakeLists.txt | 1 + python/examples/make-shot-gather.py | 41 +++++ python/segyio/gather.py | 256 ++++++++++++++++++++++++++++ python/segyio/segy.py | 4 + python/test/segy.py | 124 ++++++++++++++ test-data/shot-gather.sgy | Bin 0 -> 24340 bytes 6 files changed, 426 insertions(+) create mode 100644 python/examples/make-shot-gather.py create mode 100644 test-data/shot-gather.sgy diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index a21822c5d..13052d19b 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -182,3 +182,4 @@ add_test(NAME python.example.subcube COMMAND ${python} copy-sub-cube.py tes add_test(NAME python.example.rotate COMMAND ${python} make-rotated-copies.py test-data/small.sgy ex-rotate.sgy) add_test(NAME python.example.scan_min_max COMMAND ${python} scan_min_max.py test-data/small.sgy) add_test(NAME python.example.multi-text COMMAND ${python} make-multiple-text.py test-data/multi-text.sgy) +add_test(NAME python.example.shot-gather COMMAND ${python} make-shot-gather.py test-data/shot-gather.py) diff --git a/python/examples/make-shot-gather.py b/python/examples/make-shot-gather.py new file mode 100644 index 000000000..afff1164e --- /dev/null +++ b/python/examples/make-shot-gather.py @@ -0,0 +1,41 @@ +import sys +import numpy as np +import segyio + +def main(): + if len(sys.argv) < 2: + sys.exit('Usage: {} [output]'.format(sys.argv[0])) + + spec = 
segyio.spec() + filename = sys.argv[1] + + spec.format = 1 + spec.samples = range(25) + spec.tracecount = 61 + + with segyio.create(filename, spec) as f: + + trno = 0 + keys = [2, 3, 5, 8] + lens = [10, 12, 13, 26] + + for key, length in zip(keys, lens): + for i in range(length): + + f.header[trno].update( + offset = 1, + fldr = key, + grnofr = (i % 2) + 1, + tracf = spec.tracecount - i, + ) + + trace = np.linspace(start = key, + stop = key + 1, + num = len(spec.samples)) + f.trace[trno] = trace + trno += 1 + + f.bin.update() + +if __name__ == '__main__': + main() diff --git a/python/segyio/gather.py b/python/segyio/gather.py index e7705b9b3..2537bba7f 100644 --- a/python/segyio/gather.py +++ b/python/segyio/gather.py @@ -1,6 +1,10 @@ +import collections import itertools import numpy as np +try: from future_builtins import zip +except ImportError: pass + import segyio.tools as tools class Gather(object): @@ -173,3 +177,255 @@ def gen(): yield iline[:, xind] return gen() + +class Group(object): + """ + The inner representation of the Groups abstraction provided by Group. + + A collection of trace indices that have identical `key`. + + Notes + ----- + .. versionadded:: 1.9 + """ + def __init__(self, key, parent, index): + self.parent = parent + self.index = index + self.key = key + + @property + def header(self): + """ + A generator of the the read-only headers in this group + + Returns + ------- + headers : iterator of Header + + Notes + ----- + The generator respects the order of the index - to iterate over headers + in a different order, the index attribute can be re-organised. + + .. 
versionadded:: 1.9 + """ + source = self.parent.header + for i in self.index: + yield source[i] + + @property + def trace(self): + """ + A generator of the the read-only traces in this group + + Returns + ------- + headers : iterator of Header + + Notes + ----- + The generator respects the order of the index - to iterate over headers + in a different order, the index attribute can be re-organised. + + .. versionadded:: 1.9 + """ + source = self.parent.trace + for i in self.index: + yield source[i] + + def sort(self, fields): + """ + Sort the traces in the group, obeying the `fields` order of + most-to-least significant word. + """ + # TODO: examples + + headers = [dict(self.parent.header[i]) for i in self.index] + index = list(zip(headers, self.index)) + # sorting is stable, so sort the whole set by field, applied in the + # reverse order: + for field in reversed(fields): + index.sort(key = lambda x: x[0][field]) + + # strip off all the headers + index = [i for _, i in index] + self.index = index + +class Groups(collections.Mapping): + """ + The Groups implements the dict interface, grouping all traces that match a + given `fingerprint`. The fingerprint is a signature derived from a set of + trace header words, called a `key`. + + Consider a file with five traces, and some selected header words: + 0: {offset: 1, fldr: 1} + 1: {offset: 1, fldr: 2} + 2: {offset: 1, fldr: 1} + 3: {offset: 2, fldr: 1} + 4: {offset: 1, fldr: 2} + + With key = (offset, fldr), there are 3 groups: + {offset: 1, fldr: 1 }: [0, 2] + {offset: 1, fldr: 2 }: [1, 4] + {offset: 2, fldr: 1 }: [3] + + With a key = offset, there are 2 groups: + {offset: 1}: [0, 1, 2, 4] + {offset: 2}: [3] + + The Groups class is intended to easily process files without the rigid + in/crossline structure of iline/xline/gather, but where there is sufficient + structure in the headers. This is common for some types of pre-stack data, + shot gather data etc. + + Notes + ----- + .. 
versionadded:: 1.9 + """ + # TODO: only group in range of traces? + # TODO: cache header dicts? + def __init__(self, trace, header, key): + bins = collections.OrderedDict() + for i, h in enumerate(header[:]): + k = self.fingerprint(h[key]) + if k in bins: + bins[k].append(i) + else: + bins[k] = [i] + + self.trace = trace + self.header = header + self.key = key + self.bins = bins + + @staticmethod + def normalize_keys(items): + """ + Normalize the key representation to integers, so that they're hashable, + even when a key is built with enumerators. + + This function is intended for internal use, and provides the mapping + from accepted key representation to a canonical key. + + Parameters + ---------- + items : iterator of (int_like, array_like) + + Returns + ------- + items : generator of (int, array_like) + + Warnings + -------- + This function provides no guarantees for value and type compatibility, + even between minor versions. + + Notes + ----- + .. versionadded:: 1.9 + """ + return ((int(k), v) for k, v in items) + + @staticmethod + def fingerprint(key): + """ + Compute a hashable fingerprint for a key. This function is intended for + internal use. Relies on normalize_keys for transforming keys to + canonical form. The output of this function is used for the group -> + index mapping. + + Parameters + ---------- + key : int_like or dict of {int_like: int} or iterable of (int_like,int) + + Returns + ------- + key + A normalized canonical representation of key + + Warnings + -------- + This function provides no guarantees for value and type compatibility, + even between minor versions. + + Notes + ----- + .. 
versionadded:: 1.9 + """ + try: + return int(key) + except TypeError: + pass + + try: + items = key.items() + except AttributeError: + items = iter(key) + + # map k -> tracefield -> int + items = Groups.normalize_keys(items) + return tuple(sorted(items)) + + def __len__(self): + """x.__len__() <==> len(x)""" + return len(self.bins) + + def __contains__(self, key): + """x.__contains__(y) <==> y in x""" + return self.fingerprint(key) in self.bins + + def __getitem__(self, key): + """g[key] + + Read the group associated with key. + + Key can be any informal mapping between a header word (TraceField, su + header words, or raw integers) and a value. + + Parameters + ---------- + key + + Returns + ------- + group : Group + + Notes + ----- + .. versionadded:: 1.9 + + Examples + -------- + + Group on FieldRecord, and get the group FieldRecord == 5: + + >>> fr = segyio.TraceField.FieldRecord + >>> records = f.group(fr) + >>> record5 = records[5] + """ + key = self.fingerprint(key) + return Group(key, self, self.bins[key]) + + def values(self): + for key, index in self.bins.items(): + yield Group(key, self, index) + + def items(self): + for key, index in self.bins.items(): + yield key, Group(key, self, index) + + def __iter__(self): + return self.bins.keys() + + def sort(self, fields): + """ + Reorganise the indices in all groups by fields + """ + bins = collections.OrderedDict() + + for key, index in self.bins.items(): + g = Group(key, self, index) + g.sort(fields) + bins[key] = g.index + + self.bins = bins diff --git a/python/segyio/segy.py b/python/segyio/segy.py index 30e47c871..1e77b86d0 100644 --- a/python/segyio/segy.py +++ b/python/segyio/segy.py @@ -964,6 +964,10 @@ def interpret(self, ilines, xlines, offsets=None, sorting=TraceSortingFormat.INL return self + def group(self, word): + from .gather import Groups + return Groups(self.trace, self.header, word) + class spec(object): def __init__(self): diff --git a/python/test/segy.py b/python/test/segy.py index 
e088399c5..31fae2d8c 100644 --- a/python/test/segy.py +++ b/python/test/segy.py @@ -1723,3 +1723,127 @@ def test_interpret_invalid_args(): il = [1, 2, 3, 4, 4] xl = [20, 21, 22, 23, 24] f.interpret(il, xl, sorting=0) + +def test_group_single_key(): + with segyio.open('test-data/shot-gather.sgy', ignore_geometry = True) as f: + group = f.group(segyio.su.fldr) + assert len(group) == 4 + + assert 2 in group + assert 4 not in group + + expected_keys = [2, 3, 5, 8] + + for key, shot in zip(expected_keys, group.values()): + assert key == shot.key + + for header in shot.header: + assert key == header[segyio.su.fldr] + + for trace in shot.trace: + assert key == trace[0] + +def test_group_key_variations(): + with segyio.open('test-data/shot-gather.sgy', ignore_geometry = True) as f: + group = f.group((segyio.su.fldr, segyio.su.grnofr)) + # both dict and iterator-of-pair accepted + assert { segyio.su.fldr: 2, segyio.su.grnofr: 1 } in group + assert ((segyio.su.fldr, 2), (segyio.su.grnofr, 1)) in group + # not order sensitive + assert ((segyio.su.grnofr, 1), (segyio.su.fldr, 2)) in group + assert { segyio.su.fldr: 4, segyio.su.grnofr: 1 } not in group + + +def test_group_multi_key_corret_index(): + with segyio.open('test-data/shot-gather.sgy', ignore_geometry = True) as f: + group = f.group((segyio.su.fldr, segyio.su.grnofr)) + assert len(group) == 8 + + expected_keys = [ + ((segyio.su.fldr, 2), (segyio.su.grnofr, 1)), + ((segyio.su.fldr, 2), (segyio.su.grnofr, 2)), + ((segyio.su.fldr, 3), (segyio.su.grnofr, 1)), + ((segyio.su.fldr, 3), (segyio.su.grnofr, 2)), + ((segyio.su.fldr, 5), (segyio.su.grnofr, 1)), + ((segyio.su.fldr, 5), (segyio.su.grnofr, 2)), + ((segyio.su.fldr, 8), (segyio.su.grnofr, 1)), + ((segyio.su.fldr, 8), (segyio.su.grnofr, 2)), + ] + + indices = [ + # fldr = 2, grnofr = 1 + [0, 2, 4, 6, 8], + # fldr = 2, grnofr = 2 + [1, 3, 5, 7, 9], + + # fldr = 3, grnofr = 1 + [10, 12, 14, 16, 18, 20], + # fldr = 3, grnofr = 2 + [11, 13, 15, 17, 19, 21], + + # fldr = 5, 
grnofr = 1 + [22, 24, 26, 28, 30, 32, 34], + # fldr = 5, grnofr = 2 + [23, 25, 27, 29, 31, 33], + + # fldr = 8, grnofr = 1 + [35, 37, 39, 41, 43, 45, 47, 49, 51, 53, 55, 57, 59], + # fldr = 8, grnofr = 2 + [36, 38, 40, 42, 44, 46, 48, 50, 52, 54, 56, 58, 60], + ] + + # this test checks that every individual group maps to the known index + # it's supposed to model. it would be more elegant to represent the + # indices as a key -> index dict, but that makes verifying that every + # key/index pair is covered uglier + assert len(expected_keys) == len(indices) + + for key, index, shot in zip(expected_keys, indices, group.values()): + assert index == shot.index + assert key == shot.key + +def test_specific_group_sort(): + with segyio.open('test-data/shot-gather.sgy', ignore_geometry = True) as f: + group = f.group((segyio.su.fldr, segyio.su.grnofr)) + + left_key = ((segyio.su.fldr, 2), (segyio.su.grnofr, 1)) + left_shot = group[left_key] + left_shot.sort([segyio.su.tracf, segyio.su.grnofr, segyio.su.fldr]) + # tracf is descending, so sorting by it ascending should reverse order + assert left_shot.index == [8, 6, 4, 2, 0] + + right_key = ((segyio.su.fldr, 2), (segyio.su.grnofr, 2)) + right_shot = group[right_key] + # the next shot is untouched + assert right_shot.index == [1, 3, 5, 7, 9] + +def test_all_group_sort(): + with segyio.open('test-data/shot-gather.sgy', ignore_geometry = True) as f: + group = f.group((segyio.su.fldr, segyio.su.grnofr)) + group.sort([segyio.su.tracf, segyio.su.grnofr, segyio.su.fldr]) + + left_key = ((segyio.su.fldr, 2), (segyio.su.grnofr, 1)) + left_shot = group[left_key] + assert left_shot.index == [8, 6, 4, 2, 0] + + right_key = ((segyio.su.fldr, 2), (segyio.su.grnofr, 2)) + right_shot = group[right_key] + assert right_shot.index == [9, 7, 5, 3, 1] + +def test_groups_gather_equivalence(): + with segyio.open('test-data/small.sgy') as f: + groups = f.group((segyio.su.iline, segyio.su.xline)) + key = ((segyio.su.iline, 1), (segyio.su.xline, 21)) 
+ group = groups[key] + assert len(group.index) == 1 + + # stack gathers in a list-of-traces, but there should only be 1 in this + # group + from_group = np.stack(groups[key].trace) + assert from_group.shape == (1, 50) + + from_group = from_group[0] + from_gather = f.gather[1, 21] + + # group[(il, xl)] == gather[il, xl] + npt.assert_array_equal(from_group, from_gather) diff --git a/test-data/shot-gather.sgy b/test-data/shot-gather.sgy new file mode 100644 index 0000000000000000000000000000000000000000..0e93849dedc62140e1b7e3762de041140b12a809 GIT binary patch literal 24340 zcmeI4OHUI~6vwN?#IN8E5tXOB1bKVs1DGfZBBFNfM{tG6QxQ!J7`L(^_$kWm7`w(6 z*|x;6?971Jo&QmjKAdWYo}A8{c-j2s_Ri_Of6k%&4`j5mR6Le3m7TS6(d<#w`lhNG zTG=X=REebCGh=BJpkkkQcM_TXxT(x9`>~z$AFu1wemaxTD)D*OjK`ANEZBfnj{5p; zd1GUKP0enqH_Nl{=NC7Z*VT=;n;T2(YYz(6R)Sg`R3e8( zTsl?NwN~G&y1(>ht#YMPRb6Wlc%=~^O>l{b3ht}UBYsY;1-y-_mRers$^5;`N(l62$aeY|C^!fY8(CF0Wf~kGzP&#V7Mb=7(41%cDf2Rktl z?85zf5wnkiTk{+Qr}2#IZ<50A^J;nq2bAykZz4}`!kgcMHy;C!fp3HFfbW3E!4u#K z@LljEcoKXMJO!Qt-v>{Fr@;@v55W(?kHFEQG;uYR@AvsNmGAfYHI?u8`8AdA_xUxI z@AvsNmGAfYHI?u8`8AdA_xUxI@AvsNmGAfYHI?u8`8Dk-a;y1^VDwN3c46>pdKMDO z@1e|~-!ZfG!FY;($1Hdj{0uw?o&(Q=7r+bPMeq`M3A_w`4t@@P0bT*GfM0@F!K>g` z;Me&?1i6~Z@1gK(D!+%quc`bV3csfEdno*x%I~4@Ybw8o!mp|P9tyvv@_Q)!n#%8? z@M|i+hr+L^{2mIwrt*6z{F-(~yaz!?$UCsFS&IMdnL)pUw};YE=8VD6lwcPIzovL% zg6AMOjb~hcli|$M_A-YIhNc9&s6n3MDIcDL;543b{Y{25Pg~0zG8mc??4kyFiYL=} z4uaEo#`QND&OB`{bI4$5O0bI>