Skip to content

Commit

Permalink
Add interface for trace groups
Browse files Browse the repository at this point in the history
The group is an implementation of the core idea of [1], albeit slightly
generalised. In effect, it allows users with unstructured data to group
traces that share a matching subset of header words, adding structured
access to a large set of previously unsupported [2] files.

[1] #316
[2] Unsupported in the sense that only "a bunch of traces" was usable
  • Loading branch information
jokva committed May 18, 2019
1 parent fdb6c65 commit 7d0ac01
Show file tree
Hide file tree
Showing 6 changed files with 426 additions and 0 deletions.
1 change: 1 addition & 0 deletions python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,4 @@ add_test(NAME python.example.subcube COMMAND ${python} copy-sub-cube.py tes
add_test(NAME python.example.rotate COMMAND ${python} make-rotated-copies.py test-data/small.sgy ex-rotate.sgy)
add_test(NAME python.example.scan_min_max COMMAND ${python} scan_min_max.py test-data/small.sgy)
add_test(NAME python.example.multi-text COMMAND ${python} make-multiple-text.py test-data/multi-text.sgy)
# make-shot-gather.py takes the path of the file it creates as its only
# argument; name that output .sgy rather than .py, since it is a SEG-Y file.
add_test(NAME python.example.shot-gather COMMAND ${python} make-shot-gather.py test-data/shot-gather.sgy)
41 changes: 41 additions & 0 deletions python/examples/make-shot-gather.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
import sys
import numpy as np
import segyio

def main():
    """
    Create an example file with traces spread over four field records.

    The output path is read from the first command line argument; when it is
    missing, the program exits with a usage message.

    The file is created with 61 traces of 25 samples each. Traces are written
    consecutively, record by record, and every trace header gets:

    - offset: always 1
    - fldr:   the record number (2, 3, 5 or 8)
    - grnofr: alternating 1, 2, 1, 2, ... within the record
    - tracf:  tracecount minus the position of the trace within its record

    The samples of a trace are evenly spaced values from the record number
    to the record number plus one.
    """
    if len(sys.argv) < 2:
        sys.exit('Usage: {} [output]'.format(sys.argv[0]))

    filename = sys.argv[1]

    spec = segyio.spec()
    spec.format = 1
    spec.samples = range(25)
    spec.tracecount = 61

    # (fldr, number of traces in that record) - the counts add up to
    # spec.tracecount, so every trace in the file is written exactly once
    records = [(2, 10), (3, 12), (5, 13), (8, 26)]

    with segyio.create(filename, spec) as f:
        trno = 0

        for key, length in records:
            for i in range(length):
                words = dict(
                    offset = 1,
                    fldr = key,
                    grnofr = (i % 2) + 1,
                    tracf = spec.tracecount - i,
                )
                f.header[trno].update(**words)

                f.trace[trno] = np.linspace(start = key,
                                            stop = key + 1,
                                            num = len(spec.samples))
                trno += 1

        f.bin.update()

if __name__ == '__main__':
    main()
256 changes: 256 additions & 0 deletions python/segyio/gather.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import collections
import itertools
import numpy as np

try: from future_builtins import zip
except ImportError: pass

# the ABC aliases in collections were removed in python 3.10, and python 2
# has no collections.abc
try: from collections.abc import Mapping
except ImportError: from collections import Mapping

import segyio.tools as tools

class Gather(object):
Expand Down Expand Up @@ -173,3 +177,255 @@ def gen():
yield iline[:, xind]

return gen()

class Group(object):
    """
    A single group, as handed out by Groups.

    A group is the list of trace indices (`index`) that share the same
    `key`, together with the `parent` object whose `header` and `trace`
    attributes are indexed to read the actual data.

    Notes
    -----
    .. versionadded:: 1.9
    """
    def __init__(self, key, parent, index):
        self.key = key
        self.index = index
        self.parent = parent

    def _each(self, attr):
        # Shared implementation of header and trace. This is a generator
        # function, so nothing is looked up until iteration starts.
        source = getattr(self.parent, attr)
        for i in self.index:
            yield source[i]

    @property
    def header(self):
        """
        A generator of the headers in this group

        Returns
        -------
        headers : iterator
            parent.header[i] for every i in index

        Notes
        -----
        The generator respects the order of the index - to iterate over
        headers in a different order, the index attribute can be
        re-organised.

        .. versionadded:: 1.9
        """
        return self._each('header')

    @property
    def trace(self):
        """
        A generator of the traces in this group

        Returns
        -------
        traces : iterator
            parent.trace[i] for every i in index

        Notes
        -----
        The generator respects the order of the index - to iterate over
        traces in a different order, the index attribute can be
        re-organised.

        .. versionadded:: 1.9
        """
        return self._each('trace')

    def sort(self, fields):
        """
        Sort the traces in the group, obeying the `fields` order of
        most-to-least significant word.

        Parameters
        ----------
        fields : sequence
            Header words to sort by, most significant first. Every word is
            used to index a dict built from each trace's header.

        Notes
        -----
        The index attribute is replaced by a new, sorted list.
        """
        # TODO: examples

        lookup = self.parent.header
        pairs = [(dict(lookup[i]), i) for i in self.index]

        # list.sort is stable, so applying the least significant word first
        # and the most significant word last gives a lexicographical order
        for word in reversed(fields):
            pairs.sort(key = lambda pair, word = word: pair[0][word])

        # only the trace numbers are kept
        self.index = [traceno for _, traceno in pairs]

class Groups(Mapping):
    """
    The Groups implements the dict interface, grouping all traces that match a
    given `fingerprint`. The fingerprint is a signature derived from a set of
    trace header words, called a `key`.

    Consider a file with five traces, and some selected header words:

        0: {offset: 1, fldr: 1}
        1: {offset: 1, fldr: 2}
        2: {offset: 1, fldr: 1}
        3: {offset: 2, fldr: 1}
        4: {offset: 1, fldr: 2}

    With key = (offset, fldr), there are 3 groups:

        {offset: 1, fldr: 1 }: [0, 2]
        {offset: 1, fldr: 2 }: [1, 4]
        {offset: 2, fldr: 1 }: [3]

    With a key = offset, there are 2 groups:

        {offset: 1}: [0, 1, 2, 4]
        {offset: 2}: [3]

    The Groups class is intended to easily process files without the rigid
    in/crossline structure of iline/xline/gather, but where there is sufficient
    structure in the headers. This is common for some types of pre-stack data,
    shot gather data etc.

    Parameters
    ----------
    trace
        Trace accessor, stored and indexed by trace number by the groups
    header
        Header accessor. All of it is read (header[:]) on construction, and
        every header is indexed with `key` to compute its fingerprint
    key
        The header word, or words, to group by

    Notes
    -----
    Groups are kept in the order their fingerprint is first seen.

    .. versionadded:: 1.9
    """
    # TODO: only group in range of traces?
    # TODO: cache header dicts?
    def __init__(self, trace, header, key):
        bins = collections.OrderedDict()
        for i, h in enumerate(header[:]):
            bins.setdefault(self.fingerprint(h[key]), []).append(i)

        self.trace = trace
        self.header = header
        self.key = key
        self.bins = bins

    @staticmethod
    def normalize_keys(items):
        """
        Normalize the key representation to integers, so that they're hashable,
        even when a key is built with enumerators.

        This function is intended for internal use, and provides the mapping
        from accepted key representation to a canonical key.

        Parameters
        ----------
        items : iterator of (int_like, array_like)

        Returns
        -------
        items : generator of (int, array_like)

        Warnings
        --------
        This function provides no guarantees for value and type compatibility,
        even between minor versions.

        Notes
        -----
        .. versionadded:: 1.9
        """
        return ((int(k), v) for k, v in items)

    @staticmethod
    def fingerprint(key):
        """
        Compute a hashable fingerprint for a key. This function is intended for
        internal use. Relies on normalize_keys for transforming keys to
        canonical form. The output of this function is used for the group ->
        index mapping.

        Parameters
        ----------
        key : int_like or dict of {int_like: int} or iterable of (int_like,int)

        Returns
        -------
        key
            A normalized canonical representation of key: an int when key
            converts to int, otherwise a sorted tuple of (int, value) pairs

        Warnings
        --------
        This function provides no guarantees for value and type compatibility,
        even between minor versions.

        Notes
        -----
        .. versionadded:: 1.9
        """
        # a single value is its own fingerprint
        try:
            return int(key)
        except TypeError:
            pass

        # otherwise it is either a mapping, or already a series of pairs
        try:
            items = key.items()
        except AttributeError:
            items = iter(key)

        # map k -> tracefield -> int
        items = Groups.normalize_keys(items)
        # sorted, so the fingerprint does not depend on the order the words
        # are given in
        return tuple(sorted(items))

    def __len__(self):
        """x.__len__() <==> len(x)"""
        return len(self.bins)

    def __contains__(self, key):
        """x.__contains__(y) <==> y in x"""
        return self.fingerprint(key) in self.bins

    def __getitem__(self, key):
        """g[key]

        Read the group associated with key.

        Key can be any informal mapping between a header word (TraceField, su
        header words, or raw integers) and a value.

        Parameters
        ----------
        key

        Returns
        -------
        group : Group

        Raises
        ------
        KeyError
            If no trace matches key

        Notes
        -----
        .. versionadded:: 1.9

        Examples
        --------
        Group on FieldRecord, and get the group FieldRecord == 5:

        >>> fr = segyio.TraceField.FieldRecord
        >>> records = f.group(fr)
        >>> record5 = records[5]
        """
        key = self.fingerprint(key)
        return Group(key, self, self.bins[key])

    def values(self):
        """Generate a Group for every fingerprint, in bin order"""
        for key, index in self.bins.items():
            yield Group(key, self, index)

    def items(self):
        """Generate (fingerprint, Group) pairs, in bin order"""
        for key, index in self.bins.items():
            yield key, Group(key, self, index)

    def __iter__(self):
        """x.__iter__() <==> iter(x)

        Iterate over the fingerprints, in bin order.
        """
        # __iter__ must return an iterator. In python 3, dict.keys() is a
        # view, and returning it makes iter(), list() and the inherited
        # Mapping.keys() fail with a TypeError
        return iter(self.bins)

    def sort(self, fields):
        """
        Reorganise the indices in all groups by fields

        Parameters
        ----------
        fields : sequence
            Header words to sort every group by, most significant first
        """
        bins = collections.OrderedDict()

        for key, index in self.bins.items():
            g = Group(key, self, index)
            g.sort(fields)
            bins[key] = g.index

        self.bins = bins
4 changes: 4 additions & 0 deletions python/segyio/segy.py
Original file line number Diff line number Diff line change
Expand Up @@ -964,6 +964,10 @@ def interpret(self, ilines, xlines, offsets=None, sorting=TraceSortingFormat.INL

return self

def group(self, word):
    """
    Group the traces of this file by header word

    Builds a Groups object from this file's trace and header attributes,
    using `word` as the grouping key.

    Parameters
    ----------
    word
        The header word, or words, to group by. Passed on as the key of
        Groups

    Returns
    -------
    groups : Groups
    """
    # NOTE(review): the import is done at call time rather than at the top
    # of the module - presumably to avoid an import cycle, confirm
    from .gather import Groups as _Groups
    return _Groups(trace = self.trace, header = self.header, key = word)


class spec(object):
def __init__(self):
Expand Down
Loading

0 comments on commit 7d0ac01

Please sign in to comment.