Skip to content

Instantly share code, notes, and snippets.

@wassname
Last active April 21, 2016 01:45
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save wassname/4bd878e4d24e27a6bbaedfff4a4e7b37 to your computer and use it in GitHub Desktop.
Save wassname/4bd878e4d24e27a6bbaedfff4a4e7b37 to your computer and use it in GitHub Desktop.
Using segypy to read large files by assuming they have a fixed layout
"""
See: https://github.com/sixty-north/segpy/issues/29
This iherits it licence from segpy
This is for segpy commit: 91562fd
"""
import os
import sys
root = os.path.dirname(os.path.dirname(__file__))
segpy_path=os.path.join(root,'segpy')
os.sys.path.append(segpy_path)
import segpy
from segpy import __version__
from segpy.encoding import ASCII
from segpy.packer import make_header_packer
from segpy.trace_header import TraceHeaderRev1
from segpy.util import file_length, filename_from_handle, make_sorted_distinct_sequence, hash_for_file, UNKNOWN_FILENAME
from segpy.datatypes import DATA_SAMPLE_FORMAT_TO_SEG_Y_TYPE, SEG_Y_TYPE_DESCRIPTION, SEG_Y_TYPE_TO_CTYPE, size_in_bytes
from segpy.toolkit import (extract_revision,
bytes_per_sample,
read_binary_reel_header,
read_trace_header,
catalog_traces,
read_binary_values,
REEL_HEADER_NUM_BYTES,
TRACE_HEADER_NUM_BYTES,
TEXTUAL_HEADER_NUM_BYTES,
BINARY_HEADER_NUM_BYTES,
read_textual_reel_header,
read_extended_textual_headers,
guess_textual_header_encoding)
from segpy.reader import SegYReader
from segpy.header import SubFormatMeta
def create_read_writer(fh, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None, cache_directory=".segpy", fast=False):
"""Create a SegYReadWriter (or one of its subclasses)
This function is the preferred method for creating SegYReadWriter
objects. It reads basic header information and attempts to build
indexes for traces, CDP numbers (for 2D surveys), and inline and
cross line co-ordinates (for 3D surveys) to facilitate subsequent
random-access to traces.
Args:
fh: A file-like-object open in binary mode positioned such
that the beginning of the reel header will be the next
byte to be read. For disk-based SEG Y files, this is the
beginning of the file.
encoding: An optional text encoding for the textual headers. If
None (the default) a heuristic will be used to guess the
header encoding.
trace_header_format: An optional class defining the layout of the
trace header. Defaults to TraceHeaderRev1.
endian: '>' for big-endian data (the standard and default), '<'
for little-endian (non-standard)
progress: A unary callable which will be passed a number
between zero and one indicating the progress made. If
provided, this callback will be invoked at least once with
an argument equal to one.
cache_directory: The directory for the cache file. Relative paths
are interpreted as being relative to the directory containing
the SEG Y file. Absolute paths are used as is. If
cache_directory is None, caching is disabled.
Raises:
ValueError: ``fh`` is unsuitable for some reason, such as not
being open, not being seekable, not being in
binary mode, or being too short.
Returns:
A SegYReadWriter object. Depending on the exact type of the
SegYReadWriter returned different capabilities may be
available. Inspect the returned object to determine these
capabilities, or be prepared for capabilities not defined in
the SegYReadWriter base class to be unavailable. The underlying
file-like object must remain open for the duration of use of
the returned reader object. It is the caller's responsibility
to close the underlying file.
Example:
with open('my_seismic_data.sgy', 'rb') as fh:
reader = create_reader(fh)
print(reader.num_traces())
"""
if hasattr(fh, 'encoding') and fh.encoding is not None:
raise TypeError(
"SegYReader must be provided with a binary mode file object")
if not fh.seekable():
raise TypeError(
"SegYReader must be provided with a seekable file object")
if fh.closed:
raise ValueError(
"SegYReader must be provided with an open file object")
num_file_bytes = file_length(fh)
if num_file_bytes < REEL_HEADER_NUM_BYTES:
raise ValueError(
"SEG Y file {!r} of {} bytes is too short".format(
filename_from_handle(fh),
num_file_bytes))
if endian not in ('<', '>'):
raise ValueError("Unrecognised endian value {!r}".format(endian))
reader = None
cache_file_path = None
if cache_directory is not None:
sha1 = hash_for_file(fh, encoding, trace_header_format, endian)
seg_y_path = filename_from_handle(fh)
cache_file_path = segpy.reader._locate_cache_file(seg_y_path, cache_directory, sha1)
if cache_file_path is not None:
reader = segpy.reader._load_reader_from_cache(cache_file_path, seg_y_path)
if reader is None:
reader = _make_read_writer(fh, encoding, trace_header_format, endian, progress, fast=fast)
if cache_directory is not None:
segpy.reader._save_reader_to_cache(reader, cache_file_path)
return reader
def _make_read_writer(fh, encoding, trace_header_format, endian, progress, fast=False):
"""
Makes a fixed length segy reader. If catalouging fails it attempts a fixed
length catalog.
"""
if encoding is None:
encoding = guess_textual_header_encoding(fh)
if encoding is None:
encoding = ASCII
textual_reel_header = read_textual_reel_header(fh, encoding)
binary_reel_header = read_binary_reel_header(fh, endian)
extended_textual_header = read_extended_textual_headers(fh, binary_reel_header, encoding)
revision = extract_revision(binary_reel_header)
bps = bytes_per_sample(binary_reel_header, revision)
if fast:
try:
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format,endian, progress)
except Exception as e:
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_traces(fh, bps, trace_header_format,endian, progress)
else:
try:
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_traces(fh, bps, trace_header_format,endian, progress)
except Exception as e:
print("Error cataloging traces, trying fixed length catalog.")
fh.seek(REEL_HEADER_NUM_BYTES)
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format,endian, progress)
# This part deviated from segpy
# For the case where it has 2d and 3d attributes, we want it to be 3d in this case
# TODO implement this change to segpy
if line_catalog is not None:
return SegYReadWriter3D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, line_catalog, trace_header_format, encoding, endian)
if cdp_catalog is not None:
return SegYReadWriter2D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, cdp_catalog, trace_header_format, encoding, endian)
return SegYReadWriter(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog,
trace_length_catalog, trace_header_format, encoding, endian)
def catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format=TraceHeaderRev1, endian='>', progress=None):
"""Build catalogs to for a fixed length SEG Y file. This is much faster
than the full catalog, but has limitations. No CDP, or inline, xline
catalogs, and it only works for segy files with fixed legth SEG Y files.
Note:
This function is faster than the full catalog, but has limitations.
No CDP, or inline, xline catalogs, and it only works for SEG Y files
with a fixed number of samples in each trace.
Two catalogs will be built:
1. A catalog mapping trace_samples index (0-based) to the position of that
trace_samples header in the file.
2. A catalog mapping trace_samples index (0-based) to the number of
samples in that trace_samples.
Args:
fh: A file-like-object open in binary mode, positioned at the
start of the first trace_samples header.
bps: The number of bytes per sample, such as obtained by a call
to bytes_per_sample()
trace_header_format: The class defining the trace header format.
Defaults to TraceHeaderRev1.
endian: '>' for big-endian data (the standard and default), '<'
for little-endian (non-standard)
progress: A unary callable which will be passed a number
between zero and one indicating the progress made. If
provided, this callback will be invoked at least once with
an argument equal to 1
Returns:
A 4-tuple of the form (trace_samples-offset-catalog,
trace_samples-length-catalog,
None,
None)` where
each catalog is an instance of ``collections.Mapping`` or None
if no catalog could be built.
"""
revision = extract_revision(binary_reel_header)
bps = bytes_per_sample(binary_reel_header, revision)
progress_callback = progress if progress is not None else lambda p: None
if not callable(progress_callback):
raise TypeError("catalog_traces(): progress callback must be callable")
class CatalogSubFormat(metaclass=SubFormatMeta,
parent_format=trace_header_format,
parent_field_names=(
'file_sequence_num',
'ensemble_num',
'num_samples',
'inline_number',
'crossline_number',
)):
pass
num_file_bytes = file_length(fh)
num_samples=binary_reel_header.num_samples
num_traces_float = (num_file_bytes-REEL_HEADER_NUM_BYTES)/(TRACE_HEADER_NUM_BYTES+num_samples*bps)
num_traces = int(num_traces_float)
if num_traces != num_traces_float:
raise ValueError(
"SEG Y file {!r} of {} bytes is not consistent with a fixed trace length".format(
filename_from_handle(fh),
num_file_bytes))
trace_offset_catalog_builder = segpy.catalog.CatalogBuilder()
trace_length_catalog_builder = segpy.catalog.CatalogBuilder()
for trace_index in range(num_traces):
pos_begin=REEL_HEADER_NUM_BYTES+(num_samples * bps+TRACE_HEADER_NUM_BYTES) * trace_index
trace_length_catalog_builder.add(trace_index, num_samples)
trace_offset_catalog_builder.add(trace_index, pos_begin)
trace_offset_catalog = trace_offset_catalog_builder.create()
trace_length_catalog = trace_length_catalog_builder.create()
progress_callback(1)
return (trace_offset_catalog,
trace_length_catalog,
None,
None)
class SegYReadWriter(SegYReader):
"""
Mixin that extends SegyReader with Writing capabilities
"""
def trace_position(self,trace_index):
if not (0 <= trace_index < self.num_traces()):
raise ValueError("Trace index out of range.")
return self._trace_offset_catalog[trace_index]
def write_trace_header(self,trace_index,trace_header,force=True):
"""
Write a trace header in place
"""
pos=self.trace_position(trace_index)
try:
read_trace_header(self._fh, self._trace_header_packer, pos=pos)
except Exception as e:
print ("Could not read a trace header from trace_index={}, skipping writting. Pass force=False to force".format(trace_index))
else:
segpy.toolkit.write_trace_header(self._fh,trace_header,self._trace_header_packer,pos)
def write_binary_reel_header(self,binary_reel_header):
self._fh.seek(REEL_HEADER_NUM_BYTES)
segpy.toolkit.write_binary_reel_header(self._fh, binary_reel_header, self.endian)
def write_textual_reel_header(self,textual_reel_header):
self._fh.seek(0)
segpy.toolkit.write_textual_reel_header(self._fh, textual_reel_header, self.encoding)
def write_extended_textual_headers(self,extended_textual_header):
self._fh.seek(TEXTUAL_HEADER_NUM_BYTES)
segpy.toolkit.write_extended_textual_headers(self._fh, extended_textual_header, self.encoding)
def write_trace_samples(self,trace_index,samples):
num_samples_in_trace = self.num_trace_samples(trace_index)
start_pos = (self.trace_position(trace_index) + TRACE_HEADER_NUM_BYTES)
if not num_samples_in_trace==len(samples):
raise ValueError(
"Length of samples {} does not fit in trace size {}".format(len(samples),num_samples_in_trace))
segpy.toolkit.write_trace_samples(self._fh, samples, self.data_sample_format, pos=start_pos, endian='>')
class SegYReadWriter2D(segpy.reader.SegYReader2D,SegYReadWriter):
pass
class SegYReadWriter3D(segpy.reader.SegYReader3D,SegYReadWriter):
pass
def main(argv=None):
import sys
if argv is None:
argv = sys.argv[1:]
class ProgressBar(object):
def __init__(self, num_chars, character='.'):
self._num_chars = num_chars
self._character = character
self._ratchet = 0
def __call__(self, proportion):
existing = self._num_marks(self._ratchet)
required = self._num_marks(proportion)
print(self._character * (required - existing), end='')
self._ratchet = proportion
def _num_marks(self, p):
return int(round(p * self._num_chars))
filename = argv[0]
with open(filename, 'r+b') as segy_file:
segy_reader = create_read_writer(segy_file, progress=ProgressBar(30))
trace_header = segy_reader.trace_header(0)
trace_header.shotpoint_scalar=trace_header.shotpoint_scalar
segy_reader.write_trace_header(0,trace_header)
binary_reel_header = segy_reader.binary_reel_header
binary_reel_header.num_samples=binary_reel_header.num_samples
segy_reader.write_binary_reel_header(binary_reel_header)
trace_samples=segy_reader.trace_samples(0)
segy_reader.write_trace_samples(0,trace_samples)
with open(filename, 'rb') as segy_file:
segy_reader = create_read_writer(segy_file, progress=ProgressBar(30))
print()
print("Filename: ", segy_reader.filename)
print("SEG Y revision: ", segy_reader.revision)
print("Number of traces: ", segy_reader.num_traces())
print("Data format: ",
segy_reader.data_sample_format_description)
print("Dimensionality: ", segy_reader.dimensionality)
try:
print("Number of CDPs: ", segy_reader.num_cdps())
except AttributeError:
pass
try:
print("Number of inlines: ", segy_reader.num_inlines())
print("Number of crosslines: ", segy_reader.num_xlines())
except AttributeError:
pass
print("=== BEGIN TEXTUAL REEL HEADER ===")
for line in segy_reader.textual_reel_header:
print(line[3:])
print("=== END TEXTUAL REEL HEADER ===")
print()
print("=== BEGIN EXTENDED TEXTUAL HEADER ===")
print(segy_reader.extended_textual_header)
print("=== END EXTENDED TEXTUAL_HEADER ===")
print("=== BEGIN BINARY REEL HEADER ===")
b=segy_reader.binary_reel_header
print('\n'.join(['{:33.33s} {:10}'.format(k, getattr(b, k)) for k in b.ordered_field_names()]))
print("=== END BINARY REEL HEADER ===")
print("=== BEGIN FIRST TRACE HEADER ===")
b=segy_reader.trace_header(0)
print('\n'.join(['{:33.33s} {:10}'.format(k, getattr(b, k)) for k in b.ordered_field_names()]))
print("=== END FIRST TRACE HEADER ===")
if __name__ == '__main__':
main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment