Last active
April 21, 2016 01:45
-
-
Save wassname/4bd878e4d24e27a6bbaedfff4a4e7b37 to your computer and use it in GitHub Desktop.
Using segypy to read large files by assuming they have a fixed layout
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
See: https://github.com/sixty-north/segpy/issues/29 | |
This iherits it licence from segpy | |
This is for segpy commit: 91562fd | |
""" | |
import os | |
import sys | |
root = os.path.dirname(os.path.dirname(__file__)) | |
segpy_path=os.path.join(root,'segpy') | |
os.sys.path.append(segpy_path) | |
import segpy | |
from segpy import __version__ | |
from segpy.encoding import ASCII | |
from segpy.packer import make_header_packer | |
from segpy.trace_header import TraceHeaderRev1 | |
from segpy.util import file_length, filename_from_handle, make_sorted_distinct_sequence, hash_for_file, UNKNOWN_FILENAME | |
from segpy.datatypes import DATA_SAMPLE_FORMAT_TO_SEG_Y_TYPE, SEG_Y_TYPE_DESCRIPTION, SEG_Y_TYPE_TO_CTYPE, size_in_bytes | |
from segpy.toolkit import (extract_revision, | |
bytes_per_sample, | |
read_binary_reel_header, | |
read_trace_header, | |
catalog_traces, | |
read_binary_values, | |
REEL_HEADER_NUM_BYTES, | |
TRACE_HEADER_NUM_BYTES, | |
TEXTUAL_HEADER_NUM_BYTES, | |
BINARY_HEADER_NUM_BYTES, | |
read_textual_reel_header, | |
read_extended_textual_headers, | |
guess_textual_header_encoding) | |
from segpy.reader import SegYReader | |
from segpy.header import SubFormatMeta | |
def create_read_writer(fh, encoding=None, trace_header_format=TraceHeaderRev1, endian='>', progress=None, cache_directory=".segpy", fast=False): | |
"""Create a SegYReadWriter (or one of its subclasses) | |
This function is the preferred method for creating SegYReadWriter | |
objects. It reads basic header information and attempts to build | |
indexes for traces, CDP numbers (for 2D surveys), and inline and | |
cross line co-ordinates (for 3D surveys) to facilitate subsequent | |
random-access to traces. | |
Args: | |
fh: A file-like-object open in binary mode positioned such | |
that the beginning of the reel header will be the next | |
byte to be read. For disk-based SEG Y files, this is the | |
beginning of the file. | |
encoding: An optional text encoding for the textual headers. If | |
None (the default) a heuristic will be used to guess the | |
header encoding. | |
trace_header_format: An optional class defining the layout of the | |
trace header. Defaults to TraceHeaderRev1. | |
endian: '>' for big-endian data (the standard and default), '<' | |
for little-endian (non-standard) | |
progress: A unary callable which will be passed a number | |
between zero and one indicating the progress made. If | |
provided, this callback will be invoked at least once with | |
an argument equal to one. | |
cache_directory: The directory for the cache file. Relative paths | |
are interpreted as being relative to the directory containing | |
the SEG Y file. Absolute paths are used as is. If | |
cache_directory is None, caching is disabled. | |
Raises: | |
ValueError: ``fh`` is unsuitable for some reason, such as not | |
being open, not being seekable, not being in | |
binary mode, or being too short. | |
Returns: | |
A SegYReadWriter object. Depending on the exact type of the | |
SegYReadWriter returned different capabilities may be | |
available. Inspect the returned object to determine these | |
capabilities, or be prepared for capabilities not defined in | |
the SegYReadWriter base class to be unavailable. The underlying | |
file-like object must remain open for the duration of use of | |
the returned reader object. It is the caller's responsibility | |
to close the underlying file. | |
Example: | |
with open('my_seismic_data.sgy', 'rb') as fh: | |
reader = create_reader(fh) | |
print(reader.num_traces()) | |
""" | |
if hasattr(fh, 'encoding') and fh.encoding is not None: | |
raise TypeError( | |
"SegYReader must be provided with a binary mode file object") | |
if not fh.seekable(): | |
raise TypeError( | |
"SegYReader must be provided with a seekable file object") | |
if fh.closed: | |
raise ValueError( | |
"SegYReader must be provided with an open file object") | |
num_file_bytes = file_length(fh) | |
if num_file_bytes < REEL_HEADER_NUM_BYTES: | |
raise ValueError( | |
"SEG Y file {!r} of {} bytes is too short".format( | |
filename_from_handle(fh), | |
num_file_bytes)) | |
if endian not in ('<', '>'): | |
raise ValueError("Unrecognised endian value {!r}".format(endian)) | |
reader = None | |
cache_file_path = None | |
if cache_directory is not None: | |
sha1 = hash_for_file(fh, encoding, trace_header_format, endian) | |
seg_y_path = filename_from_handle(fh) | |
cache_file_path = segpy.reader._locate_cache_file(seg_y_path, cache_directory, sha1) | |
if cache_file_path is not None: | |
reader = segpy.reader._load_reader_from_cache(cache_file_path, seg_y_path) | |
if reader is None: | |
reader = _make_read_writer(fh, encoding, trace_header_format, endian, progress, fast=fast) | |
if cache_directory is not None: | |
segpy.reader._save_reader_to_cache(reader, cache_file_path) | |
return reader | |
def _make_read_writer(fh, encoding, trace_header_format, endian, progress, fast=False): | |
""" | |
Makes a fixed length segy reader. If catalouging fails it attempts a fixed | |
length catalog. | |
""" | |
if encoding is None: | |
encoding = guess_textual_header_encoding(fh) | |
if encoding is None: | |
encoding = ASCII | |
textual_reel_header = read_textual_reel_header(fh, encoding) | |
binary_reel_header = read_binary_reel_header(fh, endian) | |
extended_textual_header = read_extended_textual_headers(fh, binary_reel_header, encoding) | |
revision = extract_revision(binary_reel_header) | |
bps = bytes_per_sample(binary_reel_header, revision) | |
if fast: | |
try: | |
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format,endian, progress) | |
except Exception as e: | |
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_traces(fh, bps, trace_header_format,endian, progress) | |
else: | |
try: | |
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_traces(fh, bps, trace_header_format,endian, progress) | |
except Exception as e: | |
print("Error cataloging traces, trying fixed length catalog.") | |
fh.seek(REEL_HEADER_NUM_BYTES) | |
trace_offset_catalog, trace_length_catalog, cdp_catalog, line_catalog = catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format,endian, progress) | |
# This part deviated from segpy | |
# For the case where it has 2d and 3d attributes, we want it to be 3d in this case | |
# TODO implement this change to segpy | |
if line_catalog is not None: | |
return SegYReadWriter3D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, | |
trace_length_catalog, line_catalog, trace_header_format, encoding, endian) | |
if cdp_catalog is not None: | |
return SegYReadWriter2D(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, | |
trace_length_catalog, cdp_catalog, trace_header_format, encoding, endian) | |
return SegYReadWriter(fh, textual_reel_header, binary_reel_header, extended_textual_header, trace_offset_catalog, | |
trace_length_catalog, trace_header_format, encoding, endian) | |
def catalog_fixed_length_traces(fh, binary_reel_header, trace_header_format=TraceHeaderRev1, endian='>', progress=None): | |
"""Build catalogs to for a fixed length SEG Y file. This is much faster | |
than the full catalog, but has limitations. No CDP, or inline, xline | |
catalogs, and it only works for segy files with fixed legth SEG Y files. | |
Note: | |
This function is faster than the full catalog, but has limitations. | |
No CDP, or inline, xline catalogs, and it only works for SEG Y files | |
with a fixed number of samples in each trace. | |
Two catalogs will be built: | |
1. A catalog mapping trace_samples index (0-based) to the position of that | |
trace_samples header in the file. | |
2. A catalog mapping trace_samples index (0-based) to the number of | |
samples in that trace_samples. | |
Args: | |
fh: A file-like-object open in binary mode, positioned at the | |
start of the first trace_samples header. | |
bps: The number of bytes per sample, such as obtained by a call | |
to bytes_per_sample() | |
trace_header_format: The class defining the trace header format. | |
Defaults to TraceHeaderRev1. | |
endian: '>' for big-endian data (the standard and default), '<' | |
for little-endian (non-standard) | |
progress: A unary callable which will be passed a number | |
between zero and one indicating the progress made. If | |
provided, this callback will be invoked at least once with | |
an argument equal to 1 | |
Returns: | |
A 4-tuple of the form (trace_samples-offset-catalog, | |
trace_samples-length-catalog, | |
None, | |
None)` where | |
each catalog is an instance of ``collections.Mapping`` or None | |
if no catalog could be built. | |
""" | |
revision = extract_revision(binary_reel_header) | |
bps = bytes_per_sample(binary_reel_header, revision) | |
progress_callback = progress if progress is not None else lambda p: None | |
if not callable(progress_callback): | |
raise TypeError("catalog_traces(): progress callback must be callable") | |
class CatalogSubFormat(metaclass=SubFormatMeta, | |
parent_format=trace_header_format, | |
parent_field_names=( | |
'file_sequence_num', | |
'ensemble_num', | |
'num_samples', | |
'inline_number', | |
'crossline_number', | |
)): | |
pass | |
num_file_bytes = file_length(fh) | |
num_samples=binary_reel_header.num_samples | |
num_traces_float = (num_file_bytes-REEL_HEADER_NUM_BYTES)/(TRACE_HEADER_NUM_BYTES+num_samples*bps) | |
num_traces = int(num_traces_float) | |
if num_traces != num_traces_float: | |
raise ValueError( | |
"SEG Y file {!r} of {} bytes is not consistent with a fixed trace length".format( | |
filename_from_handle(fh), | |
num_file_bytes)) | |
trace_offset_catalog_builder = segpy.catalog.CatalogBuilder() | |
trace_length_catalog_builder = segpy.catalog.CatalogBuilder() | |
for trace_index in range(num_traces): | |
pos_begin=REEL_HEADER_NUM_BYTES+(num_samples * bps+TRACE_HEADER_NUM_BYTES) * trace_index | |
trace_length_catalog_builder.add(trace_index, num_samples) | |
trace_offset_catalog_builder.add(trace_index, pos_begin) | |
trace_offset_catalog = trace_offset_catalog_builder.create() | |
trace_length_catalog = trace_length_catalog_builder.create() | |
progress_callback(1) | |
return (trace_offset_catalog, | |
trace_length_catalog, | |
None, | |
None) | |
class SegYReadWriter(SegYReader): | |
""" | |
Mixin that extends SegyReader with Writing capabilities | |
""" | |
def trace_position(self,trace_index): | |
if not (0 <= trace_index < self.num_traces()): | |
raise ValueError("Trace index out of range.") | |
return self._trace_offset_catalog[trace_index] | |
def write_trace_header(self,trace_index,trace_header,force=True): | |
""" | |
Write a trace header in place | |
""" | |
pos=self.trace_position(trace_index) | |
try: | |
read_trace_header(self._fh, self._trace_header_packer, pos=pos) | |
except Exception as e: | |
print ("Could not read a trace header from trace_index={}, skipping writting. Pass force=False to force".format(trace_index)) | |
else: | |
segpy.toolkit.write_trace_header(self._fh,trace_header,self._trace_header_packer,pos) | |
def write_binary_reel_header(self,binary_reel_header): | |
self._fh.seek(REEL_HEADER_NUM_BYTES) | |
segpy.toolkit.write_binary_reel_header(self._fh, binary_reel_header, self.endian) | |
def write_textual_reel_header(self,textual_reel_header): | |
self._fh.seek(0) | |
segpy.toolkit.write_textual_reel_header(self._fh, textual_reel_header, self.encoding) | |
def write_extended_textual_headers(self,extended_textual_header): | |
self._fh.seek(TEXTUAL_HEADER_NUM_BYTES) | |
segpy.toolkit.write_extended_textual_headers(self._fh, extended_textual_header, self.encoding) | |
def write_trace_samples(self,trace_index,samples): | |
num_samples_in_trace = self.num_trace_samples(trace_index) | |
start_pos = (self.trace_position(trace_index) + TRACE_HEADER_NUM_BYTES) | |
if not num_samples_in_trace==len(samples): | |
raise ValueError( | |
"Length of samples {} does not fit in trace size {}".format(len(samples),num_samples_in_trace)) | |
segpy.toolkit.write_trace_samples(self._fh, samples, self.data_sample_format, pos=start_pos, endian='>') | |
class SegYReadWriter2D(segpy.reader.SegYReader2D,SegYReadWriter): | |
pass | |
class SegYReadWriter3D(segpy.reader.SegYReader3D,SegYReadWriter): | |
pass | |
def main(argv=None): | |
import sys | |
if argv is None: | |
argv = sys.argv[1:] | |
class ProgressBar(object): | |
def __init__(self, num_chars, character='.'): | |
self._num_chars = num_chars | |
self._character = character | |
self._ratchet = 0 | |
def __call__(self, proportion): | |
existing = self._num_marks(self._ratchet) | |
required = self._num_marks(proportion) | |
print(self._character * (required - existing), end='') | |
self._ratchet = proportion | |
def _num_marks(self, p): | |
return int(round(p * self._num_chars)) | |
filename = argv[0] | |
with open(filename, 'r+b') as segy_file: | |
segy_reader = create_read_writer(segy_file, progress=ProgressBar(30)) | |
trace_header = segy_reader.trace_header(0) | |
trace_header.shotpoint_scalar=trace_header.shotpoint_scalar | |
segy_reader.write_trace_header(0,trace_header) | |
binary_reel_header = segy_reader.binary_reel_header | |
binary_reel_header.num_samples=binary_reel_header.num_samples | |
segy_reader.write_binary_reel_header(binary_reel_header) | |
trace_samples=segy_reader.trace_samples(0) | |
segy_reader.write_trace_samples(0,trace_samples) | |
with open(filename, 'rb') as segy_file: | |
segy_reader = create_read_writer(segy_file, progress=ProgressBar(30)) | |
print() | |
print("Filename: ", segy_reader.filename) | |
print("SEG Y revision: ", segy_reader.revision) | |
print("Number of traces: ", segy_reader.num_traces()) | |
print("Data format: ", | |
segy_reader.data_sample_format_description) | |
print("Dimensionality: ", segy_reader.dimensionality) | |
try: | |
print("Number of CDPs: ", segy_reader.num_cdps()) | |
except AttributeError: | |
pass | |
try: | |
print("Number of inlines: ", segy_reader.num_inlines()) | |
print("Number of crosslines: ", segy_reader.num_xlines()) | |
except AttributeError: | |
pass | |
print("=== BEGIN TEXTUAL REEL HEADER ===") | |
for line in segy_reader.textual_reel_header: | |
print(line[3:]) | |
print("=== END TEXTUAL REEL HEADER ===") | |
print() | |
print("=== BEGIN EXTENDED TEXTUAL HEADER ===") | |
print(segy_reader.extended_textual_header) | |
print("=== END EXTENDED TEXTUAL_HEADER ===") | |
print("=== BEGIN BINARY REEL HEADER ===") | |
b=segy_reader.binary_reel_header | |
print('\n'.join(['{:33.33s} {:10}'.format(k, getattr(b, k)) for k in b.ordered_field_names()])) | |
print("=== END BINARY REEL HEADER ===") | |
print("=== BEGIN FIRST TRACE HEADER ===") | |
b=segy_reader.trace_header(0) | |
print('\n'.join(['{:33.33s} {:10}'.format(k, getattr(b, k)) for k in b.ordered_field_names()])) | |
print("=== END FIRST TRACE HEADER ===") | |
if __name__ == '__main__': | |
main() |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment