Skip to content

Instantly share code, notes, and snippets.

@khomman
Created June 5, 2019 16:06
Show Gist options
  • Save khomman/9de4d16f47cb60423c29366e87684e0a to your computer and use it in GitHub Desktop.
Save khomman/9de4d16f47cb60423c29366e87684e0a to your computer and use it in GitHub Desktop.
from random import shuffle
import numpy as np
from obspy import read,Stream,Trace, read_inventory,UTCDateTime
from obspy.taup import TauPyModel
from obspy.signal.filter import envelope
from obspy.core.util import AttribDict
from pyrtt import xcorr, util
_HEADERS = {'sac': ('stla','stlo','stel','evla','evlo',
'evdp','mag','o','a',
't0','gcarc','baz','dist')}
_STACKHEADERS = {}
def read_event(pathname_or_url=None, format=None, **kwargs):
"""
Read waveforms for an event into an EvStream object.
See :func:`~obspy.core.stream.read` in ObsPy.
"""
#if pathname_or_url is None:
#Set up to use Example file...pkg_resources?
#fname =
stream = read(pathname_or_url,**kwargs)
return EvStream(stream,**kwargs)
class EvStream(Stream):
"""
Extension of Obspy Stream object for calculating relative arrival
times.
"""
def __init__(self, traces=None, phase='P',model='ak135',inventory=None,
pick_arrivals=True):
"""
Creates the EvStream object based on an obspy stream object. Reads
anything Obspy can read and adds methods specific to automatic
relative arrival time picking. Methods based on Kolstrup (2015) and
the iterative cross correlation algorithm, Lou (2013).
:param traces:
:param phase: Phase arrival of interest.
:param model: Model to use to calculate theoretical arrival times
(iasp91, ak135, etc). If model is a valid obspy TauPyModel string
it will build the model. Otherwise, it is expecting a TauPyModel
object
:param inventory: Obpsy inventory object. REQUIRED FOR FILES OTHER
THAN SAC FORMAT
:param pick_arrivals: Add theoretical travel time arrivals to Trace
headers for selected phase.
"""
self.info = AttribDict()
self.info.phase = phase
self.traces = []
if isinstance(model,TauPyModel):
self.model = model
else:
self.model = TauPyModel(model)
if inventory:
self.inv = read_inventory(inventory)
if isinstance(traces,Trace):
traces = [traces]
if traces:
for tr in traces:
if not isinstance(tr,EvTrace):
tr = EvTrace(trace=tr)
# FIX HEADERS FOR CALCULATION when not SAC
# READ FROM PROVIDED
# CAT OBJECT (quakeML)?
#tr.stats['gcarc'] = tr.stats.sac['gcarc']
#tr.stats['evdp'] = tr.stats.sac['evdp']
#tr.stats['origintime'] = tr.stats.starttime
if pick_arrivals:
self._pick_theoretical_times(tr)
if tr.data.any(): #Blocks flatline traces
self.traces.append(tr)
def _pick_theoretical_times(self,tr,arrival_header='t0'):
"""
Picks the theoretical phase arrival time base on EvStream.model
Uses ObsPy.taup's get_travel_times to calculate the phase arrival
based on a global model. Calculates first arrival so it's important
to be sure of the phase inputs you provide.
"""
try:
dist = tr.stats['gcarc']
depth = tr.stats['evdp']
origin_time = tr.stats['origintime']
start_time = tr.stats['starttime']
arrivals = self.model.get_travel_times(source_depth_in_km=depth,
distance_in_degree=dist,phase_list=self.info.phase)
tmarker = origin_time + arrivals[0].time
tr.stats[arrival_header] = tmarker
except KeyError as e:
print('Trace {} is missing header information: {}'.format(tr,e))
print('Trace {} not added to Stream!'.format(tr.id))
return
def stack(self,kind='mean',header=True,window=None,arrival_header=None):
"""
Stacks all data in stream
:type kind: str
:param kind: Type of stack to create. Either mean, weighted, or
normalized_weighted
:type header: Bool
:param header: Stores the result of calculation in stream.info
:type window: tuple
:param window: Tuple containing the time to cut around the phase arrival
(5,5) is 5 seconds before and 5 seconds after arrival time
:type arrival_header: str
:param arrival_header: Header value containg the phase arrival time to cut
"""
stack = util.stack(self,kind=kind,window=window,arrival_header=arrival_header)
if header:
self.info.stack = stack.data
self.info.stack_kind = kind
self.info.max_stack = np.amax(stack.data)
self.info.stack_envelope = envelope(stack.data)
return stack
def envelope_rtt(self):
"""
Calculates the envelope function and the envelope max for each trace
and stores it in trace.stats.envelope and trace.stats.envmax
"""
for tr in self:
try:
data_env = envelope(tr.data)
max_env = np.amax(data_env)
tr.stats['envelope'] = data_env
tr.stats['envmax'] = max_env
except:
print(f'Envelope Calculation failed for: {tr.id}')
self.remove(tr)
def filter_rtt(self,freqmin,freqmax):
"""
Bandpass filters the stream for travel time picking.
:param freqmin: minimum frequency for bandpass filter
:param freqmax: maximum frequency for bandpass filter.
"""
for tr in self:
tr.stats['freqmin'] = freqmin
tr.stats['freqmax'] = freqmax
tr.filter('bandpass',freqmin=freqmin,freqmax=freqmax)
def trim_rtt(self,before,after,arrival_header='t0'):
for tr in self:
try:
####If t0 is bad..or clock is bad etc and start or end is
## past file time then trim sets the trace to 0 length
# Raise error? PickError- t0 not in range?
# if t0 in file time keep file if out remove from stream?
## Add Checks? but why is t0 so bad?! 201101..SSPA and PO Network
start_cut = tr.stats[arrival_header] - before
end_cut = tr.stats[arrival_header] + after
except KeyError as e:
print(f'Trace {tr.id} is missing header information')
print('Removing from stream')
self.remove(tr)
continue
if len(tr) == 0:
print(f'Arrival is out of range for {tr.id}')
self.remove(tr)
continue
if 'envelope' in tr.stats:
# make envelope a trace in envelope_rtt?
env_tr = Trace(data=tr.stats['envelope'],header=tr.stats)
env_tr.trim(starttime=start_cut, endtime=end_cut,
nearest_sample=False)
tr.trim(starttime=start_cut, endtime=end_cut,
nearest_sample=False)
try:
tr.stats['envmax'] = np.amax(env_tr.data)
except ValueError as e:
print(f"trim_rtt: {tr.id}\n\t{e}\nRemoving from stream")
self.remove(tr)
tr.stats['envelope'] = env_tr.data #Keep as a Trace?
else:
tr.trim(starttime=start_cut,endtime=end_cut,
nearest_sample=False)
def envelope_rejection(self,rejection_value):
"""
Removes trace from stream if the maximum of the envelope is
greater than rejection_value * median of the maximum envelopes in
stream.
"""
median_env = np.median([tr.stats.envmax for tr in self])
for tr in self.traces:
if tr.stats['envmax'] > (rejection_value * median_env):
print(f"envelope_rejection: Removing {tr.id} from stream!")
self.remove(tr)
def iter_env_rejection(self,rejection_threshold):
"""
Removes trace from stream based on the maximum difference of the
trace envelope and the stream stack's envelope
"""
self.stack()
newst = None
shuffle(self.traces)
for tr in self:
tr_env = tr.stats['envelope']
st_env = self.info['stack_envelope']
diff = np.abs(tr_env - st_env)/np.amax(st_env)
tr.stats['diff_env'] = diff
max_diff = np.amax(diff)
if max_diff > rejection_threshold:
print(f"Rejecting {tr.id} with a difference of {max_diff}")
self.remove(tr)
newst = 'failed'
break
if newst != None:
self.iter_env_rejection(rejection_threshold)
self.sort()
return
def _shuffle(self):
# Not necessary?
shuffle(self.traces)
def snr(self,stack=True,trace=True):
if stack:
stack_env_max = np.amax(self.info.stack_envelope)
stack_env_mean = np.mean(self.info.stack_envelope)
self.info.stream_snr = stack_env_max/stack_env_mean
if trace:
for tr in self:
tr.snr()
def iccs(self,iterations=3,min_trace=5,cc_cutoff=0.8,window=None,
input_header='t0',output_header='t1',convergence_cutoff=0.001):
it = 0
convergence = 1.0
if window is not None:
tmin,tmax = window
while it < iterations and convergence > convergence_cutoff:
if len(self) < min_trace:
break
if it == 0:
stack = self.stack(kind='normalized_weighted',header=False,
window=window,arrival_header=input_header)
for tr in self:
if window is not None:
tr_data = tr.window_around_arrival(tmin,tmax,arrival=input_header)
else:
tr_data = tr.data
delay,max_cc,pol_cc = xcorr.xcorr_fast(stack.data,tr_data)
coh = util.coherence(tr_data,stack)
#temporarily fix 0 trace problem
if max_cc < cc_cutoff and len(self)>min_trace:
self.remove(tr)
print(f'Removed {tr.id}. CC: {max_cc}')
else:
tr.stats.delay = delay
tr.stats.max_cc = max_cc
tr.stats[output_header] = tr.stats[input_header] + delay*tr.stats.delta
#print(delay,max_cc, tr.stats.t0, delay/tr.stats.sampling_rate)
#This causes value error when all of stream gets removed!
newstack = self.stack(kind='normalized_weighted',header=False,
window=window,arrival_header=output_header)
convergence = util.convergence(newstack,stack)
else:
stack = self.stack(kind='normalized_weighted',header=False,
window=window,arrival_header=output_header)
for tr in self:
if window is not None:
tr_data = tr.window_around_arrival(tmin,tmax,arrival=output_header)
else:
tr_data = tr.data
delay,max_cc,pol_cc = xcorr.xcorr_fast(stack.data,tr_data)
#Temporarily fix 0 trace problem
if max_cc < cc_cutoff and len(self)>min_trace:
self.remove(tr)
print(f'Removed {tr.id}. CC: {max_cc}')
else:
tr.stats.delay = delay
tr.stats.max_cc = max_cc
tr.stats[output_header] = tr.stats[output_header] + delay*tr.stats.delta
newstack = self.stack(kind='normalized_weighted',header=False,
window=window,arrival_header=output_header)
convergence = util.convergence(newstack,stack)
it += 1
return
def mccs(self,window=None,input_header='t2',output_header='t3'):
return
def plot_rtt(self,window=None,fig=None):
if fig:
fig = fig
def clean_duplicates(self):
bhz = []
hhz = []
dups = []
for tr in self:
chan = tr.id[-3:]
if chan == 'BHZ':
bhz.append(tr.id)
else:
hhz.append(tr.id)
for i in hhz:
hid = i
hid = hid[:-3] + 'BHZ'
#print(hid)
if hid in bhz:
print(f'Duplicate: Will remove {hid}')
dups.append(hid)
for tr in self:
if tr.id in dups:
self.remove(tr)
### TEMP METHOD TO PROCEED WITH MCCC AFTER ONLY DOING ICCS
### USED UNTIL MCCC IS IMPLEMENTED WITHIN THIS PACKAGE
def write_sac_for_mccc(self,path='',name='',arrival_header='t1'):
self.trim_rtt(1,2,arrival_header='t1')
for tr in self:
amarker = tr.stats.t1 - tr.stats.origintime
evnm = str(tr.stats.origintime)
tr.stats.sac['kevnm'] = evnm[:16].replace(':','').replace('-','')
tr.stats.sac['a'] = amarker
tr.write(f'{path}{tr.id}_{name}_mccc_inp.sac',format='SAC')
class EvTrace(Trace):
"""
Extension of Obspy Stream object for calculating relative arrival
times.
"""
def __init__(self, data=np.array([]),header=None,trace=None):
if header is None:
header = {}
if trace is not None:
data = trace.data
header = trace.stats
super(EvTrace,self).__init__(data=data,header=header)
stats = self.stats
if ('_format' in stats and stats._format.upper()=='SAC'):
form = 'sac'
for head in _HEADERS[form]:
try:
stats[head] = stats['sac'][head]
except KeyError:
pass
stats['origintime'] = stats['starttime'] + stats['o']
stats['weight'] = 1.0
#elif ('_format' in st and st._format.upper()=='MSEED'):
elif ('_format' in stats and stats._format.upper() != 'SAC'):
#Add Headers to Miniseed from staxml and quakeml
sta = stats.station
net = stats.network
chan = stats.channel
loc = stats.location
#try:
coords = self.inv.get_coordinates("{}.{}.{}.{}".format(net,sta,
loc,chan))
#except:
#print(e)
# print("Inventory file not found")
pass
else:
print("File type not supported")
def window_around_arrival(self,window,arrival='t0'):
'''
Windows a trace around an arrival time.
:type before: int
:param before: Seconds before the arrival
:type after: int
:param after: Seconds after the arrival
:type arrival: str, optional
:param arrival: Header, tr.stats.arrival, to cut around
:return: A Numpy ndarray
'''
####
#### SET UP TO PAD IF WINDOW IS OUT OF TIME RANGE
####
before,after = window
arrival_time = self.stats[arrival]
starttime = arrival_time - before
endtime = arrival_time + after
return self.slice(starttime=starttime,endtime=endtime,
nearest_sample=False).data
def snr(self):
'''
Computes the signal to noise ratio of the trace
'''
self.stats.snr = np.amax(self.data)/np.mean(self.data)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment