Skip to content

Instantly share code, notes, and snippets.

@adcoh
Last active November 16, 2020 15:24
Show Gist options
  • Save adcoh/fe70ba8a9631f17d9510c15de1cdbcfb to your computer and use it in GitHub Desktop.
the create_track_objects method
def create_track_objects(self):
    """Build track-level training/validation examples from segment-level data.

    Reads `self.data_df` (one row per radar segment), splits it into train and
    validation sets, groups contiguous compatible segments of each track into
    "subtracks", concatenates their IQ matrices along the slow-time axis,
    converts them to scalograms or spectrograms per `self.output_data_type`,
    and wraps the results in `_Segment` objects.

    Returns:
        tuple[list[_Segment], list[_Segment]]: (train_segments, val_segments).
    """
    df = self.data_df
    df = self.split_train_val_as_pd(data=df, ratio=self.config.get('valratio', 6))
    df.sort_values(by=['track_id', 'segment_id'], inplace=True)
    # Map class labels to ints on the target column only. (A frame-wide
    # df.replace would also rewrite 'animal'/'human' appearing in any other
    # string column — the astype(int) on the next line shows the mapping is
    # intended for target_type alone.)
    df['target_type'] = df['target_type'].replace({'animal': 0, 'human': 1})
    df['target_type'] = df['target_type'].astype(int)
    # validating that each track consists of segments with same values in following columns
    columns_to_check = ['geolocation_type', 'geolocation_id', 'sensor_id', 'snr_type', 'date_index', 'target_type']
    # One boolean Series per column: True where the value equals the previous
    # segment's value within the same track (shift(1).bfill() makes each
    # track's first segment compare equal to itself).
    conditions = [(df.groupby('track_id')[col].shift(0) == df.groupby('track_id')[col].shift(1).bfill())
                  for col in columns_to_check]
    # NOTE(review): np.select with identical cond/choice lists reduces to an
    # element-wise OR — True when ANY column matches the previous segment. If
    # the intent was "ALL columns match" this should be
    # np.logical_and.reduce(conditions); confirm against the expected
    # subtrack segmentation before changing.
    df['usable'] = np.select(conditions, conditions, default=False)
    # Force a break at every validation segment and at the segment right
    # after one (validation rows are dropped below, creating a discontinuity).
    df.loc[df['is_validation'], 'usable'] = False
    df.loc[df['is_validation'].shift(1).fillna(False), 'usable'] = False
    # Invert: 'usable' now marks the START of a new contiguous subtrack.
    df['usable'] = ~df['usable']
    # save validation segments to object and drop from current DF
    val_df = df.loc[df.is_validation].copy().set_index(['track_id', 'segment_id'])
    df = df.loc[~df.is_validation].copy()
    # Creating a subtrack id for grouping into contiguous segments:
    # cumsum over the break markers numbers each contiguous run.
    df['subtrack_id'] = df.groupby('track_id').usable.cumsum()
    # create track-level dataframes (one row per contiguous subtrack)
    df_tracks = df.groupby(['track_id', 'subtrack_id']).agg(
        target_type=pd.NamedAgg(column="target_type", aggfunc='unique'),
        output_array=pd.NamedAgg(column="iq_sweep_burst", aggfunc=list),
        doppler_burst=pd.NamedAgg(column="doppler_burst", aggfunc=list),
        segment_count=pd.NamedAgg(column="segment_id", aggfunc='count'),
    )
    # Validation segments stay one-per-row (grouped by their own ids), so
    # each validation example is a single original segment.
    val_tracks = val_df.groupby(['track_id', 'segment_id']).agg(
        target_type=pd.NamedAgg(column="target_type", aggfunc='unique'),
        output_array=pd.NamedAgg(column="iq_sweep_burst", aggfunc=list),
        doppler_burst=pd.NamedAgg(column="doppler_burst", aggfunc=list),
        segment_count=pd.NamedAgg(column="target_type", aggfunc='count'),
    )
    # turning the 'unique' array into a scalar label
    df_tracks['target_type'] = df_tracks['target_type'].apply(lambda x: x[0])
    val_tracks['target_type'] = val_tracks['target_type'].apply(lambda x: x[0])
    # concatenating doppler bursts
    df_tracks['doppler_burst'] = df_tracks['doppler_burst'].apply(lambda x: np.concatenate(x, axis=-1))
    val_tracks['doppler_burst'] = val_tracks['doppler_burst'].apply(lambda x: np.concatenate(x, axis=-1))
    # concatenating IQ matrices along slow-time axis
    df_tracks['output_array'] = df_tracks['output_array'].apply(lambda x: np.concatenate(x, axis=1))
    val_tracks['output_array'] = val_tracks['output_array'].apply(lambda x: np.concatenate(x, axis=1))
    # transforming IQ to scalogram or spectrogram
    if self.output_data_type == 'scalogram':
        print('Converting IQ matrices to Scalogram')
        df_tracks['output_array'] = df_tracks['output_array'].progress_apply(
            iq_to_scalogram,
            transformation=self.config['mother_wavelet'],
            scale=self.config['scale'])
        val_tracks['output_array'] = val_tracks['output_array'].progress_apply(
            iq_to_scalogram,
            transformation=self.config['mother_wavelet'],
            scale=self.config['scale'])
    else:
        print('Converting IQ matrices to Spectrogram')
        df_tracks['output_array'] = df_tracks['output_array'].progress_apply(iq_to_spectogram)
        val_tracks['output_array'] = val_tracks['output_array'].progress_apply(iq_to_spectogram)
        # Optionally overlay the doppler-burst maxima onto the spectrograms.
        if self.config.get('include_doppler'):
            df_tracks['output_array'] = df_tracks.progress_apply(
                lambda row: max_value_on_doppler(row['output_array'], row['doppler_burst']), axis=1)
            val_tracks['output_array'] = val_tracks.progress_apply(
                lambda row: max_value_on_doppler(row['output_array'], row['doppler_burst']), axis=1)
    df_tracks['output_array'] = df_tracks['output_array'].progress_apply(normalize)
    val_tracks['output_array'] = val_tracks['output_array'].progress_apply(normalize)
    # generating list of _Segment objects; id is "<track_id>_<subtrack_id>"
    # for train and "<track_id>_<segment_id>" for validation.
    train_segments = [_Segment(segment_id=f'{k[0]}_{k[1]}', **v)
                      for k, v in df_tracks.to_dict(orient='index').items()]
    val_segments = [_Segment(segment_id=f'{k[0]}_{k[1]}', **v)
                    for k, v in val_tracks.to_dict(orient='index').items()]
    return train_segments, val_segments
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment