Last active
November 16, 2020 15:24
-
-
Save adcoh/fe70ba8a9631f17d9510c15de1cdbcfb to your computer and use it in GitHub Desktop.
the create_track_objects method
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def create_track_objects(self):
    """Build train/validation track objects from the segment-level DataFrame.

    Splits ``self.data_df`` into train/validation rows, groups contiguous and
    mutually-consistent segments of each track into "subtracks", concatenates
    their IQ data and doppler bursts, converts the IQ matrices to scalograms
    or spectrograms per ``self.config``, normalizes them, and wraps each
    result in a ``_Segment`` object.

    Returns:
        tuple: ``(train_segments, val_segments)`` — two lists of ``_Segment``
        objects (training subtracks, validation single segments).
    """
    df = self.data_df
    # Tag rows as train/validation; adds the 'is_validation' column used below.
    df = self.split_train_val_as_pd(data=df, ratio=self.config.get('valratio', 6))
    df.sort_values(by=['track_id', 'segment_id'], inplace=True)
    # Encode the label as an integer: animal -> 0, human -> 1.
    df.replace({'animal': 0, 'human': 1}, inplace=True)
    df['target_type'] = df['target_type'].astype(int)
    # validating that each track consists of segments with same values in following columns
    columns_to_check = ['geolocation_type', 'geolocation_id', 'sensor_id', 'snr_type', 'date_index', 'target_type']
    # creating boolean matrix for np.select
    # For each column: True where a segment's value equals the previous
    # segment's value within the same track (shift(1) within the group;
    # bfill fills the first row of each track from the one after it).
    conditions = [(df.groupby('track_id')[col].shift(0) == df.groupby('track_id')[col].shift(1).bfill())
                  for col in columns_to_check]
    # NOTE(review): np.select(conditions, conditions, default=False) evaluates
    # to True wherever ANY single condition holds (first matching condition's
    # value is returned). If the intent is that ALL listed columns must match
    # the previous segment for it to count as a continuation, this should be a
    # logical AND over `conditions` — confirm against expected behavior.
    df['usable'] = np.select(conditions, conditions, default=False)
    # Force a subtrack break at validation rows and at the row right after one.
    # NOTE(review): this shift(1) runs over the whole frame, not per track —
    # verify that bleed across track boundaries is acceptable here.
    df.loc[df['is_validation'], 'usable'] = False
    df.loc[df['is_validation'].shift(1).fillna(False), 'usable'] = False
    # Invert: after this, 'usable' == True marks the START of a new subtrack.
    df['usable'] = ~df['usable']
    # save validation segments to object and drop from current DF
    val_df = df.loc[df.is_validation].copy().set_index(['track_id', 'segment_id'])
    df = df.loc[~df.is_validation].copy()
    # Creating a subtrack id for grouping into contiguous segments
    # (cumulative sum of break flags numbers each contiguous run per track).
    df['subtrack_id'] = df.groupby('track_id').usable.cumsum()
    # create track-level dataframes
    # Each subtrack collects its segments: lists of IQ arrays / doppler bursts
    # (concatenated further below) plus a segment count.
    df_tracks = df.groupby(['track_id', 'subtrack_id']).agg(
        target_type=pd.NamedAgg(column="target_type", aggfunc='unique'),
        output_array=pd.NamedAgg(column="iq_sweep_burst", aggfunc=list),
        doppler_burst=pd.NamedAgg(column="doppler_burst", aggfunc=list),
        segment_count=pd.NamedAgg(column="segment_id", aggfunc='count'),
    )
    # Validation rows stay as single segments (grouped by their own index).
    # NOTE(review): segment_count here counts 'target_type' instead of
    # 'segment_id' as above — same result for non-null columns, but
    # inconsistent with the train aggregation.
    val_tracks = val_df.groupby(['track_id', 'segment_id']).agg(
        target_type=pd.NamedAgg(column="target_type", aggfunc='unique'),
        output_array=pd.NamedAgg(column="iq_sweep_burst", aggfunc=list),
        doppler_burst=pd.NamedAgg(column="doppler_burst", aggfunc=list),
        segment_count=pd.NamedAgg(column="target_type", aggfunc='count'),
    )
    # turning array into scalar
    # ('unique' aggfunc yields an array; each subtrack has one label by
    # construction of columns_to_check, so take the first element).
    df_tracks['target_type'] = df_tracks['target_type'].apply(lambda x: x[0])
    val_tracks['target_type'] = val_tracks['target_type'].apply(lambda x: x[0])
    # concatenating doppler bursts
    df_tracks['doppler_burst'] = df_tracks['doppler_burst'].apply(lambda x: np.concatenate(x, axis=-1))
    val_tracks['doppler_burst'] = val_tracks['doppler_burst'].apply(lambda x: np.concatenate(x, axis=-1))
    # concatenating IQ matrices along slow-time axis
    # (assumes axis 1 of each 'iq_sweep_burst' array is slow time — TODO confirm)
    df_tracks['output_array'] = df_tracks['output_array'].apply(lambda x: np.concatenate(x, axis=1))
    val_tracks['output_array'] = val_tracks['output_array'].apply(lambda x: np.concatenate(x, axis=1))
    # transforming IQ to scalogram or spectrogram
    if self.output_data_type == 'scalogram':
        print('Converting IQ matricies to Scalogram')
        df_tracks['output_array'] = df_tracks['output_array'].progress_apply(iq_to_scalogram,
                                                                             transformation=self.config['mother_wavelet'],
                                                                             scale=self.config['scale'])
        val_tracks['output_array'] = val_tracks['output_array'].progress_apply(iq_to_scalogram,
                                                                               transformation=self.config['mother_wavelet'],
                                                                               scale=self.config['scale'])
    else:
        print('Converting IQ matricies to Spectrogram')
        df_tracks['output_array'] = df_tracks['output_array'].progress_apply(iq_to_spectogram)
        val_tracks['output_array'] = val_tracks['output_array'].progress_apply(iq_to_spectogram)
    # Optionally overlay doppler-burst maxima onto the output arrays.
    if self.config.get('include_doppler'):
        df_tracks['output_array'] = df_tracks.progress_apply(lambda row: max_value_on_doppler(row['output_array'],
                                                                                              row['doppler_burst']), axis=1)
        val_tracks['output_array'] = val_tracks.progress_apply(lambda row: max_value_on_doppler(row['output_array'],
                                                                                                row['doppler_burst']), axis=1)
    df_tracks['output_array'] = df_tracks['output_array'].progress_apply(normalize)
    val_tracks['output_array'] = val_tracks['output_array'].progress_apply(normalize)
    # generating list of _Segment objects
    # The group index keys are (track_id, subtrack_id) / (track_id, segment_id)
    # tuples; join them with '_' to form each object's unique segment_id.
    train_segments = [_Segment(segment_id=f'{k[0]}_{k[1]}', **v) for k, v in
                      df_tracks.to_dict(orient='index').items()]
    val_segments = [_Segment(segment_id=f'{k[0]}_{k[1]}', **v) for k, v in
                    val_tracks.to_dict(orient='index').items()]
    return train_segments, val_segments
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment