This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count missing values across the training signals, reading one parquet
# column at a time to keep peak memory low (each signal_id is its own column).
# NOTE: DataFrame.isnull() and DataFrame.isna() are aliases in pandas, so a
# single pass yields both figures — the original double bookkeeping was redundant.
missing_per_signal = []
for sig in metadata_train['signal_id'].values:
    sig_data = pd.read_parquet('/content/train.parquet',
                               engine='fastparquet', columns=[str(sig)])
    missing_per_signal.append(sig_data.isna().sum())
total_missing = np.sum(missing_per_signal)
print(f"Number of Null values in train data: {total_missing}")
print(f"Number of NaN values in train data: {total_missing}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Count missing values across the test signals, reading one parquet column
# at a time to keep peak memory low (each signal_id is its own column).
# NOTE: DataFrame.isnull() and DataFrame.isna() are aliases in pandas, so a
# single pass yields both figures — the original double bookkeeping was redundant.
missing_per_signal = []
for sig in metadata_test['signal_id'].values:
    sig_data = pd.read_parquet('/content/test.parquet',
                               engine='fastparquet', columns=[str(sig)])
    missing_per_signal.append(sig_data.isna().sum())
total_missing = np.sum(missing_per_signal)
print(f"Number of Null values in test data: {total_missing}")
print(f"Number of NaN values in test data: {total_missing}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Report the cardinality (number of distinct values) of every metadata column.
for column in metadata_train.columns:
    n_unique = len(metadata_train[column].unique())
    print(f"Number of unique values in {column} is {n_unique}")
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bar chart of the target-class distribution, with each bar annotated by its
# absolute count and percentage share of the whole training set.
splot = sns.countplot(x='target', data=metadata_train)
# Annotation-over-bars recipe: https://github.com/mwaskom/seaborn/issues/1582
n_total = metadata_train['target'].shape[0]
for ind, p in enumerate(splot.patches):
    class_count = metadata_train[metadata_train['target'] == ind].shape[0]
    percent = np.round((class_count / n_total) * 100, 2)
    splot.annotate(str(class_count) + f" ({percent}%)",
                   (p.get_x() + p.get_width() / 2, p.get_height()))
plt.title("Distribution of target classes")
plt.show()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Grouped bar chart: signal counts per phase, split by target class.
splot = sns.countplot(x="phase", data=metadata_train, hue="target")
# Signal count of phase 0, used as the per-phase total — assumes every phase
# has the same number of signals; TODO confirm.
total_phases = metadata_train[metadata_train['phase'] == 0].shape[0]
num_points = []
# Annotation-over-bars recipe: https://github.com/mwaskom/seaborn/issues/1582
for ind, p in enumerate(splot.patches):
    # Bars arrive grouped by hue: indices 0-2 belong to target=0 and 3-5 to
    # target=1; within each group the bars follow phase order 0, 1, 2.
    phase = ind % 3
    tar = ind // 3
    # NOTE(review): the annotation logic that presumably consumed `phase`,
    # `tar`, `total_phases` and `num_points` appears truncated in this extract.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Store the imbalance ratio values of each phase | |
avg_imb = [] | |
for phase in metadata_train['phase'].unique(): | |
num_0 = metadata_train[(metadata_train.phase==(phase)) & (metadata_train.target==0)].shape[0] | |
num_1 = metadata_train[(metadata_train.phase==(phase)) & (metadata_train.target==1)].shape[0] | |
imbalance_ratio = num_0/num_1 | |
avg_imb.append(imbalance_ratio) | |
print(f"Imbalance ratio - phase {phase}: {imbalance_ratio}") | |
print(f"Average imbalance ratio: {np.round(np.sum(avg_imb)/len(avg_imb), 2)}") |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Plot the first six raw signals — presumably two measurements of three
# phases each (TODO confirm row order) — grouping each run of three
# consecutive rows into one subplot.
sig_list = metadata_train.head(6).values
plt.figure(figsize=(25, 10))
n_rows = 2
n_cols = 1
for ind, row in enumerate(sig_list):
    subplot_idx = (ind // 3) + 1
    plt.subplot(n_rows, n_cols, subplot_idx)
    # Row layout assumed: [signal_id, id_measurement, phase, target].
    plt.plot(train[str(row[0])],
             label=f"signal_id-{row[0]}, phase-{row[2]}, id_measurement-{row[1]}")
    plt.legend()
    plt.title(f'Power Line Signal with target={row[3]}')
    plt.xlabel('Samples')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Multi-level discrete wavelet decomposition of the raw signal `x` using the
# Daubechies-4 ('db4') wavelet. Periodic padding ('per') is chosen because
# the power-line signal is periodic. Returns [cA_n, cD_n, ..., cD_1]:
# approximation coefficients first, then detail coefficients coarse-to-fine.
wavlt_coeffs = pywt.wavedec(x, 'db4', mode='per')
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Estimate the noise level from the finest-scale detail coefficients.
# NOTE(review): the classic Donoho–Johnstone estimator scales the *median*
# absolute deviation by 1/0.6745; this code uses the mean absolute deviation.
# Kept as-is to preserve behavior — confirm against the reference.
sigma = (1/0.6745) * np.mean(np.absolute(wavlt_coeffs[-1] - np.mean(wavlt_coeffs[-1], None)), None)
# Universal (VisuShrink) threshold: sigma * sqrt(2 * ln(N)).
Td = sigma * np.sqrt(2*np.log(len(x)))
# Hard-threshold EVERY detail-coefficient level (indices 1..end) — the
# original comment claimed only the first level, but the slice covers all of
# them. Index 0 holds the approximation coefficients and is left untouched.
wavlt_coeffs[1:] = [pywt.threshold(c, value=Td, mode='hard') for c in wavlt_coeffs[1:]]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Inverse wavelet transform: rebuild the denoised signal from the thresholded
# coefficients, mirroring the decomposition settings ('db4', periodic padding).
x_dn = pywt.waverec(wavlt_coeffs, 'db4', mode='per')
OlderNewer