This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Label-encode the binary categorical column `cat0` (maps its two
# categories to the integers 0 and 1).
from sklearn.preprocessing import LabelEncoder

encoder = LabelEncoder()
encoder = encoder.fit(tps_data.cat0)
cat0_encoded_le = encoder.transform(tps_data.cat0)
# Fix: the top-level `pd.value_counts(...)` is deprecated (pandas >= 2.1);
# wrap the ndarray in a Series and call `.value_counts()` instead.
pd.Series(cat0_encoded_le).value_counts()
# 0    223525
# 1     76475
# dtype: int64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def encode_leaveoneout(dataset, column):
    """Add a leave-one-out target-encoded copy of *column* to *dataset*.

    Fits a ``LeaveOneOutEncoder`` on ``dataset[column]`` against
    ``dataset['target']`` and stores the encoded values in a new column
    named ``"<column>_loo_enc"``. The frame is modified in place and
    also returned.
    """
    fitted = LeaveOneOutEncoder().fit(dataset[column], dataset['target'])
    dataset[f"{column}_loo_enc"] = fitted.transform(dataset[column])
    return dataset
def encode_label(dataset, column):
    """Add an integer label-encoded copy of *column* to *dataset*.

    Fits a ``LabelEncoder`` on ``dataset[column]`` and stores the encoded
    values in a new column named ``"<column>_le_enc"``. The frame is
    modified in place and returned.
    """
    encoder = LabelEncoder()
    encoder = encoder.fit(dataset[column])
    dataset[f"{column}_le_enc"] = encoder.transform(dataset[column])
    # Fix: the original fell off the end (implicitly returning None),
    # unlike its sibling encode_leaveoneout; return the frame so the two
    # helpers have consistent, chainable signatures.
    return dataset
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Leave-one-out target-encode `cat0`: each row's category is replaced by
# the mean of `target` over the *other* rows sharing that category, which
# is why the two categories collapse to two distinct float values below.
from category_encoders import LeaveOneOutEncoder
encoder = LeaveOneOutEncoder()
encoder = encoder.fit(tps_data.cat0,tps_data.target)
# transform returns a DataFrame, hence the ['cat0'] column access below
cat0_encoded_loo = encoder.transform(tps_data.cat0)
pd.value_counts(cat0_encoded_loo['cat0'])
# 0.334070    223525
# 0.062609     76475
# Name: cat0, dtype: int64
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Load the Tabular Playground Series (March 2021) training data:
# an id column, 19 categorical features, 11 continuous features, and the
# binary target (see the column listing below).
tps_data = pd.read_csv("../input/tabular-playground-series-mar-2021/train.csv")
tps_data.columns
# Index(['id', 'cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7',
#        'cat8', 'cat9', 'cat10', 'cat11', 'cat12', 'cat13', 'cat14', 'cat15',
#        'cat16', 'cat17', 'cat18', 'cont0', 'cont1', 'cont2', 'cont3', 'cont4',
#        'cont5', 'cont6', 'cont7', 'cont8', 'cont9', 'cont10', 'target'],
#       dtype='object')
pd.value_counts(tps_data.cat0)
# A 223525
# NOTE(review): the output listing is truncated here by the snippet
# preview — the count for the second category is not visible.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build training pairs and fit the siamese model: slot 0 and slot 1 of
# each pair feed the model's two image inputs, labels_pair the per-pair
# targets. 10% of the pairs are held out for validation.
images_pair, labels_pair = generate_train_image_pairs(images_dataset, labels_dataset)
history = model.fit([images_pair[:, 0], images_pair[:, 1]], labels_pair[:],validation_split=0.1,batch_size=64,epochs=100)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
image = images_dataset[92] # a random image as test image
test_image_pairs, test_label_pairs = generate_test_image_pairs(images_dataset, labels_dataset, image) # produce an array of test image pairs and test label pairs
# for each pair in the test image pair, predict the similarity between the images
for index, pair in enumerate(test_image_pairs):
    # Add a trailing channel axis then a leading batch axis so each image
    # becomes a single-sample batch — presumably matching a (1, H, W, 1)
    # model input; confirm against the model definition.
    pair_image1 = np.expand_dims(pair[0], axis=-1)
    pair_image1 = np.expand_dims(pair_image1, axis=0)
    pair_image2 = np.expand_dims(pair[1], axis=-1)
    pair_image2 = np.expand_dims(pair_image2, axis=0)
    # NOTE(review): `prediction` is overwritten on every iteration and not
    # stored or printed in this snippet — collect the per-pair scores if
    # they are needed after the loop.
    prediction = model.predict([pair_image1, pair_image2])[0][0]
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_test_image_pairs(images_dataset, labels_dataset, image):
    """Build test pairs matching *image* against samples from the dataset.

    NOTE(review): this snippet is truncated by the preview — the pairing
    logic and the return statement are not visible here.
    """
    unique_labels = np.unique(labels_dataset)
    # Map each label -> list of dataset indices carrying that label.
    label_wise_indices = dict()
    for label in unique_labels:
        label_wise_indices.setdefault(label,
                                      [index for index, curr_label in enumerate(labels_dataset) if
                                       label == curr_label])
    pair_images = []
    pair_labels = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def generate_train_image_pairs(images_dataset, labels_dataset):
    """Build (image, image) training pairs for the siamese model.

    NOTE(review): this snippet is truncated by the preview — the pairing
    logic and the return statement are not visible here.
    """
    unique_labels = np.unique(labels_dataset)
    # Map each label -> list of dataset indices carrying that label.
    label_wise_indices = dict()
    for label in unique_labels:
        label_wise_indices.setdefault(label,
                                      [index for index, curr_label in enumerate(labels_dataset) if
                                       label == curr_label])
    pair_images = []
    pair_labels = []
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Binary cross-entropy matches the single sigmoid output of the siamese
# head — presumably trained against 0/1 same/different pair labels;
# confirm against the pair-generation code.
model.compile(loss="binary_crossentropy", optimizer="adam", metrics=["accuracy"])
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def euclidean_distance(vectors):
    """Return the per-row Euclidean (L2) distance between the two feature
    tensors packed in *vectors*, keeping the reduced axis."""
    feat_a, feat_b = vectors
    squared_diff = k.square(feat_a - feat_b)
    dist_sq = k.sum(squared_diff, axis=1, keepdims=True)
    # clamp at epsilon so sqrt never sees an exact zero
    return k.sqrt(k.maximum(dist_sq, k.epsilon()))
# Wrap the distance computation in a Lambda layer so it runs on the two
# branch feature tensors inside the Keras graph.
distance = Lambda(euclidean_distance)([featA, featB])
# A single sigmoid unit maps the distance to a 0..1 similarity score.
outputs = Dense(1, activation="sigmoid")(distance)
model = Model(inputs=[imgA, imgB], outputs=outputs)
Newer | Older