Skip to content

Instantly share code, notes, and snippets.

View girija2204's full-sized avatar
🏠
Working from home

Girija Shankar Behera girija2204

🏠
Working from home
View GitHub Profile
from sklearn.preprocessing import LabelEncoder
# Integer-encode the cat0 column: fit the encoder on the column and
# transform that same column in one step.
encoder = LabelEncoder()
cat0_encoded_le = encoder.fit_transform(tps_data.cat0)
# pd.value_counts() is deprecated (pandas >= 2.1); count through a Series.
pd.Series(cat0_encoded_le).value_counts()
# 0 223525
# 1 76475
# dtype: int64
def encode_leaveoneout(dataset, column, target_column="target"):
    """Add a leave-one-out encoded version of ``column`` to ``dataset``.

    A ``LeaveOneOutEncoder`` is fitted on ``dataset[column]`` against
    ``dataset[target_column]``; the transformed values are written to a new
    column named ``f"{column}_loo_enc"``.

    Args:
        dataset: Table supporting ``dataset[name]`` lookup and assignment
            (presumably a pandas DataFrame -- confirm against callers).
        column: Name of the column to encode.
        target_column: Name of the column holding the target values.
            Defaults to ``"target"``, the name that used to be hard-coded.

    Returns:
        The same ``dataset`` object, which is modified in place.
    """
    encoder = LeaveOneOutEncoder().fit(dataset[column], dataset[target_column])
    # NOTE(review): transform() is called without the target; confirm in the
    # category_encoders docs that this is the intended encoding for the rows
    # the encoder was fitted on.
    dataset[f"{column}_loo_enc"] = encoder.transform(dataset[column])
    return dataset
def encode_label(dataset, column):
    """Add a label-encoded version of ``column`` to ``dataset``.

    A ``LabelEncoder`` is fitted on ``dataset[column]`` and the encoded
    values are written to a new column named ``f"{column}_le_enc"``.

    Args:
        dataset: Table supporting ``dataset[name]`` lookup and assignment
            (presumably a pandas DataFrame -- confirm against callers).
        column: Name of the column to encode.

    Returns:
        The same ``dataset`` object, which is modified in place. Returning
        it keeps this helper consistent with ``encode_leaveoneout``.
    """
    dataset[f"{column}_le_enc"] = LabelEncoder().fit_transform(dataset[column])
    return dataset
from category_encoders import LeaveOneOutEncoder
# Fit the encoder on cat0 against the target column, then encode cat0.
# fit() and transform() are kept as separate calls on purpose: transform()
# is invoked without the target here.
encoder = LeaveOneOutEncoder().fit(tps_data.cat0, tps_data.target)
cat0_encoded_loo = encoder.transform(tps_data.cat0)
# pd.value_counts() is deprecated (pandas >= 2.1); use the Series method.
# The recorded output below shows two distinct encoded values.
cat0_encoded_loo['cat0'].value_counts()
# 0.334070 223525
# 0.062609 76475
# Name: cat0, dtype: int64
# Load the training table and inspect its columns.
tps_data = pd.read_csv("../input/tabular-playground-series-mar-2021/train.csv")
tps_data.columns
# Index(['id', 'cat0', 'cat1', 'cat2', 'cat3', 'cat4', 'cat5', 'cat6', 'cat7',
# 'cat8', 'cat9', 'cat10', 'cat11', 'cat12', 'cat13', 'cat14', 'cat15',
# 'cat16', 'cat17', 'cat18', 'cont0', 'cont1', 'cont2', 'cont3', 'cont4',
# 'cont5', 'cont6', 'cont7', 'cont8', 'cont9', 'cont10', 'target'],
# dtype='object')
# Frequency of each cat0 category. pd.value_counts() is deprecated
# (pandas >= 2.1), so the Series method is used instead.
tps_data.cat0.value_counts()
# A 223525
# Build the training pairs and their labels from the dataset.
images_pair, labels_pair = generate_train_image_pairs(
    images_dataset, labels_dataset
)
# The first and second element of every pair are passed as the two model
# inputs; the remaining keyword arguments are forwarded to fit() unchanged.
history = model.fit(
    [images_pair[:, 0], images_pair[:, 1]],
    labels_pair[:],
    validation_split=0.1,
    batch_size=64,
    epochs=100,
)
image = images_dataset[92]  # fixed sample (index 92) used as the test image
# Produce an array of test image pairs and test label pairs.
test_image_pairs, test_label_pairs = generate_test_image_pairs(
    images_dataset, labels_dataset, image
)
# For each pair in the test image pairs, predict the similarity between the
# two images.
for index, pair in enumerate(test_image_pairs):
    # Each image gets a trailing axis and then a leading axis before it is
    # handed to the model (presumably channel and batch axes -- TODO confirm).
    pair_image1 = np.expand_dims(np.expand_dims(pair[0], axis=-1), axis=0)
    pair_image2 = np.expand_dims(np.expand_dims(pair[1], axis=-1), axis=0)
    # First element of the first output row of the prediction.
    # NOTE(review): the excerpt ends here; `prediction` and `index` are not
    # used within the visible part of the loop.
    prediction = model.predict([pair_image1, pair_image2])[0][0]
def generate_test_image_pairs(images_dataset, labels_dataset, image):
    """Set up the bookkeeping used to build test pairs for ``image``.

    The visible body groups the positions of ``labels_dataset`` by unique
    label and creates two empty accumulators.

    NOTE(review): this excerpt stops right after the accumulators are
    created -- no pair is appended or returned, and ``images_dataset`` and
    ``image`` are not used yet. The remainder of the body is presumably
    outside this view; nothing has been invented for it.

    Args:
        images_dataset: Collection of images (unused in the visible part).
        labels_dataset: Iterable of labels accepted by ``np.unique``.
        image: The test image (unused in the visible part).
    """
    unique_labels = np.unique(labels_dataset)
    # Map every distinct label to the list of positions where it occurs.
    label_wise_indices = {
        label: [
            position
            for position, current_label in enumerate(labels_dataset)
            if label == current_label
        ]
        for label in unique_labels
    }
    pair_images = []
    pair_labels = []
def generate_train_image_pairs(images_dataset, labels_dataset):
    """Set up the bookkeeping used to build training pairs.

    The visible body groups the positions of ``labels_dataset`` by unique
    label and creates two empty accumulators.

    NOTE(review): this excerpt stops right after the accumulators are
    created -- no pair is appended or returned, and ``images_dataset`` is
    not used yet. The remainder of the body is presumably outside this
    view; nothing has been invented for it.

    Args:
        images_dataset: Collection of images (unused in the visible part).
        labels_dataset: Iterable of labels accepted by ``np.unique``.
    """
    unique_labels = np.unique(labels_dataset)
    # Map every distinct label to the list of positions where it occurs.
    label_wise_indices = {
        label: [
            position
            for position, current_label in enumerate(labels_dataset)
            if label == current_label
        ]
        for label in unique_labels
    }
    pair_images = []
    pair_labels = []
# Configure training: binary cross-entropy loss, the "adam" optimizer, and
# accuracy as the reported metric.
model.compile(
    loss="binary_crossentropy",
    optimizer="adam",
    metrics=["accuracy"],
)
def euclidean_distance(vectors):
    """Return the Euclidean distance between two feature tensors.

    Args:
        vectors: A pair ``(featA, featB)`` of tensors that can be subtracted
            element-wise (assumes shape (batch, features) -- TODO confirm).

    Returns:
        The square root of the squared differences summed over axis 1, with
        that axis kept as size 1. The sum is clamped to at least
        ``k.epsilon()`` before the root is taken.
    """
    first, second = vectors
    squared_difference = k.square(first - second)
    summed = k.sum(squared_difference, axis=1, keepdims=True)
    # Clamp away from zero before the square root (presumably to keep the
    # gradient finite when both inputs are identical).
    return k.sqrt(k.maximum(summed, k.epsilon()))
# Distance between the two feature tensors, computed by euclidean_distance.
distance = Lambda(euclidean_distance)(
    [featA, featB]
)
# A single sigmoid unit turns the distance into the model output.
outputs = Dense(1, activation="sigmoid")(distance)
# Two-input model: imgA and imgB in, the sigmoid output out.
model = Model(
    inputs=[imgA, imgB],
    outputs=outputs,
)