Created
July 20, 2021 06:08
-
-
Save therealnaveenkamal/d9a5bc6dd94e7cfa30285d4325ec92a0 to your computer and use it in GitHub Desktop.
This code checks for patient overlap between the train, validation and test datasets and removes the overlapping patients from the respective datasets.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def isOverlap(s1, s2):
    """Return the elements shared by two iterables.

    Args:
        s1: Iterable of hashable items.
        s2: Iterable of hashable items.

    Returns:
        A two-element list ``[count, shared]`` where ``shared`` is the set
        of items present in both inputs and ``count`` is its size.
    """
    # ``intersection`` accepts any iterable, so only one explicit set is needed.
    total = set(s1).intersection(s2)
    return [len(total), total]
def _patient_ids(dataset):
    """Return the integer patient ID parsed from every name in ``dataset['Image']``."""
    # The patient ID is the text before the first underscore of the image name.
    return [int(name.split("_")[0]) for name in dataset['Image'].values]


def overlapcheck(trainset, valset, testset):
    """Print and return the pairwise patient overlap between the three splits.

    Args:
        trainset: Table whose ``'Image'`` column holds image names that start
            with an integer patient ID followed by ``"_"``.
        valset: Same layout as ``trainset``, for the validation split.
        testset: Same layout as ``trainset``, for the test split.

    Returns:
        A tuple ``(trte, teva, trva)`` holding the ``isOverlap`` result for
        train/test, test/validation and train/validation respectively. Each
        result is ``[count, shared_patient_ids]``.

    Raises:
        ValueError: If an image name does not start with an integer ID.
    """
    patid_train = _patient_ids(trainset)
    patid_val = _patient_ids(valset)
    patid_test = _patient_ids(testset)

    trte = isOverlap(patid_train, patid_test)
    teva = isOverlap(patid_test, patid_val)
    trva = isOverlap(patid_train, patid_val)

    print("Patient Overlap - Train and Test: ", trte[0])
    print("Patient Overlap - Test and Validation: ", teva[0])
    print("Patient Overlap - Train and Validation: ", trva[0])

    return trte, teva, trva
# Checking for overlaps between trainset, testset and validation set
trte, teva, trva = overlapcheck(trainset, valset, testset)

# Removing overlapping patients.
# The test set is left untouched. The training set loses every patient it
# shares with the validation or test set, and the validation set loses every
# patient it shares with the test set, so no pairwise overlap remains.
for dataset, shared_ids in (
    (trainset, trte[1] | trva[1]),
    (valset, teva[1]),
):
    if not shared_ids:
        continue
    # Patient ID = integer prefix of the image name, before the first "_".
    row_ids = dataset['Image'].map(lambda name: int(name.split("_")[0]))
    # One boolean mask per dataset instead of one ``drop`` call per row.
    leaking = row_ids.isin(list(shared_ids)).to_numpy()
    # Drop in place so other references to the same DataFrame see the change.
    dataset.drop(index=dataset.index[leaking], inplace=True)

# Checking for overlaps after removing common patients
trte, teva, trva = overlapcheck(trainset, valset, testset)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment