Skip to content

Instantly share code, notes, and snippets.

Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save therealnaveenkamal/d9a5bc6dd94e7cfa30285d4325ec92a0 to your computer and use it in GitHub Desktop.
Save therealnaveenkamal/d9a5bc6dd94e7cfa30285d4325ec92a0 to your computer and use it in GitHub Desktop.
This code checks for the overlap of patients between train, validation and test dataset and removes the respective patients from their datasets.
def isOverlap(s1, s2):
total = set(s1).intersection(set(s2))
return [len(total), total]
def overlapcheck(trainset, valset, testset):
patid_train = []
patid_val = []
patid_test = []
for name in trainset['Image'].values:
patid_train.append(int(name.split("_")[0]))
for name in valset['Image'].values:
patid_val.append(int(name.split("_")[0]))
for name in testset['Image'].values:
patid_test.append(int(name.split("_")[0]))
trte = isOverlap(patid_train, patid_test)
teva = isOverlap(patid_test, patid_val)
trva = isOverlap(patid_train, patid_val)
print("Patient Overlap - Train and Test: ", trte[0])
print("Patient Overlap - Test and Validation: ", teva[0])
print("Patient Overlap - Train and Validation: ", trva[0])
return trte, teva, trva
#Checking for overlaps between trainset, testset and validation set
trte, teva, trva = overlapcheck(trainset, valset, testset)
#Removing overlapping patients
for i in trva[1]:
for name in trainset['Image'].values:
if(int(name.split("_")[0]) == i):
trainset.drop(trainset.loc[trainset['Image'] == name].index, inplace=True)
#Checking for overlaps after removing common patients
trte, teva, trva = overlapcheck(trainset, valset, testset)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment