Created
August 29, 2020 21:57
-
-
Save douglaspsteen/bc824ab47714dd5f541bcd225a131e55 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shuffle the rows with a fixed seed so the split is reproducible, and
# rebuild a fresh 0..n-1 index for the shuffled frame.
df = df.sample(frac=1, random_state=15).reset_index(drop=True)

# Generate split boundaries: 25% test, 1% train, remainder unlabeled.
n_rows = len(df)
test_ind = round(n_rows * 0.25)
train_ind = test_ind + round(n_rows * 0.01)
# The unlabeled partition takes every remaining row. Adding a separately
# rounded 74% share could stop short of the end and silently drop trailing
# rows (e.g. 50 rows -> 12 + 0 + 37 = 49).
unlabeled_ind = n_rows

# Partition the data by position
test = df.iloc[:test_ind]
train = df.iloc[test_ind:train_ind]
unlabeled = df.iloc[train_ind:unlabeled_ind]

# Assign data to train, test, and unlabeled sets; the 'complication' column
# is used as the label. No label vector is kept for the unlabeled partition.
X_train = train.drop('complication', axis=1)
y_train = train['complication']

X_unlabeled = unlabeled.drop('complication', axis=1)

X_test = test.drop('complication', axis=1)
y_test = test['complication']

# Check dimensions of data after splitting
print(f"X_train dimensions: {X_train.shape}")
print(f"y_train dimensions: {y_train.shape}\n")

print(f"X_test dimensions: {X_test.shape}")
print(f"y_test dimensions: {y_test.shape}\n")

print(f"X_unlabeled dimensions: {X_unlabeled.shape}")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment