This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def split_train_test(tweet_vectors, randomized_tweet_vectors) -> tuple:
    """
    Split the shuffled tweet vectors into an 80% train set and a 20% test set.

    Only the shape of ``tweet_vectors`` is used (row and column counts); the
    values copied into both sets come from ``randomized_tweet_vectors``.

    :param tweet_vectors: tweets in vector form; a 2-D array whose ``shape``
        gives the number of rows to split and the number of columns to copy
    :param randomized_tweet_vectors: 2-D matrix the rows are taken from, in
        order; it must have at least as many rows and columns as
        ``tweet_vectors``
    :return: train_set, test_set tuple of train set and test set, both
        ``int`` arrays (values are cast to ``int`` when copied)
    :raises IndexError: if ``randomized_tweet_vectors`` has fewer rows or
        columns than ``tweet_vectors``
    """
    n_rows = tweet_vectors.shape[0]
    n_cols = tweet_vectors.shape[1]

    train_rows = math.floor(0.8 * n_rows)  # Use 80% of data for train set
    # The remainder (20%, rounded up) goes to the test set. Deriving it from
    # train_rows guarantees the two parts always cover exactly n_rows rows.
    test_rows = n_rows - train_rows

    train_set = np.zeros((train_rows, n_cols), dtype=int)
    test_set = np.zeros((test_rows, n_cols), dtype=int)
    if n_rows == 0 or n_cols == 0:
        # Nothing to copy; return the (empty) buffers as they are.
        return train_set, test_set

    shuffled = np.asarray(randomized_tweet_vectors)
    if (
        shuffled.ndim < 2
        or shuffled.shape[0] < n_rows
        or shuffled.shape[1] < n_cols
    ):
        # Fail loudly instead of letting slice assignment broadcast or
        # silently truncate a too-small input.
        raise IndexError(
            "randomized_tweet_vectors must have at least "
            f"{n_rows} rows and {n_cols} columns"
        )

    # Slice assignment copies whole sub-matrices at once and casts the values
    # into the pre-allocated int buffers, replacing the per-element loops.
    train_set[:, :] = shuffled[:train_rows, :n_cols]
    test_set[:, :] = shuffled[train_rows:n_rows, :n_cols]
    return train_set, test_set
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment