Created
May 27, 2021 13:21
-
-
Save mikemahoney218/b391050e2459bbd2e1b0c2208d4bc54b to your computer and use it in GitHub Desktop.
code for greater_gatsby
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from tqdm import trange
from big_sleep import Imagine
from os import listdir, remove, path

# Training schedule. Shared by the Imagine constructor and the manual loops
# below so the two cannot drift apart.
EPOCHS = 5
ITERATIONS = 100

# Every file in ./gatsby is treated as one prompt; only its first line is read.
gatsby_files = listdir('gatsby')

for file in gatsby_files:
    prompt_path = f'./gatsby/{file}'

    with open(prompt_path) as f:
        # readline() keeps the line terminator; strip it so the prompt handed
        # to the model is just the sentence text.
        text = f.readline().rstrip('\r\n')

    model = Imagine(
        text = text,
        lr = 0.05,
        iterations = ITERATIONS,
        epochs = EPOCHS,
        save_progress = False,
        seed = 123
    )

    # Drive the training steps by hand, one progress bar per level.
    for epoch in trange(EPOCHS, desc = 'epochs'):
        for i in trange(ITERATIONS, desc = 'iteration'):
            model.train_step(epoch, i)

    # Delete the prompt file once it has been processed, so a later run's
    # listdir() no longer picks it up.
    if path.exists(prompt_path):
        remove(prompt_path)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Obtained from
# https://www.gutenberg.org/files/64317/64317-h/64317-h.htm
# Read the whole text file into a single string.
gatsby <- readChar("gatsby.txt", file.info("gatsby.txt")$size)

# Poorly tokenize into sentences
# Line breaks become spaces, then every run of spaces collapses to one so
# the sentence tokenizer sees clean, single-spaced text.
gatsby <- gsub("\\n|\\r", " ", gatsby)
gatsby <- gsub(" +", " ", gatsby)
gatsby <- tokenizers::tokenize_sentences(gatsby, strip_punct = TRUE)[[1]]

# Write each sentence to a file
# showWarnings = FALSE keeps re-runs quiet when the directory already exists.
dir.create("gatsby", showWarnings = FALSE)
for (i in seq_along(gatsby)) {
  writeLines(
    # Keep at most the first 240 characters of each sentence.
    substr(gatsby[[i]], 1, 240),
    file.path("gatsby", paste0(i, ".txt"))
  )
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment