Skip to content

Instantly share code, notes, and snippets.

@sakti
Created November 14, 2016 13:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save sakti/72507c3af42ef14765d86c57a3ea1279 to your computer and use it in GitHub Desktop.
Save sakti/72507c3af42ef14765d86c57a3ea1279 to your computer and use it in GitHub Desktop.
#!/usr/bin/env python
"""Download random text files from Project Gutenberg into the current directory.

Run `pip install Gutenberg` first.
"""
import random
from gutenberg.acquire import load_etext
from gutenberg.cleanup import strip_headers
# Every value handed out by get_unique_random so far, in call order. Shared
# across calls so that no value is returned twice during one run.
list_generated_random = []


def get_unique_random(lower, upper):
    """Return a random integer in ``[lower, upper)`` that was not returned before.

    The chosen value is appended to the module-level
    ``list_generated_random`` so that later calls will not repeat it.

    Args:
        lower: inclusive lower bound of the range to draw from.
        upper: exclusive upper bound of the range to draw from.

    Returns:
        An integer ``n`` with ``lower <= n < upper`` that is not already in
        ``list_generated_random``.

    Raises:
        ValueError: if the range is empty, or if every integer in it has
            already been handed out (without this check the retry loop
            below would never terminate).
    """
    # Snapshot the history as a set so each retry is an O(1) membership test
    # instead of a scan of the whole list.
    seen = set(list_generated_random)

    # Only values inside this call's range reduce what is still available.
    used_in_range = sum(1 for value in seen if lower <= value < upper)
    if upper - lower <= used_in_range:
        raise ValueError(
            'no unused values left in range [%s, %s)' % (lower, upper))

    # Rejection sampling: redraw until we hit a value not seen yet. The check
    # above guarantees at least one such value exists.
    choice = random.randrange(lower, upper)
    while choice in seen:
        choice = random.randrange(lower, upper)

    list_generated_random.append(choice)
    return choice
def main():
    """Download randomly chosen Gutenberg texts until interrupted.

    Each iteration picks a document id from ``get_unique_random(1, 53000)``,
    fetches it with ``load_etext``, removes the headers with
    ``strip_headers``, strips surrounding whitespace and writes the result
    to ``<doc_id>.txt`` (a relative path, i.e. the current working
    directory). The loop has no exit condition of its own; it ends only
    when an exception propagates out of it.
    """
    # Local import keeps this function self-contained. io.open accepts an
    # explicit encoding on both Python 2 and Python 3.
    import io

    while True:
        doc_id = get_unique_random(1, 53000)
        print("downloading doc id: %s" % doc_id)
        # NOTE(review): load_etext presumably raises for ids that have no
        # downloadable text, which would end the whole loop -- confirm and
        # decide whether such ids should be skipped instead.
        text = strip_headers(load_etext(doc_id)).strip()
        # Write as UTF-8 explicitly: the platform default encoding may not be
        # able to represent every character in the text. Plain 'w' is enough
        # because the file is never read back.
        with io.open('%s.txt' % doc_id, 'w', encoding='utf-8') as f:
            f.write(text)
        print('%s.txt written' % doc_id)
# Script entry point: run the download loop only when executed directly,
# not when this module is imported.
if __name__ == '__main__':
    try:
        main()
    except KeyboardInterrupt:
        # Ctrl-C is the intended way to stop the endless loop in main(), so
        # exit with a friendly message instead of a traceback.
        print('exiting\ngoodbye :)')
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment