Skip to content

Instantly share code, notes, and snippets.

@miguelarauj1o
Created January 4, 2016 01:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save miguelarauj1o/66fdec67a2159a38c646 to your computer and use it in GitHub Desktop.
Save miguelarauj1o/66fdec67a2159a38c646 to your computer and use it in GitHub Desktop.
import os
def filterAlpha(a):
    """Report whether ``a`` is made up solely of alphabetic characters.

    Args:
        a: A string; when used as a ``filter`` predicate over a line of
            text this is a single character.

    Returns:
        The result of ``a.isalpha()``: True when ``a`` is non-empty and
        every character in it is alphabetic, False otherwise.
    """
    return a.isalpha()
def generate(size):
    """Write about ``size`` million letters of english.txt to a new file.

    Reads ``english.txt`` (relative to the current working directory) line
    by line, keeps only the characters accepted by ``filterAlpha`` and
    appends them to ``english<size>.txt``.  Whitespace, newlines, NUL
    characters, digits and punctuation are all dropped by that filter, so
    the output is one unbroken run of letters.

    Writing stops after the first line that pushes the output past
    ``size * 1000000`` characters, or when the input is exhausted,
    whichever comes first.

    Args:
        size: Target output size in units of 1,000,000 characters.  It is
            also embedded in the output file name via ``str(size)``.

    Raises:
        OSError: (``IOError`` on Python 2) if ``english.txt`` cannot be
            opened or the output file cannot be written.
    """
    # files = ['desolation-row.txt', 'english.txt', 'storm-of-swords.txt', 'communist-manifesto.txt']
    limit = size * 1000000
    target = 'english' + str(size) + '.txt'

    # Track the amount written ourselves: asking the OS for the file size
    # while writes are still buffered reports a stale value and costs a
    # stat call per line.  For ASCII letters one character is one byte.
    written = 0

    # ``with`` closes both files even if reading or writing raises.
    with open('english.txt', 'r') as src, open(target, 'w') as dst:
        # Iterate the file lazily instead of loading every line up front.
        for line in src:
            # ``filter`` yields an iterator on Python 3, so join the kept
            # characters back into a string before measuring/writing it.
            letters = ''.join(filter(filterAlpha, line))
            if not letters:
                continue
            dst.write(letters)
            written += len(letters)
            if written > limit:
                break
if __name__ == '__main__':
    # Run generate() once per target size, in ascending order.
    for size in (5, 10, 20, 30, 50):
        generate(size)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment