# mlai-demo/tem2.py

## tem2.py
# Normalise the Plutarch corpus: lower-case it, keep only letters and a small
# set of punctuation marks, collapse runs of spaces, print a few statistics
# and write the cleaned text to Plutarch2.txt.
# NOTE(review): `re` and `fpath` are not defined in this part of the file;
# they are assumed to be provided by earlier code -- confirm.
with open(fpath + '/Plutarch.txt') as f, \
        open(fpath + '/Plutarch2.txt', 'w') as out_f:
    text = f.read().lower()
    # Replace every character outside a-z . ? ! - ' : ; with a space.
    # Newlines are outside that set as well, so they become spaces here and
    # no separate newline pass is required.
    new_text = re.sub(r"[^a-z.?!\-':;]", ' ', text)
    # Collapse the runs of spaces left behind by the substitution above.
    new_text = re.sub(' +', ' ', new_text)
    # Tokens are now separated by single spaces only; split() also discards
    # the empty pieces produced by a leading or trailing space.
    items = new_text.split()
    unique_items = set(items)
    # Guard the average so an empty (or fully filtered) input does not raise
    # ZeroDivisionError.
    avg_len = round(sum(len(word) for word in items) / len(items), 2) if items else 0.0
    print("The text is {} words long, has {} unique items and {} characters on average\n".format(
        len(items), len(unique_items), avg_len))
    print("First 1000 characters of the text:\n", new_text[:1000])
    out_f.write(new_text)