moubaba/get_book.py Secret

## get_book.py
# Download the plain-text edition of the book from Project Gutenberg.
# The timeout stops the script from hanging forever on a stalled connection.
siddhartha_response = requests.get(
    "https://www.gutenberg.org/cache/epub/2500/pg2500.txt",
    timeout=30,
)
# Fail loudly on an HTTP error instead of slicing up an error page below.
siddhartha_response.raise_for_status()
siddhartha_data = siddhartha_response.text
# Gutenberg files surround the book with licence boilerplate. Splitting on
# "***" and keeping index 2 retains only the content of the book itself.
siddhartha_data = siddhartha_data.split("***")[2]

## one_hot.py
# One-hot encode every stem of a single sentence (index 14 after splitting
# the book on "."). Relies on `vocab` and `text_stems_sid` already existing.
sentence = siddhartha_data.split(".")[14]
stems, lems = process_data(sentence)
print(sentence)

onehot_encoded = []
if stems:
    # The vocabulary size is fixed while encoding, so build the set once
    # instead of once per word.
    vocab_size = len(set(text_stems_sid))
for word in stems:
    # `letter` is the one-hot row for this word: all zeros except its slot.
    letter = [0] * vocab_size
    # Look the word up a single time and reuse the index for the printout
    # and the assignment; list.index raises ValueError for an unknown word.
    position = vocab.index(word)
    print(word, position)
    letter[position] = 1
    onehot_encoded.append(letter)

## take_phrases.py
# Build the vocabulary from a short excerpt: the pieces at indices 10 to 14
# after splitting the book on ".", joined back together with spaces.
phrases = " ".join(siddhartha_data.split(".")[10:15])
text_stems_sid, text_lems_sid = process_data(phrases)
# De-duplicate the stems. Set ordering is arbitrary, so the index a word gets
# in `vocab` is not guaranteed to be the same from one run to the next.
vocab = list(set(text_stems_sid))
print(phrases)
	# Combined version of the three snippets: download, build vocab, encode.
	siddhartha_response = requests.get("https://www.gutenberg.org/cache/epub/2500/pg2500.txt")
	siddhartha_data = siddhartha_response.text
	# Splitting on "***" and keeping index 2 retains only the content of the
	# book and drops the licence information Gutenberg puts around it.
	siddhartha_data = siddhartha_data.split("***")[2]
	# The vocabulary has to exist before the encoding loop below uses it, so
	# it is built first from the excerpt at split indices 10 to 14.
	text_stems_sid,text_lems_sid = process_data(" ".join(siddhartha_data.split(".")[10:15]))
	vocab = list(set(text_stems_sid))
	# One-hot encode every stem of the sentence at split index 14.
	stems,lems = process_data(siddhartha_data.split(".")[14])
	print(siddhartha_data.split(".")[14])
	onehot_encoded = list()
	for word in stems:
		# One row per word: all zeros except the word's slot in vocab.
		letter = [0 for _ in range(len(set(text_stems_sid)))]
		print(word,vocab.index(word))
		letter[vocab.index(word)] = 1
		onehot_encoded.append(letter)
	print(" ".join(siddhartha_data.split(".")[10:15]))