# saburbutt/Lyricsgenerationupdated.txt

## Lyricsgenerationupdated.txt
from typing import Dict, Any
from html.parser import HTMLParser
from nltk.tokenize import sent_tokenize, word_tokenize
from nltk.corpus import stopwords
import nltk
import pandas as pd
from requests import get
from bs4 import BeautifulSoup
import random, re #re helps with the text formating
from matplotlib import pyplot as plt
import string
# Step 1: read the keyword (or set of keywords) that drives both the lyrics
# search and the generated text.
lyrics_words_by_user = input("Enter One keyword for lyrics:")
word_tokenized = word_tokenize(lyrics_words_by_user)

# Step 2: scrape lyrics related to the keywords.  The tokens are joined into a
# plain search term; str(word_tokenized) would put the list repr ("['love']")
# into the URL.  requests percent-encodes the spaces when it sends the request.
url = 'https://www.lyrics.com/lyrics/' + ' '.join(word_tokenized)
print(url)
response = get(url)
html_soup = BeautifulSoup(response.text, 'html.parser')

# Collect the elements with class 'lyric-body' (presumably one lyric snippet
# each on lyrics.com -- verify against the live page markup).
song_lyrics = html_soup.find_all('', class_='lyric-body')

# Compiled once, outside the loops.
cleaner = re.compile('<.*?>')
punctuation_to_drop = str.maketrans('', '', ',()')

# Step 3: append the cleaned snippets to the corpus file that the Markov model
# is trained on.  A single `with` block guarantees the handle is closed, even
# when no snippet was found.
with open('lyrics1.txt', 'a') as appendFile:
    for i in song_lyrics:
        for j in i:
            # Strip leftover HTML tags, then commas and parentheses.  str
            # methods return new strings, so the result must be reassigned.
            j = cleaner.sub('', str(j))
            j = j.translate(punctuation_to_drop)
            appendFile.write('\n')
            appendFile.write(j)

#the lyrics will be stored in the file that will be used to train the next set of lyrics that rhyme
def testMarkov(startword):
    """Generate lyrics that begin with *startword*.

    A fresh transition table is filled from 'lyrics1.txt' by addToLib and
    then handed to makeLyrics together with the start word.

    Returns:
        Whatever makeLyrics returns for that table and start word.
    """
    transitions = dict()
    addToLib('lyrics1.txt', transitions)
    return makeLyrics(startword, transitions)
def addToLib(filename, currLib):
    """Add the word-to-next-word transitions found in *filename* to *currLib*.

    The text is split on single spaces and lowercased, with every newline
    kept as a token of its own so the chain can learn line breaks.  Each
    adjacent pair (word, next word) is counted, then each word's counts are
    divided by their total so that currLib[word][next] holds a probability.

    Args:
        filename: path of the lyrics text file to read.
        currLib: dict mapping word -> {next word: value}; updated in place.

    Returns:
        The same currLib object, after normalisation.

    Note:
        Normalisation runs over every entry of currLib, so pass a fresh dict:
        adding raw counts to an already-normalised table would mix counts
        with probabilities.
    """
    # Close the file deterministically instead of leaking the handle.
    with open(filename, 'r') as f:
        text = f.read()

    # Pad newlines with spaces so each line break becomes its own "\n" token;
    # without the padding the last word of a line stays glued to the first
    # word of the next one.  Empty strings produced by repeated spaces are
    # dropped so they do not become "words".
    words = [w.lower() for w in re.sub("\n", " \n ", text).split(' ') if w]

    # Count every adjacent (current word, next word) pair.
    for currWord, nextWord in zip(words, words[1:]):
        followers = currLib.setdefault(currWord, {})
        followers[nextWord] = followers.get(nextWord, 0) + 1

    # Change counts to probabilities, one word at a time.
    for followers in currLib.values():
        keyTotal = sum(followers.values())
        for probKey in followers:
            followers[probKey] = followers[probKey] / keyTotal

    print('\n', currLib)
    return currLib

def makeLyrics(startword, probDict, length=50):
    """Generate a run of words by walking the Markov chain in *probDict*.

    Args:
        startword: first word of the output; emitted exactly as given.
        probDict: transition table, passed through to markov_next.
        length: number of words to emit (default 50).

    Returns:
        The generated words, each followed by a single space (so a non-empty
        result ends with a trailing space); '' when length <= 0.
    """
    generated = []
    curr = startword
    for position in range(length):
        generated.append(curr)
        # Only draw a successor when another word will actually be emitted.
        if position + 1 < length:
            curr = markov_next(curr, probDict)
    # Join once at the end instead of growing a string inside the loop.
    return ''.join(word + ' ' for word in generated)
#First-order Markov chain (not a hidden Markov model): the next word depends only on the current word
def markov_next(currword, probDict):
    """Pick the word that follows *currword*.

    Args:
        currword: the word that was just emitted.
        probDict: dict mapping word -> {next word: probability}.

    Returns:
        A successor drawn according to the probabilities stored for
        currword.  When currword has no entry, or the draw lands beyond the
        accumulated probabilities (e.g. they sum to slightly less than 1
        because of float rounding), a uniformly random key of probDict.

    Raises:
        IndexError: if the fallback is needed and probDict is empty.
    """
    successors = probDict.get(currword)
    if successors:
        threshold = random.uniform(0.0, 1.0)
        cumulative = 0.0
        # Walk the cumulative distribution until it reaches the threshold.
        for candidate, probability in successors.items():
            cumulative += probability
            if threshold <= cumulative:
                return candidate
    # random.choice needs an indexable sequence; a dict keys view is not one.
    return random.choice(list(probDict))

# Plot the word-frequency distribution of the accumulated lyrics corpus.
with open('lyrics1.txt', "r") as corpus:
    inputfile = corpus.read()
words = nltk.tokenize.word_tokenize(inputfile)
count = set(words)  # distinct tokens in the corpus
# Named word_freq rather than `dict` so the builtin is not shadowed.
word_freq = nltk.FreqDist(words)
word_freq.plot()
plt.plot(words)
plt.show()

# Generate lyrics seeded with the user's keyword.  addToLib lowercases every
# word it stores, so the seed is lowercased to be able to match an entry.
compare = testMarkov(lyrics_words_by_user.lower())
print(compare)
	from typing import Dict, Any
	from html.parser import HTMLParser
	from nltk.tokenize import sent_tokenize, word_tokenize
	from nltk.corpus import stopwords
	import nltk
	import pandas as pd
	from requests import get
	from bs4 import BeautifulSoup
	import random, re #re helps with the text formating
	from matplotlib import pyplot as plt
	import string
	# Step 1: read the keyword (or set of keywords) that drives both the lyrics
	# search and the generated text.
	lyrics_words_by_user = input("Enter One keyword for lyrics:")
	word_tokenized = word_tokenize(lyrics_words_by_user)

	# Step 2: scrape lyrics related to the keywords.  The tokens are joined into a
	# plain search term; str(word_tokenized) would put the list repr ("['love']")
	# into the URL.  requests percent-encodes the spaces when it sends the request.
	url = 'https://www.lyrics.com/lyrics/' + ' '.join(word_tokenized)
	print(url)
	response = get(url)
	html_soup = BeautifulSoup(response.text, 'html.parser')

	# Collect the elements with class 'lyric-body' (presumably one lyric snippet
	# each on lyrics.com -- verify against the live page markup).
	song_lyrics = html_soup.find_all('', class_='lyric-body')

	# Compiled once, outside the loops.
	cleaner = re.compile('<.*?>')
	punctuation_to_drop = str.maketrans('', '', ',()')

	# Step 3: append the cleaned snippets to the corpus file that the Markov model
	# is trained on.  A single `with` block guarantees the handle is closed, even
	# when no snippet was found.
	with open('lyrics1.txt', 'a') as appendFile:
		for i in song_lyrics:
			for j in i:
				# Strip leftover HTML tags, then commas and parentheses.  str
				# methods return new strings, so the result must be reassigned.
				j = cleaner.sub('', str(j))
				j = j.translate(punctuation_to_drop)
				appendFile.write('\n')
				appendFile.write(j)

	#the lyrics will be stored in the file that will be used to train the next set of lyrics that rhyme
	def testMarkov(startword):
		"""Generate lyrics that begin with *startword*.

		A fresh transition table is filled from 'lyrics1.txt' by addToLib and
		then handed to makeLyrics together with the start word.

		Returns:
			Whatever makeLyrics returns for that table and start word.
		"""
		transitions = dict()
		addToLib('lyrics1.txt', transitions)
		return makeLyrics(startword, transitions)
	def addToLib(filename, currLib):
		"""Add the word-to-next-word transitions found in *filename* to *currLib*.

		The text is split on single spaces and lowercased, with every newline
		kept as a token of its own so the chain can learn line breaks.  Each
		adjacent pair (word, next word) is counted, then each word's counts are
		divided by their total so that currLib[word][next] holds a probability.

		Args:
			filename: path of the lyrics text file to read.
			currLib: dict mapping word -> {next word: value}; updated in place.

		Returns:
			The same currLib object, after normalisation.

		Note:
			Normalisation runs over every entry of currLib, so pass a fresh dict:
			adding raw counts to an already-normalised table would mix counts
			with probabilities.
		"""
		# Close the file deterministically instead of leaking the handle.
		with open(filename, 'r') as f:
			text = f.read()

		# Pad newlines with spaces so each line break becomes its own "\n" token;
		# without the padding the last word of a line stays glued to the first
		# word of the next one.  Empty strings produced by repeated spaces are
		# dropped so they do not become "words".
		words = [w.lower() for w in re.sub("\n", " \n ", text).split(' ') if w]

		# Count every adjacent (current word, next word) pair.
		for currWord, nextWord in zip(words, words[1:]):
			followers = currLib.setdefault(currWord, {})
			followers[nextWord] = followers.get(nextWord, 0) + 1

		# Change counts to probabilities, one word at a time.
		for followers in currLib.values():
			keyTotal = sum(followers.values())
			for probKey in followers:
				followers[probKey] = followers[probKey] / keyTotal

		print('\n', currLib)
		return currLib

	def makeLyrics(startword, probDict, length=50):
		"""Generate a run of words by walking the Markov chain in *probDict*.

		Args:
			startword: first word of the output; emitted exactly as given.
			probDict: transition table, passed through to markov_next.
			length: number of words to emit (default 50).

		Returns:
			The generated words, each followed by a single space (so a non-empty
			result ends with a trailing space); '' when length <= 0.
		"""
		generated = []
		curr = startword
		for position in range(length):
			generated.append(curr)
			# Only draw a successor when another word will actually be emitted.
			if position + 1 < length:
				curr = markov_next(curr, probDict)
		# Join once at the end instead of growing a string inside the loop.
		return ''.join(word + ' ' for word in generated)
	#First-order Markov chain (not a hidden Markov model): the next word depends only on the current word
	def markov_next(currword, probDict):
		"""Pick the word that follows *currword*.

		Args:
			currword: the word that was just emitted.
			probDict: dict mapping word -> {next word: probability}.

		Returns:
			A successor drawn according to the probabilities stored for
			currword.  When currword has no entry, or the draw lands beyond the
			accumulated probabilities (e.g. they sum to slightly less than 1
			because of float rounding), a uniformly random key of probDict.

		Raises:
			IndexError: if the fallback is needed and probDict is empty.
		"""
		successors = probDict.get(currword)
		if successors:
			threshold = random.uniform(0.0, 1.0)
			cumulative = 0.0
			# Walk the cumulative distribution until it reaches the threshold.
			for candidate, probability in successors.items():
				cumulative += probability
				if threshold <= cumulative:
					return candidate
		# random.choice needs an indexable sequence; a dict keys view is not one.
		return random.choice(list(probDict))

	# Plot the word-frequency distribution of the accumulated lyrics corpus.
	with open('lyrics1.txt', "r") as corpus:
		inputfile = corpus.read()
	words = nltk.tokenize.word_tokenize(inputfile)
	count = set(words)  # distinct tokens in the corpus
	# Named word_freq rather than `dict` so the builtin is not shadowed.
	word_freq = nltk.FreqDist(words)
	word_freq.plot()
	plt.plot(words)
	plt.show()

	# Generate lyrics seeded with the user's keyword.  addToLib lowercases every
	# word it stores, so the seed is lowercased to be able to match an entry.
	compare = testMarkov(lyrics_words_by_user.lower())
	print(compare)