Last active
August 29, 2015 14:26
-
-
Save abhi18av/2488af1734388d2323e0 to your computer and use it in GitHub Desktop.
Remember the Songs!
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# I have left an ample amount of trail and rough work in here so that you may follow up the workflow | |
# and modify the program as necessary for your needs. | |
""" | |
Disclaimer :- I am not responsible for any damage done via this code or its derivatives. | |
Advice :- Be careful with the sites you play with. Be nice and don't wander away from the path of light! | |
Most Muggles don't like Magic;P | |
""" | |
################# VERSION 1
"""
The goal is to fetch all the song names and links from the website
<< http://lyricstranslate.com/en/laura-naerhi-lyrics.html >>
and then using those links to navigate over there and to fetch the lyrics.
"""
## Start by isolating the names of the songs from the list
import requests
from bs4 import BeautifulSoup

url = "http://lyricstranslate.com/en/laura-naerhi-lyrics.html"
r = requests.get(url)
# Name the parser explicitly so bs4 does not warn and pick a different
# parser on different machines.
soup = BeautifulSoup(r.content, "html.parser")
# BUG FIX: prettify is a method -- without the parentheses the original
# printed the bound-method object instead of the formatted HTML.
print(soup.prettify())
# Each song row on the index page lives in a <td class="songName">.
songnames = soup.find_all('td', class_='songName')
# Peek at the first few entries to see what a cell looks like.
for cell in songnames[:5]:
    print(cell, "\n")
# Interactive exploration of one cell: tag, anchor, text, link target.
songnames[0]
songnames[0].a
songnames[0].a.string  # songnames[0].a.text
songnames[0].a["href"]
############################## VERSION 2
"""
Here we aim to isolate 3 things
> Singer
> Song name
> Song link
"""
# The page <title> carries the singer's name.
soup.title.text
# BUG FIX: the original printed songnames[0].a["href"] inside the loop,
# so every song appeared to share the FIRST song's link.  Iterate the
# cells directly and take each cell's own href.
for cell in songnames:
    print(cell.a.string, " --->> ", cell.a["href"], "\n")
############## VERSION 3
"""
Here we wish to isolate only the songs
> title
> lyrics
"""
# hrefs on the index page are site-relative, so prefix the domain.
song_url = "http://lyricstranslate.com" + songnames[4].a["href"]
print(song_url)
r_song = requests.get(song_url)
# Explicit parser, consistent with the index-page soup.
song_soup = BeautifulSoup(r_song.content, "html.parser")
# BUG FIX: prettify is a method -- the original printed the bound-method
# object instead of the formatted HTML.
print(song_soup.prettify())
#lyrics = song_soup.find('div', class_ ='title-h2')
## WE USE A DIFFERENT NOTATION FOR CLASS
lyrics = song_soup.find('div', {"class": "song-node-text"})
print(lyrics.text)
# Hoist the children list out of the loop instead of rebuilding it
# (and re-indexing it) on every iteration as the original did.
children = list(lyrics.children)
for child in children:
    print(child, "\n")
"""
some work still to be done in version 3
"""
song_lyrics = lyrics.find_all("p")
song_lyrics[1]
for para in song_lyrics:
    print(para)
# NOTE(review): the 1:18 slice looks hand-tuned to this one page -- confirm.
only_para = children[1:18]
only_para
################ VERSION 4
"""
The goal is to save the lyrics in a file with utf-8 encoding
in the songs&poetry directory
"""
import os

# Page <title> first, then the lyric text.
to_be_saved = str(song_soup.title) + "\n" + str(lyrics.text)
# BUG FIX: the original non-raw string 'W:\songs&poetry\file1.txt' let
# Python interpret '\f' as a form-feed escape, silently corrupting the
# path.  A raw string keeps the Windows backslashes literal.
writepath = r'W:\songs&poetry\file1.txt'
# Append if the file already exists, otherwise create it fresh.
mode = 'a' if os.path.exists(writepath) else 'w'
with open(writepath, mode, encoding="utf-8") as f:
    # f.write(lyrics.text)  # saves only the main lyrics
    f.write(to_be_saved)
############################## VERSION 5
"""
Now we fetch the lyrics of all the songs listed on the index page
"""
import os

import requests
from bs4 import BeautifulSoup

# Re-fetch the index page so this section runs standalone.
url = "http://lyricstranslate.com/en/laura-naerhi-lyrics.html"
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")  # explicit parser, see VERSION 1
songnames = soup.find_all('td', class_='songName')

writepath = 'W:\\songs&poetry\\finnish\\songs\\laura-narhi\\songs.txt'
for cell in songnames:
    song_url = "http://lyricstranslate.com" + cell.a["href"]
    r_song = requests.get(song_url)
    song_soup = BeautifulSoup(r_song.content, "html.parser")
    lyrics = song_soup.find('div', {"class": "song-node-text"})
    # ROBUSTNESS: find() returns None when the div is absent (translation
    # pages, removed songs); skip those instead of crashing mid-crawl.
    if lyrics is None:
        continue
    to_be_saved = str(song_soup.title) + "\n" + str(lyrics.text)
    # 'w' truncates on the very first run; every later write appends.
    mode = 'a' if os.path.exists(writepath) else 'w'
    with open(writepath, mode, encoding="utf-8") as f:
        f.write(to_be_saved)
# end of For loop
#######################
# Scratchpad: poking around the parse tree interactively.
soup.contents[1].contents[3]
songnames[0].contents
songnames[0].contents[0]
songnames[0].contents[0].text
#
# Left over from an earlier experiment on a different page:
#for university in universities:
#    print(university['href']+","+university.string)
for name in songnames:
    print(name)
#    print(name.string, " <- - -> ", name['href'])
# First three top-level children of the document.
ls = list(soup.children)
for child in ls[:3]:
    print(child)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment.