# Remember the Songs!
# I have left ample trails and rough work in here so that you can follow the workflow
# and modify the program as needed.
"""
Disclaimer :- I am not responsible for any damage done via this code or its derivatives.
Advice :- Be careful with the sites you play with. Be nice and don't wander away from the path of light!
Most Muggles don't like Magic ;P
"""
################# VERSION 1
"""
The goal is to fetch all the song names and links from the website
<< http://lyricstranslate.com/en/laura-naerhi-lyrics.html >>
and then use those links to fetch the lyrics from each song page.
"""
## Start by isolating the names of the songs from the list
import requests
from bs4 import BeautifulSoup
url = "http://lyricstranslate.com/en/laura-naerhi-lyrics.html"
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")  # name the parser explicitly so bs4 doesn't have to guess
print(soup.prettify())  # prettify is a method, so it needs the parentheses
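# Optional sanity check before parsing (a small sketch using the standard
# requests API): raise_for_status() raises requests.HTTPError on a 4xx/5xx reply.
r.raise_for_status()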
songnames = soup.find_all('td', class_='songName')
for i in range(5):
    print(songnames[i], "\n")
songnames[0]
songnames[0].a
songnames[0].a.string #songnames[0].a.text
songnames[0].a["href"]
############################## VERSION 2
"""
Here we aim to isolate 3 things
> Singer
> Song name
> Song link
"""
soup.title.text
for i in range(len(songnames)):
    print(songnames[i].a.string, " --->> ", songnames[i].a["href"], "\n")
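# Rather than printing, the three pieces could be collected for later use; a
# minimal sketch (the names singer and song_index are introduced here, not
# part of the original):
singer = soup.title.text
song_index = [(singer, td.a.string, td.a["href"]) for td in songnames]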
############## VERSION 3
"""
Here we wish to isolate only the songs
> title
> lyrics
"""
song_url = "http://lyricstranslate.com" + songnames[4].a["href"]
print(song_url)
r_song = requests.get(song_url)
song_soup = BeautifulSoup(r_song.content, "html.parser")
print(song_soup.prettify())
#lyrics = song_soup.find('div', class_ ='title-h2')
## WE USE A DIFFERENT NOTATION FOR CLASS
lyrics = song_soup.find('div', {"class": "song-node-text"})
print(lyrics.text)
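# Both notations are equivalent in BeautifulSoup: class is a reserved word in
# Python, so bs4 accepts either the class_ keyword or an attrs dict. This line
# would find the same div:
# lyrics = song_soup.find('div', class_='song-node-text')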
for child in lyrics.children:
    print(child, "\n")
"""
some work still to be done in version 3
"""
song_lyrics = lyrics.find_all("p")
song_lyrics[1]
for para in song_lyrics:
    print(para)
only_para = list(lyrics.children)[1:18]
only_para
################ VERSION 4
"""
The goal is to save the lyrics in a file with utf-8 encoding
in the songs&poetry directory
"""
to_be_saved = str(song_soup.title) + "\n" + str(lyrics.text)
import os
writepath = 'W:\songs&poetry\file1.txt'
mode = 'a' if os.path.exists(writepath) else 'w'
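# Note: open() fails if the target directory doesn't exist yet; a minimal
# sketch that creates it first (os.makedirs with exist_ok=True is stdlib):
os.makedirs(os.path.dirname(writepath), exist_ok=True)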
with open(writepath, mode, encoding="utf-8") as f:
    # f.write(lyrics.text)  # saves only the main lyrics
    f.write(to_be_saved)
############################## VERSION 5
"""
Now we fetch the lyrics of all the songs listed on the index page
"""
import requests
from bs4 import BeautifulSoup
url = "http://lyricstranslate.com/en/laura-naerhi-lyrics.html"
r = requests.get(url)
soup = BeautifulSoup(r.content, "html.parser")
songnames = soup.find_all('td', class_='songName')
import os
writepath = 'W:\\songs&poetry\\finnish\\songs\\laura-narhi\\songs.txt'
for i in range(len(songnames)):
    song_url = "http://lyricstranslate.com" + songnames[i].a["href"]
    r_song = requests.get(song_url)
    song_soup = BeautifulSoup(r_song.content, "html.parser")
    lyrics = song_soup.find('div', {"class": "song-node-text"})
    # for child in lyrics.children:
    #     print(child, "\n")
    to_be_saved = str(song_soup.title) + "\n" + str(lyrics.text)
    mode = 'a' if os.path.exists(writepath) else 'w'
    with open(writepath, mode, encoding="utf-8") as f:
        f.write(to_be_saved)
# end of For loop
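# A couple of robustness tweaks worth considering (a sketch, not part of the
# original run): skip pages where the lyrics div is missing so .text doesn't
# raise AttributeError on None, and pause between requests to be nice to the site.
import time
for td in songnames:
    song_url = "http://lyricstranslate.com" + td.a["href"]
    song_soup = BeautifulSoup(requests.get(song_url).content, "html.parser")
    lyrics = song_soup.find('div', {"class": "song-node-text"})
    if lyrics is None:
        continue  # layout changed or no lyrics posted for this song
    with open(writepath, 'a', encoding="utf-8") as f:
        f.write(str(song_soup.title) + "\n" + lyrics.text)
    time.sleep(1)  # roughly one request per second keeps the load modest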
#######################
soup.contents[1].contents[3]
songnames[0].contents
songnames[0].contents[0]
songnames[0].contents[0].text
#
# Leftover scaffolding from an earlier universities scrape, kept as rough work:
# for university in universities:
#     print(university['href'] + "," + university.string)
for name in songnames:
    print(name)
    # print(name.a.string, " <- - -> ", name.a['href'])
ls = list(soup.children)
for i in range(3):
    print(ls[i])