Skip to content

Instantly share code, notes, and snippets.

@mathigatti
Last active September 2, 2021 17:44
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mathigatti/fec92ff0fffefd0a2951ec43f4302f43 to your computer and use it in GitHub Desktop.
Save mathigatti/fec92ff0fffefd0a2951ec43f4302f43 to your computer and use it in GitHub Desktop.
letras.com scraper
from bs4 import BeautifulSoup
import requests
import os
letras_url = "https://www.letras.com"
def descargar_cancion(url_path, destination_path, timeout=30):
    """Download one song page from letras.com and save its lyrics as text.

    Args:
        url_path: Path of the song page; it is appended as-is to
            ``letras_url`` to build the request URL.
        destination_path: File the lyrics are written to. It is opened in
            write mode, so an existing file is overwritten.
        timeout: Seconds passed to ``requests.get`` as its timeout, so a
            stalled connection raises instead of blocking forever.

    The destination file is always written; when no matching lyrics
    container is found in the page, it ends up empty.
    """
    url = f"{letras_url}{url_path}"
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.content, 'html.parser')

    # Markup that separates lines/paragraphs in the lyrics; each occurrence
    # is turned into a newline in the saved text.
    separadores = ("</br>", "<br>", "<br/>", "<p>", "</p>")

    parrafos = []
    for div in soup.find_all("div", {"class": "cnt-letra p402_premium"}):
        for p in div.find_all("p"):
            text = str(p)
            for separador in separadores:
                text = text.replace(separador, "\n")
            parrafos.append(text)

    # Explicit UTF-8: lyrics routinely contain non-ASCII characters and the
    # platform default encoding is not guaranteed to represent them.
    with open(destination_path, 'w', encoding="utf-8") as f:
        f.write("".join(parrafos))
def descargar_letras(artista, timeout=30):
    """Download every song linked from an artist's ``mais_tocadas`` page.

    Each song is saved in a directory named after ``artista`` as a
    zero-padded, sequentially numbered text file (``000.txt``, ``001.txt``,
    ...), in the order the links appear on the page.

    Args:
        artista: Artist identifier as used in the letras.com URL; also used
            as the name of the output directory.
        timeout: Seconds passed to ``requests.get`` for the listing request,
            so a stalled connection raises instead of blocking forever.
    """
    url = f"{letras_url}/{artista}/mais_tocadas.html"
    page = requests.get(url, timeout=timeout)

    # exist_ok avoids the check-then-create race and makes reruns harmless.
    os.makedirs(artista, exist_ok=True)

    soup = BeautifulSoup(page.content, 'html.parser')
    enlaces = soup.find_all("a", {"class": "song-name"})
    for i, a in enumerate(enlaces):
        destino = os.path.join(artista, f"{i:03d}.txt")
        descargar_cancion(a["href"], destino)
# Usage example
# python3 scrape_letras.py duki
# Downloads every song linked from duki's "mais_tocadas" page into the folder ./duki
# (one numbered .txt file per song: 000.txt, 001.txt, ...)
if __name__ == "__main__":
    import sys

    # Fail with a usage message instead of an IndexError traceback when the
    # artist argument is missing.
    if len(sys.argv) < 2:
        sys.exit(f"Usage: python3 {sys.argv[0]} <artist>")

    artista = sys.argv[1]
    descargar_letras(artista)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment