Last active
January 9, 2023 18:58
-
-
Save sergiolucero/57b30f520ef18594430bd9dd3835db78 to your computer and use it in GitHub Desktop.
Cancionero de La Cuerda
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests, sys | |
from bs4 import BeautifulSoup | |
from docx import Document | |
def ubs(url, timeout=30):
    """Download *url* and return it parsed as a BeautifulSoup tree.

    Args:
        url: Address of the page to fetch with an HTTP GET.
        timeout: Seconds to wait for the server before giving up. Without
            an explicit timeout a stalled connection would block the
            script indefinitely.

    Returns:
        The response text parsed with the ``html5lib`` parser.
    """
    response = requests.get(url, timeout=timeout)
    return BeautifulSoup(response.text, 'html5lib')
def get_chords(artist='Manu Chao'):
    """Scrape the songs listed for *artist* on lacuerda.net into a .docx file.

    The artist name is lower-cased and its words are joined with underscores;
    that slug is used both in the artist page URL and as the output file name
    (``<slug>.docx``, saved relative to the current working directory).

    Args:
        artist: Artist name as free text, e.g. ``'Manu Chao'``.

    Raises:
        ValueError: If *artist* contains no words.
        IndexError: If the artist page has no ``<ul class="b_main">`` list.
    """
    fartist = '_'.join(word.lower() for word in artist.split())
    if not fartist:
        # A blank name would target the site root and save a file named ".docx".
        raise ValueError('artist name must not be empty')
    url = f'https://acordes.lacuerda.net/{fartist}/'

    song_lists = ubs(url).find_all('ul', attrs={'class': 'b_main'})
    if not song_lists:
        # Keep the exception type of the former bare ``[0]`` lookup, but say
        # which page was missing its song list.
        raise IndexError(f'no song list found at {url}')
    links = song_lists[0].find_all('a')
    songs = [(link.text, url + link['href'] + '.shtml') for link in links]
    print('FOUND:', len(songs), 'songs')

    # Fetch and extract in a single pass so only the chord text of each song
    # is retained, not every parsed page.
    fetched = {}
    for link_text, song_url in songs:
        # The last whitespace-separated token of the link text is dropped
        # (presumably a trailing marker added by the site -- TODO confirm).
        title = ' '.join(link_text.split()[:-1])
        bodies = ubs(song_url).find_all('div', attrs={'id': 't_body'})
        if not bodies:
            # One page without chords should not abort the whole songbook.
            print('SKIPPED:', title, '(no t_body found)')
            continue
        fetched[title] = bodies[0].text

    document = Document()
    document.add_heading(artist, 0)  # level 0 is the document title
    for songname, song_chords in fetched.items():
        document.add_heading(songname, level=1)
        document.add_paragraph(song_chords)
    document.save(f'{fartist}.docx')
if __name__ == '__main__':
    # All command-line words together form one artist name,
    # e.g. ``Fabulosos Cadillacs``.
    artist = ' '.join(sys.argv[1:])
    if not artist.split():
        # With no name given, the artist URL would collapse to the site root.
        sys.exit(f'usage: {sys.argv[0]} ARTIST NAME')
    print('ARTIST:', artist)
    get_chords(artist)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
to be implemented as quant.cl/cancionero/
Idealmente, si salen pocos resultados, el código debiera buscar algo parecido (p. ej.: manuchao, manu_chao...).