Skip to content

Instantly share code, notes, and snippets.

View brenoimatos's full-sized avatar

Breno Matos brenoimatos

View GitHub Profile
# The adjust_text function is imported from
# https://github.com/Phlya/adjustText/blob/master/adjustText/__init__.py
# Scatter-plot inputs: the 'Unique' column is plotted on the x axis and the
# 'Relative' column on the y axis.
x = df['Unique']
y = df['Relative']
# Index values of df; presumably used as the point labels -- TODO confirm.
annot = df.index
fig, ax = plt.subplots(figsize=(12, 15))
ax.scatter(x, y)
# Starts empty; presumably filled with the label objects that adjust_text
# repositions further down (not visible in this excerpt) -- TODO confirm.
texts = []
# Current working directory.
path = os.getcwd()
# Name of the folder the PNG files are meant to be saved in.
wordcloud_folder = 'wordcloud'
# Build one word cloud per index entry from that row's 'wordcloud' text.
for artist in df.index:
    # A single .loc[row, column] lookup reads the cell directly instead of
    # materialising the whole row first and then indexing into it.
    wordcloud = WordCloud(
        background_color="black",
        max_words=500,
        width=1600,
        height=800,
    ).generate(df.loc[artist, 'wordcloud'])
    # NOTE(review): nothing visible here uses `path`, `wordcloud_folder` or
    # `wordcloud` yet -- the code that writes the PNG is presumably outside
    # this excerpt.
# Vocabulary statistics computed from each row's 'tokens' list.
# Number of distinct tokens.
df['Unique'] = df['tokens'].apply(lambda tokens: len(set(tokens)))
# Total number of tokens, repeats included.
df['Total'] = df['tokens'].apply(len)
# Share of distinct tokens. Dividing the two columns reuses the counts
# computed above and yields NaN for an empty token list, where the per-row
# len(set(x)) / len(x) raised ZeroDivisionError.
df['Relative'] = df['Unique'] / df['Total']
# Download NLTK's stop-word corpus and load its Portuguese word list.
nltk.download('stopwords')
stopwords_nltk = nltk.corpus.stopwords.words('portuguese')
# Extra words to treat as stop words in addition to the NLTK list.
update = [
    'tá', 'pra', 'tô', 'cê', 'pro', 'então', 'meu', 'em',
    'você', 'de', 'ao', 'os', 'vou', 'vai', 'vem', 'mim',
    'uns', 'sei', 'quero', 'ser', 'ver', 'aqui', 'faz',
]
# Join both lists into one: the NLTK words first, then the extras.
stopwords_raw = list(stopwords_nltk) + update
# Split every 'lyrics_clean' string on whitespace into a list of tokens.
df['tokens'] = df['lyrics_clean'].str.split()
# Make the 'artist' column the index, modifying df in place.
df.set_index(keys='artist', inplace=True)
# Show the rows whose 'lyrics' value is missing.
print(df_raw.loc[df_raw['lyrics'].isna()])
# Drop every row that has a missing value in any column.
df_valid = df_raw.dropna()
# Per-column count of missing values remaining after the drop.
print(df_valid.isna().sum())
# Criando a função para limpar o dataframe
def cleaning_text(text):
import pandas as pd
import nltk
import matplotlib.pyplot as plt
from wordcloud import WordCloud, ImageColorGenerator
import unidecode
import re
import string
import matplotlib.ticker as mtick
import os
import scrapy
class LyricsSpider(scrapy.Spider):
    """Spider named 'rap' that holds one letras.mus.br URL per artist slug.

    Class attributes visible in this excerpt:
        name: the spider name, 'rap'.
        artists: artist slugs, split from a whitespace-separated string.
        urls: 'https://www.letras.mus.br/<slug>/' for every slug, in order.
    """

    name = 'rap'
    # NOTE(review): 'nill' appears twice in this list, so its URL is generated
    # twice as well -- confirm whether the repeat is intentional.
    artists = '3030 1kilo adl-mcs afro-x azzy baco-exu-do-blues bin bk black-alien bnegao c4bal cacife-clandestino cartel-mcs choice chris cone-crew-diretoria coruja-bc1 costa-gold criolo cynthia-luz dalsin delacruz de-leve dfideliz diomedes-chinaski djonga don-l drik-barbosa dudu-mc emicida fabio-brazza faccao-central felp22 flora-matos froid gaab gabriel-pensador gloria-groove haikaiss hungria-hip-hop jaya-luuck je-santiago kamau karol-conka kayua kiaz l7nnon luccas-carlos makalister-renton mano-brown marcelo-d2 matue mc-hariel-sp mc-marechal mc-orochi mr-thug mv-bill nabrisa-tonett negra-li nill nill ogi oriente pele-milflows projota quinto-andar racionais-mcs rael rappin-hood rashid ret rincon-sapiencia sabotage sant shawlin sidoka slim-rimografia speed-freaks tulio-dek ucl xama'.split()
    # A comprehension keeps the loop variable out of the class namespace; a
    # class-level `for` statement would leave a stray `artist` attribute.
    urls = [f'https://www.letras.mus.br/{artist}/' for artist in artists]