This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Annual trend: total of the 'repeticiones' column per calendar year.
df_plot_trend = df_tve.groupby(df_tve['date'].dt.year)['repeticiones'].agg(['sum'])
plt.plot(df_plot_trend.index, df_plot_trend['sum'])
plt.xticks(rotation='vertical')
plt.ylabel('Número de menciones')
# The x axis is clamped to the 2014-2019 window.
plt.xlim(2014, 2019)
plt.xlabel('Años')
# The trailing semicolon keeps the notebook from echoing the returned Text object.
plt.title('Tendencia Anual');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Bar chart of mentions by month: aggregate 'repeticiones' per month number
# (sum, mean and max are computed; only the sum is plotted).
df_hist_month = df_tve.groupby(df_tve['date'].dt.month)['repeticiones'].agg(['sum', 'mean', 'max'])
plt.bar(df_hist_month.index, df_hist_month['sum'])
plt.xlabel('Mes')
# The trailing semicolon keeps the notebook from echoing the returned Text object.
plt.ylabel('Nº menciones');
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#Import charts libraries | |
import matplotlib.pyplot as plt | |
import seaborn as sns | |
%matplotlib inline | |
# Time series of mentions: one row per date holding the summed counts.
# df_plot aggregates the 'repeticiones' column, df_plot_sci the
# 'repet_ciencia' column; both group on the 'date' column of df_tve.
df_plot = df_tve.groupby('date')['repeticiones'].agg(['sum'])
df_plot_sci = df_tve.groupby('date')['repet_ciencia'].agg(['sum'])
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add a date column: the first 10 characters of programme_date, parsed as
# a datetime in YYYY/MM/DD format.
df['date'] = df['programme_date'].astype(str).str[:10]
df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d')
# Add which news bulletin the row refers to (T15/T21): characters 10-12 of
# the programme_date string.
df['Sesion'] = df['programme_date'].astype(str).str[10:13]
df['Sesion'].unique()
# Add how long each news item lasts.
df['duration'] = df['end_time'] - df['start_time']
# Build a dedicated frame with the columns of interest. The explicit copy
# makes df_tve independent of df, so adding columns to df_tve afterwards
# does not raise pandas' SettingWithCopyWarning.
df_tve = df[['date', 'Sesion', 'duration', 'content']].copy()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Scrape loop, designed with potential Yahoo connection errors in mind.
# Walks over every ticker stored in df_tikers.
error = 0
for j in range(len(df_tikers)):
    stock = df_tikers['ticker'][j]
    # Request the information from Yahoo, insisting if it does not respond,
    # and stop the program if Yahoo does not reply for two tickers in a row.
    # NOTE(review): the request/retry code described above is not present
    # in this excerpt; only the ticker lookup is.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Trial run: check how each statistics field is extracted from the page
# source of a single company, in this case Google (GOOG).
for field in list_of_fields:
    # Take the text that follows '>' + field, cut it at the end of the
    # table row, and keep what comes after the last '>' in that piece.
    ScrapedValue = sourceCode.split('>' + field)[1].split('</td></tr>')[0].split('>')[-1]
    print("field=", field, " / scraped valued= ", ScrapedValue)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Extract the statistical data.
# One more trial on a single ticker before writing the full function.
stock = 'GOOG'
url = 'https://finance.yahoo.com/quote/' + stock + '/key-statistics?p=' + stock
# Read the page inside a `with` block so the HTTP response is always closed,
# and decode the bytes into text. Calling str() on the raw bytes would give
# the "b'...'" representation with escaped quotes, newlines and non-ASCII
# characters instead of the page text.
with urlopen(url) as response:
    sourceCode = response.read().decode('utf-8', errors='replace')
# print(sourceCode)
# Extract the company name: the text between the two marker phrases.
compname = sourceCode.split('Find out all the key statistics for')[1].split(', including')[0]
# print(compname)
# Example from the original run: Alphabet Inc. (GOOG)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Add one column per statistics field, initialised to the empty string.
for field_name in list_of_fields:
    df_tikers[field_name] = ''
# Two more columns to identify each company.
df_tikers['ScrapedName'] = ''
df_tikers['Sector'] = ''
df_tikers.head()
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build a new DataFrame that stores the extracted tickers.
# Each raw entry is cleaned by removing double quotes and '[' characters.
# The column is created in one step because per-row chained assignment
# (frame['ticker'][i] = value) raises SettingWithCopyWarning and has no
# effect at all when pandas Copy-on-Write is enabled.
tickers = [raw_entry.replace('"', '').replace('[', '') for raw_entry in ScrapedAux]
df_tikers = pd.DataFrame({'ticker': tickers})
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Clean every raw entry (drop double quotes and '[') to recover the ticker,
# then print it together with its Python type.
for raw_entry in ScrapedAux:
    tiker = raw_entry.replace('"', '').replace('[', '')
    print('Name:', tiker, 'type:', type(tiker))