Jose Luis Fernández Nuevo (JLFDataScience), FGCSIC
@JLFDataScience
JLFDataScience / Annual_trend.py
Created March 11, 2020 19:18
Annual trend of news mentions
#Annual trend
df_plot_trend = df_tve.groupby(df_tve['date'].dt.year)['repeticiones'].agg(['sum'])
plt.plot(df_plot_trend.index, df_plot_trend['sum'])
plt.xticks(rotation='vertical')
plt.ylabel('Número de menciones')
plt.xlim(2014,2019)
plt.xlabel('Años')
plt.title('Tendencia Anual');
@JLFDataScience
JLFDataScience / Bar_chart_month.py
Created March 11, 2020 18:58
Bar chart on mentions by month
#Bar chart on mentions by month
df_hist_month = df_tve.groupby(df_tve['date'].dt.month)['repeticiones'].agg(['sum', 'mean', 'max'])
plt.bar(df_hist_month.index, df_hist_month['sum'])
plt.xlabel('Mes')
plt.ylabel('Nº menciones');
@JLFDataScience
JLFDataScience / time_series_news_aging.py
Created March 11, 2020 18:55
Time series of news mentions
#Import charting libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
#Time series news mentioned
df_plot = df_tve.groupby(df_tve['date'])['repeticiones'].agg(['sum'])
df_plot_sci = df_tve.groupby(df_tve['date'])['repet_ciencia'].agg(['sum'])
#df_plot_trend = df_tve.groupby(df_tve['date'].dt.year)['repeticiones'].agg(['sum'])
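As a quick sanity check, the groupby aggregation above can be exercised on a tiny invented frame (assuming pandas is available; the sample dates and mention counts below are made up for illustration):

```python
import pandas as pd

# Invented sample with repeated dates, mimicking df_tve's structure
df_tve = pd.DataFrame({
    'date': pd.to_datetime(['2019-01-01', '2019-01-01', '2019-01-02']),
    'repeticiones': [2, 3, 5],
})

# Same aggregation as the gist: total mentions per distinct date
df_plot = df_tve.groupby(df_tve['date'])['repeticiones'].agg(['sum'])
print(df_plot['sum'].tolist())  # one total per distinct date
```

Mentions on the same date are summed into a single row, which is what the plotting snippets then draw.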
@JLFDataScience
JLFDataScience / tve_nvejecimiento_pandas.py
Last active March 11, 2020 18:44
Processing of the downloaded data with pandas
#Add a date column
df['date'] = df['programme_date'].astype(str).str[:10]
df['date'] = pd.to_datetime(df['date'], format='%Y/%m/%d')
#Add which news broadcast it refers to (T15/T21)
df['Sesion'] = df['programme_date'].astype(str).str[10:13]
df['Sesion'].unique()
#Add the duration of each news item
df['duration'] = df['end_time'] - df['start_time']
#Create a specific df with the data of interest
df_tve = df[['date','Sesion','duration','content']]
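The duration computation above can be illustrated with invented timestamps (a minimal sketch assuming start_time and end_time are time-like columns; the values are made up):

```python
import pandas as pd

# Invented start/end times for two news items
df = pd.DataFrame({
    'start_time': pd.to_timedelta(['00:01:00', '00:05:30']),
    'end_time':   pd.to_timedelta(['00:02:30', '00:07:00']),
})

# Same operation as the gist: duration of each news item
df['duration'] = df['end_time'] - df['start_time']
print(df['duration'].tolist())
```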
@JLFDataScience
JLFDataScience / Final_function_scrape.py
Created February 21, 2020 16:09
Final function for scraping Yahoo Finance data
# Design the scrape function considering potential Yahoo connection errors
# Scraping data for each ticker in the df
error = 0
for j in range(len(df_tikers)):
    #print("ticker =", df_tikers['ticker'][j], j, " de ", len(df_tikers))
    stock = df_tikers['ticker'][j]
    # Request the information from Yahoo, retrying if it does not respond, and
    # stopping the program if Yahoo fails to reply for two tickers in sequence
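The retry logic described in the comments is truncated in this gist. Below is a minimal sketch of one way it could work; the `fetch_with_retry` helper, its parameters, and the injectable `opener` argument are assumptions for illustration, not the author's actual code:

```python
import time
from urllib.request import urlopen
from urllib.error import URLError

def fetch_with_retry(url, attempts=3, delay=2.0, opener=urlopen):
    """Fetch a URL, retrying on connection errors; return None if all attempts fail."""
    for _ in range(attempts):
        try:
            return opener(url).read()
        except URLError:
            time.sleep(delay)  # wait before insisting again
    return None
```

Inside the ticker loop, counting consecutive None results would then implement the "stop after two tickers in sequence" rule.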
@JLFDataScience
JLFDataScience / test_extract_all_tickers.py
Created February 21, 2020 13:18
Test extracting all statistical fields from one company page
#We test how each of the statistical fields is extracted for a company, in this case Google (GOOG)
for field in list_of_fields:
    ScrapedValue = sourceCode.split('>' + field)[1].split('</td></tr>')[0].split('>')[-1]
    print("field =", field, " / scraped value = ", ScrapedValue)
@JLFDataScience
JLFDataScience / Extract_data_fields.py
Created February 21, 2020 13:11
Extract data of fields of a company
#We extract the statistical data
#We do another test before the full function
stock = 'GOOG'
sourceCode = str(urlopen('https://finance.yahoo.com/quote/'+stock+'/key-statistics?p='+stock).read())
#print(sourceCode)
#We extract the name of the company
compname= sourceCode.split('Find out all the key statistics for')[1].split(', including')[0]
#print(compname)
#Alphabet Inc. (GOOG)
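The split-based extraction above can be checked offline against a synthetic page source (the sample markup below is invented to resemble what the splits expect, not real Yahoo output):

```python
# Invented fragment resembling the Yahoo key-statistics page source
sourceCode = ('...Find out all the key statistics for Alphabet Inc. (GOOG), '
              'including valuation measures...'
              '<tr><td>Market Cap</td><td>1.2T</td></tr>')

# Same splits as the gist
compname = sourceCode.split('Find out all the key statistics for ')[1].split(', including')[0]
field = 'Market Cap'
value = sourceCode.split('>' + field)[1].split('</td></tr>')[0].split('>')[-1]
print(compname, value)  # Alphabet Inc. (GOOG) 1.2T
```

This approach is brittle: any change in the page markup breaks the splits, which is why the final function guards against errors.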
@JLFDataScience
JLFDataScience / New_field_df.py
Created February 21, 2020 12:52
New fields in the dataframe
for i in range(len(list_of_fields)):
    df_tikers[list_of_fields[i]] = ''
#We add two more fields to identify each company
df_tikers['ScrapedName'] = ''
df_tikers['Sector'] = ''
df_tikers.head()
@JLFDataScience
JLFDataScience / Enter_tickers_df.py
Last active February 21, 2020 12:40
Enter tickers in new dataframe
# We create a new dataframe to store the extracted tickers
index = range(len(ScrapedAux))
df_tikers = pd.DataFrame(index=index, columns=['ticker'])
#We add each ticker to the dataframe
for i in range(len(ScrapedAux)):
    #print('Name:', ScrapedAux[i].replace('"','').replace('[',''))
    tiker = ScrapedAux[i].replace('"','').replace('[','')
    df_tikers.loc[i, 'ticker'] = tiker  #.loc avoids pandas' chained-assignment warning
@JLFDataScience
JLFDataScience / test_extract_tickers.py
Created February 21, 2020 12:02
Test to extract tickers correctly
for i in range(len(ScrapedAux)):
    tiker = ScrapedAux[i].replace('"','').replace('[','') #We clean each entry to get the ticker out
    print('Name:', tiker, 'type:', type(tiker))
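The same cleaning can be verified on a synthetic ScrapedAux list (the sample raw values below are invented for illustration):

```python
# Invented raw entries, as they might look before cleaning
ScrapedAux = ['["AAPL"', '"GOOG"', '"MSFT"']

# Same cleaning as the gist, written as a list comprehension
tikers = [s.replace('"', '').replace('[', '') for s in ScrapedAux]
print(tikers)  # ['AAPL', 'GOOG', 'MSFT']
```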