Skip to content

Instantly share code, notes, and snippets.

@aaizemberg
Last active October 25, 2018 20:13
Show Gist options
  • Save aaizemberg/616552f12dce4c91e22fb6417af4e741 to your computer and use it in GitHub Desktop.
Save aaizemberg/616552f12dce4c91e22fb6417af4e741 to your computer and use it in GitHub Desktop.
Las noticias del día 25/10/2018 (GoogleNews & Tableau)
# -*- coding: utf-8 -*-
import feedparser
import pandas as pd
# Google News topic codes (sent as the `topic` query parameter) paired with
# the category label written to the output file for each one.
lista = [
    {"letra": "n", "categoria": "Nacional"},
    {"letra": "s", "categoria": "Deportes"},
    {"letra": "m", "categoria": "Salud"},
    {"letra": "t", "categoria": "Ciencia y Tecnología"},
    {"letra": "e", "categoria": "Entretenimiento"},
    {"letra": "b", "categoria": "Economía"},
]

# Text that identifies the summary fragment carrying the related-article count.
_MARCA_CANTIDAD = u"artículos informativos&nbsp;&raquo;"


def _split_title(raw_title):
    """Split an entry title of the form "<headline> - <outlet>".

    Only the LAST ' - ' is treated as the separator, so a headline that
    itself contains ' - ' is kept whole instead of being cut at its first
    dash. When there is no separator at all, both returned values are the
    full title (same as the previous behaviour).
    """
    parts = raw_title.rsplit(' - ', 1)
    return parts[0], parts[-1]


def _article_url(link):
    """Return the part of `link` that follows '&url='.

    The entry link starts with the news.google part and is followed by the
    outlet's own URL. If the '&url=' marker is missing, the link is returned
    unchanged instead of raising IndexError.
    """
    pieces = link.split('&url=')
    return pieces[1] if len(pieces) > 1 else link


def _cluster_size(summary_html):
    """Extract the related-article count from an entry summary.

    The summary is cut at every <b> / </b> tag; in a fragment containing the
    marker text, the second space-separated token is read as the count, with
    '.' thousands separators removed. Returns 1 when no fragment yields a
    valid integer. If several fragments match, the last valid one wins.
    """
    size = 1
    for fragment in summary_html.replace("</b>", "<b>").split('<b>'):
        if _MARCA_CANTIDAD not in fragment:
            continue
        try:
            size = int(fragment.split(' ')[1].replace(".", ""))
        except (IndexError, ValueError):
            # Unexpected fragment layout: keep the previous value.
            continue
    return size


# One row per entry: [title, outlet, date, url, category, size].
data = []
for tema in lista:
    feed_url = ('https://news.google.com/news?pz=1&cf=all&ned=es_ar&hl=es&topic='
                + tema['letra'] + '&output=rss')
    feed = feedparser.parse(feed_url)
    entries = feed.entries
    for entry in entries:
        # Entries equal to the first entry of the feed are skipped, exactly
        # as the original script did (the reason is not documented).
        if entry == entries[0]:
            continue
        titulo, medio = _split_title(entry.title)
        data.append([
            titulo,
            medio,
            entry.get('published', ''),   # tolerate feeds without a date
            _article_url(entry.link),
            tema['categoria'],
            _cluster_size(entry.get('summary', '')),
        ])

# Largest clusters first, then write a tab-separated file with a header row.
df = pd.DataFrame(data, columns=['Title', 'Source', 'Date', 'url', 'Category', 'Size'])
df = df.sort_values(by='Size', ascending=False)
df.to_csv('googlenews_ar.tsv', sep='\t', header=True, index=False, encoding='utf-8')
<!DOCTYPE html>
<html>
<head>
<!-- Minimal page whose only content is an embedded Tableau Public
     visualization named "GoogleNews/GoogleNews". -->
<meta charset="utf-8">
<meta name="viewport" content="width=device-width">
<title>GoogleNews - 25/10/2018</title>
</head>
<body>
<!-- Tableau embed code, kept on one line as generated:
     * the <noscript> branch shows a static PNG of the visualization;
     * the <object class='tableauViz'> starts hidden (display:none) and
       carries the embed parameters (host URL, viz name, toolbar, etc.);
     * the inline script sets the object's width to 100% and its height to
       0.75 x the placeholder div's offsetWidth, then inserts a <script>
       element pointing at public.tableau.com's viz_v1.js before the object. -->
<div class='tableauPlaceholder' id='viz1540498161350' style='position: relative'><noscript><a href='#'><img alt=' ' src='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Go&#47;GoogleNews&#47;GoogleNews&#47;1_rss.png' style='border: none' /></a></noscript><object class='tableauViz' style='display:none;'><param name='host_url' value='https%3A%2F%2Fpublic.tableau.com%2F' /> <param name='embed_code_version' value='3' /> <param name='site_root' value='' /><param name='name' value='GoogleNews&#47;GoogleNews' /><param name='tabs' value='no' /><param name='toolbar' value='yes' /><param name='static_image' value='https:&#47;&#47;public.tableau.com&#47;static&#47;images&#47;Go&#47;GoogleNews&#47;GoogleNews&#47;1.png' /> <param name='animate_transition' value='yes' /><param name='display_static_image' value='yes' /><param name='display_spinner' value='yes' /><param name='display_overlay' value='yes' /><param name='display_count' value='yes' /></object></div> <script type='text/javascript'> var divElement = document.getElementById('viz1540498161350'); var vizElement = divElement.getElementsByTagName('object')[0]; vizElement.style.width='100%';vizElement.style.height=(divElement.offsetWidth*0.75)+'px'; var scriptElement = document.createElement('script'); scriptElement.src = 'https://public.tableau.com/javascripts/api/viz_v1.js'; vizElement.parentNode.insertBefore(scriptElement, vizElement); </script>
</body>
</html>
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment