"Marc Dubuisson croque l’actu" en RSS
#!/usr/bin/python3
# $Id: dubuisson-7sur7.py 521 2020-03-08 21:03:51Z sarterm $
# Dependencies: bs4, lxml, requests
import logging
from xml.sax.saxutils import escape

import requests
from bs4 import BeautifulSoup
# Page listing the cartoons; also used as the channel <link> of the feed.
url = "https://www.7sur7.be/extra/marc-dubuisson-croque-l-actu~a2e598b6/"

log = logging.getLogger(__name__)
log.addHandler(logging.StreamHandler())

# Browser-like request headers sent with the page request.
# NOTE(review): the pwv/pws cookie presumably pre-accepts the site's
# cookie-consent wall -- confirm against the live site.
REQUEST_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Connection': 'keep-alive',
    'Cookie': 'pwv=1; pws=functional|analytics|content_recommendation|targeted_advertising|social_media; pwv=1; pws=functional|analytics|content_recommendation|targeted_advertising|social_media',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64; rv:51.0) Gecko/20100101 Firefox/51.0',
}

# Seconds to wait for the remote server before giving up instead of hanging.
REQUEST_TIMEOUT = 30


def render_header(feed_url):
    """Return the opening of the RSS document (channel metadata).

    Args:
        feed_url: URL placed in the channel ``<link>``; XML-escaped here.
    """
    return f"""
<rss version="2.0">
<channel>
<title>Dubuisson / 7 sur 7</title>
<link>{escape(feed_url)}</link>
<description>Marc Dubuisson croque l’actu</description>
"""


def render_item(title, link, description_html):
    """Return one RSS ``<item>`` element as text.

    All three values are XML-escaped so that characters such as ``&`` or
    ``<`` coming from the scraped page cannot produce an ill-formed feed.
    The description is emitted as entity-encoded HTML.

    Args:
        title: Plain-text item title.
        link: URL of the item.
        description_html: HTML fragment shown as the item body.
    """
    return f"""
<item>
<title>{escape(title)}</title>
<link>{escape(link)}</link>
<author>Marc Dubuisson</author>
<description>{escape(description_html)}</description>
</item>
"""


def render_footer():
    """Return the closing tags of the RSS document."""
    return """
</channel>
</rss>
"""


def fetch_posts(page_url):
    """Download *page_url* and return its picture blocks as bs4 tags."""
    session = requests.Session()
    session.headers.update(REQUEST_HEADERS)
    response = session.get(page_url, timeout=REQUEST_TIMEOUT)
    page = BeautifulSoup(response.content, 'lxml')
    return page.find_all('div', {'class': 'article__component article__component--picture'})


def main():
    """Fetch the cartoon page and print the RSS feed to standard output."""
    # Fetch before printing anything so that a failed request does not
    # leave a truncated feed on stdout.
    posts = fetch_posts(url)
    print(render_header(url))
    for post in posts:
        link = post.find('a', {'class': 'slideshow-trigger'})
        img = post.find('img')
        # Skip blocks lacking the expected markup instead of crashing and
        # losing the whole feed.
        if link is None or img is None or not link.get('href'):
            log.warning("Skipping picture block without slideshow link or image")
            continue
        print(render_item(img.get('alt', ''), link['href'], str(img)))
    print(render_footer())


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment