Scrape mujkaktus.cz and send news to a Telegram channel
#!/usr/bin/env python3 | |
import lxml.html | |
import urllib.request | |
from pathlib import Path | |
import requests | |
# Bot token interpolated into the Telegram Bot API URL.
# NOTE(review): "FIXME" looks like a placeholder to be replaced before use - confirm.
telegram_bot_token = "FIXME"

# Value sent as "chat_id" with every outgoing message.
telegram_group_name = "@kvetinac"
def send_telegram_message(message):
    """Post *message* to the configured Telegram chat via the Bot API.

    The text is sent with ``parse_mode`` set to ``HTML``, so *message* must
    already be valid Telegram HTML.

    Args:
        message: The text to send.

    Raises:
        RuntimeError: If the decoded API response does not report ``ok``.
    """
    url = "https://api.telegram.org/bot{}/sendMessage".format(telegram_bot_token)
    payload = {
        "chat_id": telegram_group_name,
        "text": message,
        "parse_mode": "HTML",
    }
    # Without an explicit timeout, requests waits indefinitely on a stalled
    # connection, which would hang the whole script.
    r = requests.post(url, data=payload, timeout=30).json()
    # .get() so that an unexpected payload without "ok" is reported as a
    # Telegram error instead of surfacing as a KeyError.
    if not r.get("ok"):
        print(r)
        raise RuntimeError("Telegram error")
def get_last_newsitem():
    """Download the mujkaktus.cz homepage and return its first news element.

    Returns:
        The first element matching the news-article path, with its last
        child removed.

    Raises:
        RuntimeError: If the page contains no matching news element.

    Network errors raised by ``urlopen`` propagate to the caller.
    """
    url = "https://www.mujkaktus.cz/homepage"
    # The context manager closes the HTTP response once parsing is done; the
    # timeout keeps a stalled server from blocking the script forever.
    with urllib.request.urlopen(url, timeout=30) as response:
        doc = lxml.html.parse(response)
    news = doc.find('//div[@class="journal-content-article"]/div[@class="article ico ico-news-default"]')
    if news is None:
        # find() returns None when nothing matches; fail with a clear message
        # instead of a TypeError on the subscript below.
        raise RuntimeError("News item not found on {}".format(url))
    news.remove(news[-1])  # Remove the last paragraph
    return news
def save_last_newsitem(news):
    """Serialize *news* to ``lastitem.html`` in the directory of this script."""
    target = Path(__file__).parent / "lastitem.html"
    markup = lxml.html.tostring(news, encoding="unicode")
    target.write_text(markup)
def load_saved_newsitem():
    """Read ``lastitem.html`` from this script's directory and parse it.

    Returns:
        The element parsed from the stored HTML fragment.

    A missing file surfaces as ``FileNotFoundError`` from ``read_text``.
    """
    source = Path(__file__).with_name("lastitem.html")
    stored_markup = source.read_text()
    return lxml.html.fragment_fromstring(stored_markup)
def render_html(news):
    """Render a news element as HTML text for a Telegram message.

    The text of the first child becomes a bold title line; the stripped text
    of all remaining children is concatenated on the following line.

    Args:
        news: A sequence of items that each provide ``text_content()``.

    Returns:
        A string of the form ``"<b>title</b>\\nbody"`` in which ``<``, ``>``
        and ``&`` from the source text are replaced by HTML entities.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    import html

    # The message is sent with parse_mode=HTML, so literal "<", ">" and "&"
    # in the scraped text must be escaped or they would be read as markup.
    title = html.escape(news[0].text_content(), quote=False)
    body = "".join(
        html.escape(n.text_content().strip(), quote=False) for n in news[1:]
    )
    return "<b>{}</b>\n{}".format(title, body)
def main():
    """Send the newest news item to Telegram unless it was already sent.

    Returns without doing anything when the download fails with
    ``ConnectionResetError`` or when the text of the fetched item equals the
    text of the item stored by the previous run. Otherwise the item is sent
    and then stored.
    """
    try:
        latest = get_last_newsitem()
    except ConnectionResetError:
        return

    try:
        previous = load_saved_newsitem()
    except FileNotFoundError:
        # Nothing has been stored yet, so there is nothing to compare against.
        previous = None

    if previous is not None:
        if previous.text_content().strip() == latest.text_content().strip():
            return

    send_telegram_message(render_html(latest))
    save_last_newsitem(latest)
# Run the check only when the file is executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment