Created
February 24, 2020 15:09
-
-
Save oskar456/dcd4229323a87b6bb17358fcb6ca89d2 to your computer and use it in GitHub Desktop.
Scrape mujkaktus.cz and send news to a Telegram channel
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import html
import os
import urllib.request
from pathlib import Path

import lxml.html
import requests
# Telegram Bot API token, interpolated into the sendMessage URL.
# Read from the TELEGRAM_BOT_TOKEN environment variable when it is set so the
# secret does not have to live in the source; otherwise the original
# placeholder value is used unchanged.
telegram_bot_token = os.environ.get("TELEGRAM_BOT_TOKEN", "FIXME")
# Target chat, passed as ``chat_id`` in the sendMessage payload.
telegram_group_name = "@kvetinac"
def send_telegram_message(message):
    """Post *message* to the configured Telegram chat via the Bot API.

    The request is sent with ``parse_mode`` set to ``HTML``, so *message*
    must already be valid Telegram HTML.

    Args:
        message: The message text to send.

    Raises:
        RuntimeError: If the decoded API response does not report ``ok``.
    """
    url = "https://api.telegram.org/bot{}/sendMessage".format(telegram_bot_token)
    payload = {
        "chat_id": telegram_group_name,
        "text": message,
        "parse_mode": "HTML",
    }
    # requests applies no timeout by default; without one a stalled
    # connection would block this script indefinitely.
    r = requests.post(url, data=payload, timeout=30).json()
    if not r.get("ok"):
        # Put the API's own explanation into the exception so the failure
        # reason travels with the traceback.
        raise RuntimeError("Telegram error: {}".format(r.get("description", r)))
def get_last_newsitem():
    """Download the mujkaktus.cz homepage and return its news element.

    Returns:
        The first ``div`` with class ``article ico ico-news-default`` found
        under a ``div`` with class ``journal-content-article``, with its
        last child removed.

    Raises:
        RuntimeError: If no such element exists in the downloaded page.
    """
    url = "https://www.mujkaktus.cz/homepage"
    # Use the response as a context manager so the connection is closed
    # deterministically, and bound the wait with a timeout.
    with urllib.request.urlopen(url, timeout=30) as response:
        doc = lxml.html.parse(response)
    news = doc.find(
        '//div[@class="journal-content-article"]'
        '/div[@class="article ico ico-news-default"]'
    )
    if news is None:
        # find() returns None on no match; fail with a clear message rather
        # than an opaque error on the subscript below.
        raise RuntimeError("News item not found on {}".format(url))
    news.remove(news[-1])  # Remove the last paragraph
    return news
def _lastitem_path():
    """Return the path of ``lastitem.html`` in this script's directory."""
    return Path(__file__).parent / "lastitem.html"


def save_last_newsitem(news):
    """Serialize *news* to ``lastitem.html`` next to this script.

    Args:
        news: The lxml element to store.
    """
    # Pin the encoding: the default depends on the process locale, which can
    # differ between runs (e.g. interactive shell vs. scheduler) and may not
    # be able to represent non-ASCII text.
    _lastitem_path().write_text(
        lxml.html.tostring(news, encoding="unicode"), encoding="utf-8"
    )


def load_saved_newsitem():
    """Parse and return the element stored in ``lastitem.html``.

    Returns:
        The lxml element parsed from the saved fragment.

    Raises:
        FileNotFoundError: If nothing has been saved yet.
    """
    # Same explicit encoding as save_last_newsitem() so the round trip is
    # independent of the locale.
    return lxml.html.fragment_fromstring(_lastitem_path().read_text(encoding="utf-8"))
def render_html(news):
    """Render a news element as Telegram HTML.

    The text of the first child becomes a bold heading; the stripped text of
    every remaining child is concatenated on the following line.

    Args:
        news: A sequence of elements exposing ``text_content()``.

    Returns:
        The message as a string of the form ``<b>title</b>\\nbody``.
    """
    # text_content() yields plain text. The message is sent with
    # parse_mode=HTML, so literal &, < and > must be escaped or they would be
    # read as markup.
    title = html.escape(news[0].text_content(), quote=False)
    # NOTE(review): the remaining children are joined with no separator, as
    # in the original -- confirm that is intended for multi-paragraph items.
    body = "".join(
        html.escape(n.text_content().strip(), quote=False) for n in news[1:]
    )
    return "<b>{}</b>\n{}".format(title, body)
def main():
    """Send the current news item to Telegram if it differs from the saved one.

    Returns silently when fetching the page fails with a connection reset.
    After a successful send, the item is saved for the next comparison.
    """
    try:
        current = get_last_newsitem()
    except ConnectionResetError:
        return

    try:
        previous = load_saved_newsitem()
    except FileNotFoundError:
        # Nothing stored yet, so there is nothing to compare against.
        previous = None

    # Compare by stripped text only; identity check because lxml elements
    # with no children are falsy.
    if previous is not None:
        if previous.text_content().strip() == current.text_content().strip():
            return

    send_telegram_message(render_html(current))
    save_last_newsitem(current)


if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment