Skip to content

Instantly share code, notes, and snippets.

@oskar456
Created February 24, 2020 15:09
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save oskar456/dcd4229323a87b6bb17358fcb6ca89d2 to your computer and use it in GitHub Desktop.
Save oskar456/dcd4229323a87b6bb17358fcb6ca89d2 to your computer and use it in GitHub Desktop.
Scrape mujkaktus.cz and send news to a Telegram channel
#!/usr/bin/env python3
import html
import urllib.request
from pathlib import Path

import lxml.html
import requests
# Bot API token interpolated into the sendMessage URL; "FIXME" is a
# placeholder that has to be replaced before the script can send anything.
telegram_bot_token = "FIXME"
# Value sent as "chat_id" in the sendMessage payload.
telegram_group_name = "@kvetinac"
def send_telegram_message(message):
    """Post *message* to the configured Telegram chat via the Bot API.

    The text is sent with ``parse_mode="HTML"``, so *message* must already
    be valid Telegram HTML.

    Args:
        message: HTML-formatted text to send.

    Raises:
        RuntimeError: If the JSON response does not report ``"ok"`` as true.
    """
    url = "https://api.telegram.org/bot{}/sendMessage".format(telegram_bot_token)
    payload = {
        "chat_id": telegram_group_name,
        "text": message,
        "parse_mode": "HTML",
    }
    # requests waits indefinitely by default; bound the wait so an
    # unresponsive server cannot hang the script forever.
    r = requests.post(url, data=payload, timeout=30).json()
    # .get() so an unexpected body without "ok" is reported as a Telegram
    # error instead of surfacing as a KeyError.
    if not r.get("ok"):
        print(r)
        raise RuntimeError("Telegram error")
def get_last_newsitem():
    """Download the mujkaktus.cz homepage and return its news element.

    Returns:
        The first ``div`` with class ``article ico ico-news-default`` found
        inside a ``journal-content-article`` ``div``, with its last child
        element removed.

    Raises:
        RuntimeError: If the page contains no matching news element.
    """
    url = "https://www.mujkaktus.cz/homepage"
    # Parse inside the context manager so the HTTP response is always closed.
    with urllib.request.urlopen(url) as response:
        doc = lxml.html.parse(response)
    news = doc.find('//div[@class="journal-content-article"]/div[@class="article ico ico-news-default"]')
    if news is None:
        # find() returns None when nothing matches; fail with a clear message
        # rather than an AttributeError on the next line.
        raise RuntimeError("News item not found at {}".format(url))
    news.remove(news[-1])  # Remove the last paragraph
    return news
def _last_item_path():
    """Return the path of ``lastitem.html`` in this script's directory."""
    return Path(__file__).parent / "lastitem.html"


def save_last_newsitem(news):
    """Serialize *news* to ``lastitem.html`` next to this script.

    Args:
        news: lxml element to store.
    """
    # Explicit UTF-8: the default is the locale encoding, which may be unable
    # to represent non-ASCII text.
    _last_item_path().write_text(
        lxml.html.tostring(news, encoding="unicode"), encoding="utf-8"
    )


def load_saved_newsitem():
    """Parse and return the element stored in ``lastitem.html``.

    Returns:
        The element parsed from the saved HTML fragment.

    Raises:
        FileNotFoundError: If no item has been saved yet.
    """
    # Must use the same encoding as save_last_newsitem().
    return lxml.html.fragment_fromstring(
        _last_item_path().read_text(encoding="utf-8")
    )
def render_html(news):
    """Render a news element as a Telegram HTML message.

    The text of the first child becomes a bold title line; the stripped text
    of the remaining children is concatenated after a newline.

    Args:
        news: Sequence whose items provide ``text_content()``.

    Returns:
        The message as a string of the form ``<b>title</b>\\nbody``.
    """
    # The message is sent with parse_mode="HTML", so literal "<", ">" and "&"
    # in the scraped text must be escaped; quotes are left as they are.
    title = html.escape(news[0].text_content(), quote=False)
    body = "".join(
        html.escape(n.text_content().strip(), quote=False) for n in news[1:]
    )
    return "<b>{}</b>\n{}".format(title, body)
def main():
    """Send the current news item to Telegram unless it was already sent.

    Returns silently when fetching the page fails with a connection reset,
    or when the stored item has the same stripped text as the current one.
    Otherwise the item is sent and then stored for the next run.
    """
    try:
        latest = get_last_newsitem()
    except ConnectionResetError:
        return

    try:
        previous = load_saved_newsitem()
    except FileNotFoundError:
        # Nothing stored yet: treat the current item as new.
        previous = None

    if previous is not None:
        old_text = previous.text_content().strip()
        new_text = latest.text_content().strip()
        if old_text == new_text:
            return

    send_telegram_message(render_html(latest))
    save_last_newsitem(latest)
# Run the check only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment