Last active
July 28, 2019 06:03
-
-
Save meatyite/7a51ca616db0fc527496812d8b063d43 to your computer and use it in GitHub Desktop.
ynet.co.il Suicide awareness bot
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from ynet import Article, Comment | |
from bs4 import BeautifulSoup as bs | |
from requests import get as GET | |
from datetime import datetime, timedelta | |
from threading import Timer | |
class ArticleScraper:
    """Scrape a single ynet.co.il article page and post a suicide-awareness
    comment when the headline or subtitle mentions suicide-related keywords."""

    # Hebrew keywords indicating the article may be about suicide.
    suicide_keywords = [
        "התאבדות",
        "אובדני",
        "אובדנות",
        "מתאבד",
        "התאבד",
        "התאבדה",  # FIX: a missing comma here previously merged this entry with the next one
        "אובדנית",
        "מתאבדת",
        "דיכאון",
        "מדוכא",
        "רוצה למות",
    ]

    # Comment body posted on matching articles (runtime string — kept verbatim).
    suicide_message = """
אם אתה או מישהו שאתה מכיר מרגיש אובדני או רוצה למות, יש עם מי לדבר.
בבקשה התקשרו לער"ן במספר 1201,
זה עדיף מלסבול לבד.
|
ביפ בופ, אני רובוט.
בוט על ידי sl4v
"""

    def __init__(self, article_url):
        """Fetch the article page and extract its title and subtitle.

        article_url: full URL of a ynet article page.
        """
        self.articleAPI = Article(article_url)
        article_soup = bs(GET(article_url).content.decode(), 'html.parser')
        self.article_soup = article_soup
        # NOTE(review): .find() returns None when the div is absent, which would
        # make the .string access raise AttributeError — assumed always present
        # on ynet article pages; confirm against live markup.
        self.article_title = article_soup.find('div', {'class': 'art_header_title'}).string
        self.article_sub_title = article_soup.find('div', {'class': 'art_header_sub_title'}).string

    def article_has_keywords(self, keywords):
        """Return True if any keyword appears in the title or subtitle.

        FIX: previously fell through and returned None on no match; now
        returns an explicit bool (backward compatible — callers use truthiness).
        """
        return any(
            keyword in self.article_title or keyword in self.article_sub_title
            for keyword in keywords
        )

    def is_article_about_suicide(self):
        """Return True if the article headline matches any suicide keyword."""
        return self.article_has_keywords(self.suicide_keywords)

    def send_suicide_comment(self):
        """Post the awareness comment on this article via the ynet API wrapper."""
        print("Attempting to send suicide comment...")
        Comment(
            article=self.articleAPI,
            name="Suicide Awareness bot",
            email="iamsl4v@protonmail.com",
            title="חשוב",
            text=self.suicide_message
        ).post()
class YnetXMLScraper:
    """Scrape a ynet RSS category feed and yield an ArticleScraper per article link."""

    def __init__(self, category_rss):
        """Fetch and parse the RSS feed at category_rss."""
        self.category_url = category_rss
        self.category_page_soup = bs(GET(category_rss).content.decode(), 'lxml')

    def search_for_articles(self):
        """Yield an ArticleScraper for every <link> in the feed that points at an article."""
        article_tags = self.category_page_soup.find_all('link')
        # find_all returns an empty list rather than None; guard kept for safety.
        if article_tags is None:
            return
        for article_tag in article_tags:
            if article_tag.string is None:
                # FIX: was `return`, which aborted the whole scan at the first
                # contentless <link> (e.g. a self-closing tag); skip it instead.
                continue
            article_url = self.remove_all_newlines_from_url(article_tag.string)
            if self.is_article_url_linking_to_article(article_url):
                yield ArticleScraper(article_url)

    def is_article_url_linking_to_article(self, url):
        """Return True if url points at a ynet article page, regardless of scheme.

        FIX: the original only stripped the 'https://' prefix (its inner
        .replace on the literal evaluated to ''), so plain http:// article
        links were always rejected. Strip both schemes before matching.
        """
        stripped = url.replace('https://', '').replace('http://', '')
        return stripped.startswith('www.ynet.co.il/articles/')

    def remove_all_newlines_from_url(self, url):
        """Strip all CR and LF characters from url (same net effect as the
        original chained replaces, stated directly)."""
        return url.replace('\r', '').replace('\n', '')
def start_looking_in_category(category_url):
    """Scan one RSS category feed and comment on every suicide-related article."""
    feed = YnetXMLScraper(category_url)
    for scraped_article in feed.search_for_articles():
        if not scraped_article.is_article_about_suicide():
            continue
        scraped_article.send_suicide_comment()
def start_looking():
    """Run one scan pass over all configured ynet RSS category feeds."""
    feed_urls = (
        "http://www.ynet.co.il/Integration/StoryRss3052.xml",
        "https://www.ynet.co.il/Integration/StoryRss2.xml",
    )
    for feed_url in feed_urls:
        start_looking_in_category(feed_url)
if __name__ == "__main__":
    # Schedule a single scan for noon (12:00) of the following day.
    now = datetime.today()
    # (redundant day=now.day removed; replace() keeps unspecified fields as-is)
    next_noon = now.replace(hour=12, minute=0, second=0, microsecond=0) + timedelta(days=1)
    secs = (next_noon - now).total_seconds()
    # FIX: the original passed start_looking() — calling the scan immediately
    # and handing Timer a None callback, which would raise when the timer
    # fired. Pass the callable itself.
    t = Timer(secs, start_looking)
    t.start()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment