The main Python script for running the bot
# importing modules
import feedparser
from bs4 import BeautifulSoup
import tweepy
import os
import logging
import datetime
# url for collecting news
url = "http://news.google.com/news?q=computational+chemistry&hl=en-US&sort=date&gl=US&num=100&output=rss"
# logging config
logging.basicConfig(level=logging.INFO, filename='data.txt',)
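# logs are appended to data.txt (basicConfig's default filemode is 'a')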
logger = logging.getLogger()
# Keys
CONSUMER_KEY = os.environ['Consumer_Key']
CONSUMER_SECRET_KEY = os.environ['Consumer_Secret_Key']
ACCESS_TOKEN = os.environ['Access_Token']
ACCESS_TOKEN_SECRET = os.environ['Access_Token_Secret']
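# NOTE: the script assumes these four variables are already exported in the
# environment (names must match the os.environ lookups above), e.g.
#   export Consumer_Key=... Consumer_Secret_Key=...
#   export Access_Token=... Access_Token_Secret=...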
# Authentication
auth = tweepy.OAuthHandler(CONSUMER_KEY, CONSUMER_SECRET_KEY)
auth.set_access_token(ACCESS_TOKEN, ACCESS_TOKEN_SECRET)
api = tweepy.API(auth)
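# optional sanity check (a sketch, assuming tweepy v3's API): fail early with
# a logged message if the credentials are rejected
try:
    api.verify_credentials()
except tweepy.TweepError as e:
    logger.info(
        f"Authentication failed at : {str(datetime.datetime.now())} due to {e} error\n\n\n")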
class ParseFeed():
    def __init__(self, url):
        self.feed_url = url

    def clean(self, html):
        '''
        Extracting the text from HTML and doing some cleaning
        '''
        # naming an explicit parser avoids bs4's GuessedAtParserWarning
        soup = BeautifulSoup(html, 'html.parser')
        text = soup.get_text()
        text = text.replace('\xa0', ' ')
        return text
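    # e.g. clean('<b>New&nbsp;DFT study</b>') returns 'New DFT study'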

    def follow_back(self, api):
        '''
        Following back the users who retweeted the latest tweet
        '''
        # getting the retweeters of the latest tweet
        latest_tweet_id = api.user_timeline(count=1)[0].id
        retweets_list = api.retweets(latest_tweet_id)
        retweeter_id_list = [retweet.user.id for retweet in retweets_list]
        # getting the IDs of the accounts already followed
        friend_id_list = [friend.id for friend in api.friends(
            user_id="CompChemNewsBot")]
        # removing the already followed users
        final_list = [
            user_id for user_id in retweeter_id_list if user_id not in friend_id_list]
        # in case there is no new user to follow
        if not final_list:
            logger.info(
                f"No new user found at: {str(datetime.datetime.now())}\n\t\t\t\t\t\t******\t\t\t\t\t\t")
        # new users found: following them back
        else:
            for user in final_list:
                try:
                    api.create_friendship(user_id=user)
                    logger.info(
                        f"{user} UserID followed back at : {str(datetime.datetime.now())}\n\t\t\t\t\t\t******\t\t\t\t\t\t")
                except Exception as e:
                    logger.info(
                        f"{user} can't be followed back at : {str(datetime.datetime.now())} due to {e} error\n\t\t\t\t\t\t******\t\t\t\t\t\t")

    def tweet(self, text_list, url_list):
        '''
        Posting the news on Twitter and logging the data (the first news item
        of the list is posted; one can modify this with the random module)
        '''
        # removing news that has already been tweeted (filtering into a new
        # list instead of removing items from a list while iterating over it)
        file_name = "tweets.txt"
        with open(file_name, "r") as file:
            tweeted_list = [line.strip() for line in file.readlines()]
        fresh_pairs = [(text, url) for text, url in zip(text_list, url_list)
                       if text not in tweeted_list]
        # tweeting the first news item from the filtered list; on failure,
        # trying the next one
        for text, url in fresh_pairs:
            try:
                api.update_status(
                    f"@aritraroy24 #compchem #news #update #science #chemistry #quantum\nπ™π™Šπ˜Ώπ˜Όπ™”'π™Ž π™π™‹π˜Ώπ˜Όπ™π™€: {text}\n{url}")
                # recording the tweet only after it has been posted successfully
                with open(file_name, "a") as file:
                    file.write(f"{text}\n\n")
                logger.info(
                    f"Tweet done at : {str(datetime.datetime.now())} === {text} === {url}\n\n\n")
                break
            except Exception as e:
                logger.info(
                    f"Tweet can't be done at : {str(datetime.datetime.now())} due to {e} error\n\n\n")

    def parse(self):
        '''
        Parsing the feed URL and collecting the descriptions and URLs of the
        news items whose descriptions have fewer than 150 non-space characters
        '''
        text_list = []
        url_list = []
        feeds = feedparser.parse(self.feed_url).entries
        for f in feeds:
            text = self.clean(f.get("description"))
            url = f.get("link")
            # counting the non-space characters of the cleaned description
            count = sum(1 for ch in text if ch != ' ')
            if count < 150:
                text_list.append(text)
                url_list.append(url)
        self.follow_back(api)
        self.tweet(text_list, url_list)


feed = ParseFeed(url)
feed.parse()
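# the script posts once per run; a sketch of a long-running variant, assuming
# no external scheduler (e.g. cron or a GitHub Actions workflow) is used:
#   import time
#   while True:
#       ParseFeed(url).parse()
#       time.sleep(6 * 60 * 60)  # run every six hours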