Skip to content

Instantly share code, notes, and snippets.

@aritraroy24
Last active June 4, 2021 14:28
Show Gist options
  • Save aritraroy24/cd12cca07b581330ec59dacea91b3600 to your computer and use it in GitHub Desktop.
Save aritraroy24/cd12cca07b581330ec59dacea91b3600 to your computer and use it in GitHub Desktop.
Parsing an RSS feed to get each entry's text and URL
class ParseFeed():
    '''
    Collect short entry descriptions and their links from an RSS feed.

    NOTE(review): this snippet relies on names that are not visible here.
    `feedparser` and `BeautifulSoup` must be imported by the surrounding
    module, and `follow_back`, `tweet` and the `api` object used by
    `parse` are presumably defined elsewhere -- confirm before running.
    '''

    # An entry is kept only when its cleaned description has fewer than this
    # many non-space characters.
    MAX_NON_SPACE_CHARS = 150

    def __init__(self, url):
        '''
        Remember the feed address.

        Args:
            url: Feed location; handed to `feedparser.parse` by `parse`.
        '''
        self.feed_url = url

    def clean(self, html):
        '''
        Getting the text from html and doing some cleaning

        Args:
            html: HTML markup of an entry description; may be None or empty.

        Returns:
            The text content with non-breaking spaces replaced by regular
            spaces, or an empty string when `html` is None or empty.
        '''
        if not html:
            # Entries without a description yield None from `.get`; avoid
            # handing that to the HTML parser.
            return ''
        # Name the parser explicitly so the result does not depend on which
        # optional parser libraries happen to be installed.
        soup = BeautifulSoup(html, 'html.parser')
        return soup.get_text().replace('\xa0', ' ')

    def parse(self):
        '''
        Parsing the feed URL and collecting the descriptions and URLs of the
        entries whose cleaned description has fewer than
        MAX_NON_SPACE_CHARS (150) characters, spaces not counted.

        The collected lists are passed to `self.tweet` after
        `self.follow_back(api)` has been called. Nothing is returned.
        '''
        text_list = []
        url_list = []
        for entry in feedparser.parse(self.feed_url).entries:
            text = self.clean(entry.get("description"))
            # Only the plain space character is excluded from the count.
            non_space_chars = len(text) - text.count(' ')
            if non_space_chars < self.MAX_NON_SPACE_CHARS:
                text_list.append(text)
                url_list.append(entry.get("link"))
        # NOTE(review): the original indentation was lost; these two calls are
        # placed after the loop so they run once with the complete lists --
        # confirm against the full source.
        self.follow_back(api)
        self.tweet(text_list, url_list)
# Build a parser for the feed and run one collection pass.
# NOTE(review): `url` is not defined in the visible snippet -- presumably it
# is assigned earlier in the module; confirm.
feed = ParseFeed(url)
feed.parse()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment