@dnkls
Last active October 5, 2021 12:43

import re

import requests
from bs4 import BeautifulSoup
from newspaper import Article
from gensim.summarization import summarize

# `feeds`, `headers`, and `keywords` are assumed to be defined earlier in the gist
articles = []
for feed in feeds:
    response = requests.get(feed, headers=headers)
    webpage = response.content
    soup = BeautifulSoup(webpage, features="xml")
    # every article link will be found in an <item> tag; keep only the items
    # whose text mentions at least one of the keywords
    items = soup.find_all("item")
    matches = [item for item in items
               if any(re.search(kw, item.get_text(), re.IGNORECASE) for kw in keywords)]
    # extract the link from each matching item
    for item in matches:
        link = item.find("link").text
        articles.append(link)
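
# For reference, the filtering above assumes each feed entry follows the usual
# RSS 2.0 <item> shape; the values below are illustrative only, not taken from
# any specific feed:
#
#   <item>
#     <title>Tesla beats quarterly delivery estimates</title>
#     <link>https://example.com/news/tesla-deliveries</link>
#     <description>Short teaser text for the article...</description>
#   </item>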

news_dict = {}
for url in articles:
    # Here we take the links/urls out of the `articles` list built above; they
    # flow down for further processing and get added to the dictionary with their
    # title, url, description and so on. Open question: how to make this loop pull
    # the links one at a time, a couple of times per day over a set period (see
    # the scheduling sketch after this loop). 🤷🏻‍♂️
    info = Article(url)
    info.download()
    info.parse()
    info.nlp()

    article_url = info.url
    article_title = info.title
    article_keywords = info.keywords  # renamed to avoid shadowing the feed `keywords`
    text = info.text
    article_sum = summarize(text, ratio=0.2)  # gensim extractive summary, top 20% of sentences
    article_id = info.link_hash

    tickers = ['tsla', 'ko', 'apple', 'abbv', 'pltr']
    for word in tickers:
        # the original `word in keywords or text` was always truthy for non-empty
        # text; the ticker must be tested against both fields explicitly
        if word in article_keywords or word in text:
            news_dict[article_id] = {
                "article_title": article_title,
                "article_url": article_url,
                "article_summary": article_sum,
                "article_keyword": article_keywords
            }
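
# A minimal sketch answering the question in the comment above: wrap the per-url
# work in a function and drain the links one at a time on a timer, so a couple of
# links are processed per day. The `process_article` helper and the 12-hour sleep
# are assumptions for illustration, not part of the original gist; a cron job or
# the `schedule` package would be more robust alternatives.
import time
from collections import deque

def process_article(url):
    # the same per-url steps as in the loop above, factored out
    info = Article(url)
    info.download()
    info.parse()
    info.nlp()
    return info

link_queue = deque(articles)
while link_queue:
    url = link_queue.popleft()    # take exactly one link per pass
    process_article(url)
    time.sleep(12 * 60 * 60)      # wait ~12 hours => roughly two links per day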