Skip to content

Instantly share code, notes, and snippets.

@obafgkm44
Last active January 18, 2025 16:19
Show Gist options
  • Save obafgkm44/3b1652147a833d253997aec24c716b9f to your computer and use it in GitHub Desktop.
Save obafgkm44/3b1652147a833d253997aec24c716b9f to your computer and use it in GitHub Desktop.
import requests
from bs4 import BeautifulSoup
import notification as nt
# URL of the page to scrape.
url = "XXXX/CCC"
# Base URL of the press site; relative links are resolved against it.
url2 = "XXXX"
# HTTP headers (sets the User-Agent).
HEADERS = {
    "User-Agent": "Safari/537.36",
}


def resolve_href(href, base_url):
    """Return *href* as an absolute URL.

    Links that already start with ``http://`` or ``https://`` are returned
    unchanged. Anything else is treated as a path and appended to
    *base_url* with exactly one ``/`` between the two parts.

    Args:
        href: The raw ``href`` attribute value of an anchor.
        base_url: The URL that relative links are resolved against.

    Returns:
        The absolute URL as a string.
    """
    # Test for the full scheme prefix: a bare "http" check would also match
    # relative paths such as "httpdocs/file.pdf".
    if href.startswith(("http://", "https://")):
        return href
    return base_url.rstrip("/") + "/" + href.lstrip("/")


def fetch_news_links(page_url, base_url, timeout=10):
    """Collect the text and link of every anchor inside ``id="tab_news"``.

    Args:
        page_url: URL of the page to download.
        base_url: URL that relative links are resolved against.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        A list of ``{"text": ..., "url": ...}`` dicts, one per anchor that
        has an ``href`` attribute, in document order.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        RuntimeError: If the page has no element with ``id="tab_news"``.
    """
    # Without a timeout a stalled server would block the script forever.
    response = requests.get(page_url, headers=HEADERS, timeout=timeout)
    response.raise_for_status()  # fail on 4xx/5xx responses

    soup = BeautifulSoup(response.text, "html.parser")
    tab_news = soup.find(id="tab_news")
    if tab_news is None:
        # find() returns None when nothing matches; report that explicitly
        # instead of failing later with an opaque AttributeError.
        raise RuntimeError(f"no element with id='tab_news' found at {page_url}")

    return [
        {
            "text": link.get_text(strip=True),
            "url": resolve_href(link["href"], base_url),
        }
        for link in tab_news.find_all("a", href=True)
    ]


def main():
    """Schedule one notification per news link found on the page."""
    # NOTE(review): `nt` is presumably Pythonista's `notification` module;
    # confirm that schedule() accepts these keyword arguments there.
    for item in fetch_news_links(url, url2):
        nt.schedule(
            message=f"ニュース: {item['text']}",
            action_url=item['url'],
            title="aaaaaのニュース",
        )


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment