Last active
January 18, 2025 16:19
-
-
Save obafgkm44/3b1652147a833d253997aec24c716b9f to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests
from bs4 import BeautifulSoup
import notification as nt

# Scrape every link inside the page's id="tab_news" section and schedule one
# notification per link (message = link text, action URL = link target).

# URL of the page to scrape
url = "XXXX/CCC"
# URL of the press site; used as the base for relative links
url2 = "XXXX"
# HTTP headers (sets the User-Agent)
HEADERS = {
    "User-Agent": "Safari/537.36"
}
# Seconds to wait for the server; without a timeout requests can block forever
REQUEST_TIMEOUT = 10

# Fetch the page and fail fast on a non-2xx status code
response = requests.get(url, headers=HEADERS, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
soup = BeautifulSoup(response.text, 'html.parser')

# Locate the id="tab_news" section
tab_news = soup.find(id="tab_news")
if tab_news is None:
    # find() returns None when nothing matches; report that explicitly
    # instead of failing later with an opaque AttributeError.
    raise RuntimeError(f'No element with id="tab_news" found at {url}')

# Collect the text and target of every anchor that has an href
results = []
for link in tab_news.find_all("a", href=True):
    text = link.get_text(strip=True)
    href = link["href"]
    # Relative links (e.g. PDF links) are made absolute by prefixing url2
    if not href.startswith("http"):
        href = url2.rstrip("/") + "/" + href.lstrip("/")
    results.append({"text": text, "url": href})

# Schedule one notification per collected link
for item in results:
    nt.schedule(
        message=f"ニュース: {item['text']}",
        action_url=item['url'],
        title="aaaaaのニュース",
    )
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment