Skip to content

Instantly share code, notes, and snippets.

@zhullyb
Created June 10, 2024 12:58
Show Gist options
  • Save zhullyb/4b608ce88da8ef62b3e8378d4556f1f3 to your computer and use it in GitHub Desktop.
Save zhullyb/4b608ce88da8ef62b3e8378d4556f1f3 to your computer and use it in GitHub Desktop.
针对 fwwb.org.cn 的新公告爬虫
#!/usr/bin/env python
import os
import requests
import random
from bs4 import BeautifulSoup as bs
import time
import pytz
import datetime
# Page that get_html() downloads and get_title() scans for <h3> headings.
url = 'http://www.fwwb.org.cn/public/index'
# Timezone applied by get_time() to the timestamps printed in the log output.
tz = pytz.timezone('Asia/Shanghai')
def get_time():
    """Return the current time in the module timezone ``tz``.

    Returns:
        A string formatted as ``YYYY-MM-DD HH:MM:SS``.
    """
    now = datetime.datetime.now(tz)
    return now.strftime('%Y-%m-%d %H:%M:%S')
def get_ua():
    """Build a randomized desktop Firefox User-Agent string.

    The Firefox major version is drawn uniformly from 100..127 (inclusive)
    and the platform part is picked at random between Linux and Windows.

    Returns:
        The User-Agent header value.
    """
    version = random.randint(100, 127)
    return random.choice([
        f"Mozilla/5.0 (X11; Linux x86_64; rv:{version}.0) Gecko/20100101 Firefox/{version}.0",
        f"Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:{version}.0) Gecko/20100101 Firefox/{version}.0",
    ])
def get_html():
    """Download the page at the module-level ``url`` and return its text.

    A randomized Firefox User-Agent from ``get_ua()`` is sent with the
    request.

    Returns:
        The decoded response body.

    Raises:
        requests.RequestException: on connection failure, timeout, or an
            HTTP 4xx/5xx response status.
    """
    headers = {'User-Agent': get_ua()}
    # requests never times out by default; without this a hung connection
    # would block the polling loop forever.
    r = requests.get(url, headers=headers, timeout=30)
    # Refuse error pages so their markup is never parsed as announcements.
    r.raise_for_status()
    return r.text
def get_title(t):
    """Extract the text of every ``<h3>`` element from an HTML document.

    Args:
        t: HTML source as a string.

    Returns:
        A list of heading texts in document order. Whitespace runs
        (including newlines) are collapsed to single spaces and headings
        with no text are skipped.
    """
    soup = bs(t, 'html.parser')
    titles = []
    for heading in soup.find_all('h3'):
        # The state file stores one title per line and read_from_file()
        # strips each line, so a title with padding or embedded newlines
        # would never compare equal to its saved copy. Normalize here so
        # the comparison in main() is stable.
        text = ' '.join(heading.get_text().split())
        if text:
            titles.append(text)
    return titles
def read_from_file():
    """Load the previously saved titles from ``fwwb.txt``.

    Returns:
        One whitespace-stripped string per line of the file, or an empty
        list when the file does not exist yet.
    """
    try:
        # Explicit UTF-8 keeps non-ASCII titles independent of the locale's
        # default encoding. errors='replace' lets a state file written
        # earlier under a different encoding load (as non-matching titles)
        # instead of raising, so it simply gets rewritten on the next save.
        with open('fwwb.txt', 'r', encoding='utf-8', errors='replace') as f:
            return [line.strip() for line in f]
    except FileNotFoundError:
        # First run: nothing has been saved yet.
        return []


def save_to_file(t):
    """Overwrite ``fwwb.txt`` with the given titles, one per line.

    Args:
        t: iterable of title strings.
    """
    # Must use the same encoding as read_from_file() so titles round-trip.
    with open('fwwb.txt', 'w', encoding='utf-8') as f:
        f.writelines(title + '\n' for title in t)
def notify_me(text):
    """Notification hook; currently a placeholder that does nothing.

    Args:
        text: message to deliver (ignored by this stub).
    """
    return None
def main():
    """Run one polling cycle: fetch titles, compare with the saved set, report.

    When the fetched set of titles differs from the saved one, the new list
    is saved. If it contains titles that were not saved before, the first
    such title is passed to ``notify_me()`` and printed. Fetch failures and
    pages without any title are logged and leave the saved state untouched.
    """
    try:
        html = get_html()
    except requests.RequestException as exc:
        # A transient network problem must not terminate the polling loop.
        print(f"{get_time()}: Failed to fetch page: {exc}")
        return

    titles = get_title(html)
    if not titles:
        # An empty result means the page had no <h3> at all (error page or
        # layout change). Keep the saved state and avoid indexing titles[0].
        print(f"{get_time()}: No titles found on page.")
        return

    known = set(read_from_file())
    if known == set(titles):
        print(f"{get_time()}: No new title found.")
        return

    # Report a title that is actually new rather than whatever is listed
    # first: the set also differs when an old announcement is removed.
    new_titles = [title for title in titles if title not in known]
    if new_titles:
        # Notify before saving so a failed notification is retried on the
        # next cycle instead of being lost.
        notify_me(new_titles[0])
    save_to_file(titles)

    if new_titles:
        print("=" * 50)
        print(get_time())
        print("New title found: " + new_titles[0])
        print("=" * 50)
    else:
        print(f"{get_time()}: Title list changed, but no new title found.")
if __name__ == '__main__':
    # Poll forever, pausing ten minutes (600 seconds) between checks.
    poll_interval = 600
    while True:
        main()
        time.sleep(poll_interval)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment