Skip to content

Instantly share code, notes, and snippets.

@skyleaworlder
Created February 21, 2021 15:59
Show Gist options
  • Save skyleaworlder/722e35f22cd61ee61d3cc1c6e6ebfaae to your computer and use it in GitHub Desktop.
Save skyleaworlder/722e35f22cd61ee61d3cc1c6e6ebfaae to your computer and use it in GitHub Desktop.
jb51(脚本之家) pdf 网页存活情况检查 (非百度云链接检查)
import requests
import urllib
import re
import sys
def getInfo(book_id, timeout=10):
    """Fetch the jb51 book page for *book_id* and extract its title.

    Args:
        book_id: Id used to build the page URL
            ``https://www.jb51.net/books/<book_id>.html``.
        timeout: Seconds to wait for the server before giving up on the
            request.

    Returns:
        A dict ``{"id": book_id, "name": title}``. ``name`` is the empty
        string when the page contains no ``<h1 itemprop="name">`` heading or
        when the HTTP request itself fails (connection error, timeout, ...).
    """
    url = "https://www.jb51.net/books/" + str(book_id) + ".html"
    try:
        # Without an explicit timeout, requests may wait indefinitely on a
        # stalled connection and block the whole scan.
        res = requests.get(url, timeout=timeout)
    except requests.RequestException:
        # A failed request is reported the same way as a page without a
        # title, so one bad request does not abort a long-running scan.
        return {"id": book_id, "name": ""}

    # Force GBK decoding of the body (presumably the site's page encoding --
    # TODO confirm) instead of relying on requests' own guess.
    res.encoding = "GBK"
    match = re.search(r'<h1 itemprop="name">(.*?)</h1>', res.text)
    name = match.group(1) if match else ""
    return {"id": book_id, "name": name}
def scan(beg, end):
    """Probe every book id in ``[beg, end)`` and collect the ones with a title.

    For each id, ``getInfo`` is called and a progress line is printed:
    ``[success](i/total): <name>`` when a name was found, otherwise
    ``[failed_](i/total): [x]``, where ``i`` is the zero-based offset of the
    id from ``beg`` and ``total`` is ``end - beg``.

    Returns:
        The list of info dicts (as returned by ``getInfo``) whose ``"name"``
        is not the empty string, in ascending id order.
    """
    total = end - beg
    found = []
    for book_id in range(int(beg), int(end)):
        entry = getInfo(book_id)
        # Shared "(offset/total)" part of both progress messages.
        progress = f"({book_id - beg}/{total})"
        if entry["name"] == "":
            print("[failed_]" + progress + ": " + "[x]")
            continue
        found.append(entry)
        print("[success]" + progress + ": " + entry["name"])
    return found
def logoutput(res):
    """Write the scan results to ``book.log`` in the current directory.

    Args:
        res: Iterable of dicts with ``"id"`` and ``"name"`` keys.

    Each entry becomes one ``<id> -> <name>`` line. The file is opened in
    write mode with UTF-8 encoding, so any previous content is replaced.
    """
    # Format every line before opening the file, so a malformed entry raises
    # before the existing log is truncated.
    lines = []
    for entry in res:
        lines.append(str(entry["id"]) + " -> " + entry["name"])
    with open("book.log", "w", encoding="utf-8") as log_file:
        log_file.writelines(line + "\n" for line in lines)
if __name__ == "__main__":
    # Script entry point: scan the book ids in [begin_id, end_id) given on the
    # command line and write the pages that have a title to book.log.
    usage = "usage: " + sys.argv[0] + " <begin_id> <end_id>"
    if len(sys.argv) < 3:
        # Exit with a readable message instead of an IndexError traceback.
        sys.exit(usage)
    try:
        beg = int(sys.argv[1])
        end = int(sys.argv[2])
    except ValueError:
        sys.exit(usage + " (both ids must be integers)")
    print("Begin to scan:")
    res = scan(beg, end)
    logoutput(res)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment