Skip to content

Instantly share code, notes, and snippets.

@lxfly2000
Created September 12, 2023 05:31
Show Gist options
  • Save lxfly2000/41465568c06301bedf48482fdefbc727 to your computer and use it in GitHub Desktop.
Save lxfly2000/41465568c06301bedf48482fdefbc727 to your computer and use it in GitHub Desktop.
下载这个网页中的地图,注意先把渲染好的网页保存到本地再使用(https://zhy55415.tistory.com/15973215?category=568791)
# python 3.11
import os
import sys
import re
from urllib import request
# Command line: spider <savedir> htmlfile1 [htmlfile2...]
# argv[1] is the directory the downloaded images are written to; every
# following argument is a locally saved HTML file to scan.
if len(sys.argv) < 2:
    print("Usage: spider <savedir> htmlfile1 [htmlfile2...]")
    # sys.exit is used rather than the site-provided exit() helper, which is
    # meant for interactive sessions and is absent when run with `python -S`.
    sys.exit(1)

# Create the output directory together with any missing parents; exist_ok
# makes this a no-op when the directory already exists.
os.makedirs(sys.argv[1], exist_ok=True)

# Running counter used to name the saved files 0.jpg, 1.jpg, ... across all
# input pages.
save_index = 0
def downloadFromURL(src_url, save_name):
    """Download the resource at *src_url* and write it to *save_name*.

    Args:
        src_url: URL to fetch; passed unchanged to ``urllib.request.urlopen``.
        save_name: Path of the output file. It is opened in binary write
            mode, so an existing file is overwritten.

    Raises:
        urllib.error.URLError: If the URL cannot be opened.
        OSError: If the output file cannot be opened or written.
    """
    print("下载\"%s\"到\"%s\"…" % (src_url, save_name))
    # Context managers close both the response and the output file, even when
    # the transfer fails part-way through.
    with request.urlopen(src_url) as response, open(save_name, "wb") as out_file:
        out_file.write(response.read())
# Matches a single https URL ending in "?original". The URL body excludes
# whitespace, quotes and angle brackets and is matched lazily, so one match
# cannot swallow several URLs that happen to share a line.
_ORIGINAL_URL_RE = re.compile(r"https://[^\s\"'<>]*?\?original")


def spider(srcpath):
    """Download every ``?original`` image URL found in a saved HTML file.

    The file is read as UTF-8 and scanned for https URLs ending in
    ``?original``. Each hit is passed to ``downloadFromURL`` and saved as
    ``<sys.argv[1]>/<save_index>.jpg``; the module-level ``save_index``
    counter is incremented after every download so numbering continues
    across calls.

    Args:
        srcpath: Path of the locally saved HTML file to scan.

    Raises:
        OSError: If *srcpath* cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    global save_index
    # Read the whole page at once and close the file before any download
    # starts.
    with open(srcpath, encoding="utf-8") as html_file:
        html_text = html_file.read()
    for url in _ORIGINAL_URL_RE.findall(html_text):
        downloadFromURL(url, f"{sys.argv[1]}/{save_index}.jpg")
        save_index = save_index + 1
# Scan every HTML file named on the command line after the save directory.
for html_path in sys.argv[2:]:
    spider(html_path)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment