Skip to content

Instantly share code, notes, and snippets.

Last active February 28, 2022 04:44
What would you like to do?
#!/usr/bin/env python3
# @create: 2022-02-12 22:49
import time
import argparse
from urllib import request
from urllib.parse import urljoin
import xml.etree.ElementTree as ET
def chunker(seq, size):
    """Iterate over a sequence in consecutive chunks.

    Args:
        seq: A sliceable sequence that supports ``len()`` (list, tuple, str...).
        size: Maximum number of items per chunk; must be at least 1.

    Returns:
        A generator yielding slices of ``seq``; every slice holds ``size``
        items except possibly the last one, which holds the remainder.

    Raises:
        ValueError: If ``size`` is smaller than 1.
    """
    # A negative step would make range() empty and silently drop every item,
    # so reject bad sizes up front instead of yielding nothing.
    if size < 1:
        raise ValueError(f'chunk size must be >= 1, got {size}')
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
class BaiduSubmitter:
    """Submit the URLs listed in a site's sitemap to Baidu's link-push API.

    Attributes:
        submit_url: API endpoint including the ``site`` and ``token`` query
            parameters.
        sitemap_url: Absolute URL the sitemap is downloaded from.
    """

    # Endpoint of Baidu's link-submission (push) API.
    # NOTE(review): reconstructed from Baidu's public API docs - confirm.
    SUBMIT_API = 'http://data.zz.baidu.com/urls'
    # XML namespace of sitemap files following the sitemaps.org protocol.
    SITEMAP_NS = {'sm': 'http://www.sitemaps.org/schemas/sitemap/0.9'}

    def __init__(self, site: str, token: str, sitemap: str):
        """Build the submit and sitemap URLs; performs no network access.

        Args:
            site: Site address, used both as the API ``site`` parameter and
                as the base URL the sitemap path is joined onto.
            token: Baidu push-API token.
            sitemap: URL path (or absolute URL) of the sitemap file.
        """
        self.submit_url = self.gen_submit_url(site, token)
        self.sitemap_url = self.gen_sitemap_url(site, sitemap)

    @staticmethod
    def gen_submit_url(site: str, token: str) -> str:
        """ generate url to submit to """
        return f'{BaiduSubmitter.SUBMIT_API}?site={site}&token={token}'

    @staticmethod
    def gen_sitemap_url(site: str, sitemap: str) -> str:
        """ generate url path to get sitemap """
        return urljoin(site, sitemap)

    @staticmethod
    def get_links_from_sitemap(sitemap_url) -> list:
        """Download the sitemap, parse it and return its page URLs.

        Args:
            sitemap_url: URL of a sitemap ``<urlset>`` document.

        Returns:
            The stripped text of every ``<url>/<loc>`` element, in document
            order; empty ``<loc>`` elements are skipped.

        Raises:
            urllib.error.URLError: If the sitemap cannot be downloaded.
            xml.etree.ElementTree.ParseError: If the payload is not valid XML.
        """
        with request.urlopen(sitemap_url) as resp:
            data = resp.read()
        root = ET.fromstring(data)
        # Sitemap elements live in the sitemaps.org namespace, so the lookup
        # has to be namespace-qualified or nothing matches.
        locs = root.iterfind('./sm:url/sm:loc', BaiduSubmitter.SITEMAP_NS)
        return [loc.text.strip() for loc in locs
                if loc.text and loc.text.strip()]

    @staticmethod
    def submit(submit_url: str, links: list) -> str:
        """Submit a batch of links to Baidu and return the response body.

        Args:
            submit_url: Full API URL as built by ``gen_submit_url``.
            links: URLs to push; sent one per line in the request body.

        Returns:
            The decoded response body.

        Raises:
            urllib.error.HTTPError: If the API answers with an error status.
        """
        data = '\n'.join(links).encode('utf8')
        # The body is a plain newline-separated URL list, not form data.
        req = request.Request(submit_url, data=data,
                              headers={'Content-Type': 'text/plain'})
        # Close the connection deterministically instead of leaking it.
        with request.urlopen(req) as resp:
            return resp.read().decode()

    def run(self, chunk_size=20, sleep_time=0.1):
        """Fetch the sitemap and push all of its links in batches.

        Args:
            chunk_size: Number of links sent per API request.
            sleep_time: Seconds to wait after each request; a falsy value
                disables the pause.

        Returns:
            The list of response bodies, one per submitted batch.
        """
        links = self.get_links_from_sitemap(self.sitemap_url)
        print(f'Get {len(links)} links from sitemap: [{self.sitemap_url}]')
        responses = []
        for chunk in chunker(links, chunk_size):
            resp = self.submit(self.submit_url, chunk)
            # Report the API's answer so the user can see the outcome.
            print(resp)
            responses.append(resp)
            if sleep_time:
                # Throttle consecutive requests.
                time.sleep(sleep_time)
        return responses
def get_args(argv=None):
    """Parse the command-line arguments.

    Args:
        argv: Optional list of argument strings; when ``None`` argparse
            reads ``sys.argv[1:]``.

    Returns:
        An ``argparse.Namespace`` with ``site``, ``token``, ``sitemap`` and
        ``chunk_size`` attributes.
    """
    parser = argparse.ArgumentParser(description='Submit sitemap to Baidu')
    parser.add_argument('--site', '-s', type=str, dest='site', required=True,
                        help='your site, eg: https://www.example.com')
    # NOTE(review): the token page URL below is reconstructed - confirm.
    parser.add_argument('--token', '-t', type=str, dest='token', required=True,
                        help='baidu ziyuan token, you may find your token in '
                             'https://ziyuan.baidu.com/linksubmit/index')
    parser.add_argument('--sitemap', '-p', type=str, dest='sitemap', default='sitemap.xml',
                        help='url path to get sitemap.xml file, default: sitemap.xml')
    parser.add_argument('--chunk', '-c', type=int, dest='chunk_size', default=100,
                        help='how many urls should be submitted each time')
    return parser.parse_args(argv)
def main():
    """Main process: parse the CLI arguments and submit the sitemap links."""
    args = get_args()
    site = args.site
    token = args.token
    sitemap_path = args.sitemap
    chunk_size = args.chunk_size
    submitter = BaiduSubmitter(site, token, sitemap_path)
    # Building the submitter does no work by itself; run() does the
    # download and the batched submission.
    submitter.run(chunk_size)
# Run the submitter only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment