# drelatgithub/docin-dl.py

## docin-dl.py
###############################################################################
#
# Docin document downloader
#
# Valid as of 2022-12-13
#
###############################################################################
import argparse
import os
from types import SimpleNamespace
import urllib.request

# Shared run settings. Both fields are placeholders that the __main__ section
# overwrites with the parsed command-line arguments before any download starts.
conf = SimpleNamespace(docin_pid=0, output_dir="")

def download_image(pid):
    """Download every page image of a document into ``conf.output_dir``.

    Pages are requested one at a time, starting at page 1. Each response is
    saved as ``<page number>.png`` and a progress line is printed. The loop
    ends at the first page for which the request raises an HTTP error.

    Args:
        pid: Document id substituted into the ``file`` query parameter.
    """
    # Imported explicitly so the except clause does not depend on
    # urllib.request having pulled in urllib.error as a side effect.
    from urllib.error import HTTPError

    # Create the target directory up front; otherwise a missing directory
    # aborts the run with FileNotFoundError on the very first save. An empty
    # path means "current directory", which needs no creation.
    if conf.output_dir:
        os.makedirs(conf.output_dir, exist_ok=True)

    page = 1
    while True:
        url = "http://211.147.220.164/index.jsp?file={}&pageno={}".format(pid, page)
        target = os.path.join(conf.output_dir, "{}.png".format(page))
        try:
            urllib.request.urlretrieve(url, target)
        except HTTPError:
            # An HTTP error is used as the end-of-document signal.
            break
        print("Page", page, "saved.")
        page += 1

if __name__ == "__main__":
    # Command-line interface: two required positional string arguments.
    cli = argparse.ArgumentParser()
    for arg_name, arg_help in (
        ("docin_pid", "The number after \"p-\" in docin url"),
        ("output_dir", "The output directory"),
    ):
        cli.add_argument(arg_name, type=str, help=arg_help)

    parsed = cli.parse_args()

    # Publish the parsed values through the shared settings object, which
    # download_image() reads to find the output directory.
    conf.docin_pid, conf.output_dir = parsed.docin_pid, parsed.output_dir

    # Fetch all pages of the requested document.
    download_image(conf.docin_pid)
	###############################################################################
	#
	# Docin document downloader
	#
	# Valid as of 2022-12-13
	#
	###############################################################################
	import argparse
	import os
	from types import SimpleNamespace
	import urllib.request

	# Shared run settings. Both fields are placeholders that the __main__
	# section overwrites with the parsed command-line arguments.
	conf = SimpleNamespace(docin_pid=0, output_dir="")

	def download_image(pid):
		"""Download every page image of a document into ``conf.output_dir``.

		Pages are requested one at a time, starting at page 1. Each response
		is saved as ``<page number>.png`` and a progress line is printed. The
		loop ends at the first page for which the request raises an HTTP error.

		Args:
			pid: Document id substituted into the ``file`` query parameter.
		"""
		# Imported explicitly so the except clause does not depend on
		# urllib.request having pulled in urllib.error as a side effect.
		from urllib.error import HTTPError

		# Create the target directory up front; otherwise a missing directory
		# aborts the run with FileNotFoundError on the very first save. An
		# empty path means "current directory", which needs no creation.
		if conf.output_dir:
			os.makedirs(conf.output_dir, exist_ok=True)

		page = 1
		while True:
			url = "http://211.147.220.164/index.jsp?file={}&pageno={}".format(pid, page)
			target = os.path.join(conf.output_dir, "{}.png".format(page))
			try:
				urllib.request.urlretrieve(url, target)
			except HTTPError:
				# An HTTP error is used as the end-of-document signal.
				break
			print("Page", page, "saved.")
			page += 1

	if __name__ == "__main__":
		# Parse the arguments: two required positional strings.
		parser = argparse.ArgumentParser()

		parser.add_argument("docin_pid", type=str, help="The number after \"p-\" in docin url")
		parser.add_argument("output_dir", type=str, help="The output directory")

		args = parser.parse_args()

		# Publish the parsed values through the shared settings object, which
		# download_image() reads to find the output directory.
		conf.docin_pid = args.docin_pid
		conf.output_dir = args.output_dir

		# Do the work
		download_image(conf.docin_pid)