Skip to content

Instantly share code, notes, and snippets.

@vifly
Created September 27, 2020 05:08
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save vifly/92ef1bef6ffa5e7ea16b3e904a41089c to your computer and use it in GitHub Desktop.
Save vifly/92ef1bef6ffa5e7ea16b3e904a41089c to your computer and use it in GitHub Desktop.
合并 OSTEP 的 PDF 文件
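# Download "Operating Systems: Three Easy Pieces" with
# https://gist.github.com/theagoliveira/65980c144bf53cf8ee5e351bd827d7e7
# and then use this script to merge the downloaded PDF files into one.
# https://pypi.org/project/pdfCatalog/ can generate a table of contents for
# the merged PDF. pdfCatalog only handles Chinese-style chapter titles, so for
# the table of contents to be generated successfully, insert the line "扉页 1"
# as the first line of the catalog.txt produced by this script.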
import glob
from os.path import *
from pdfrw import PdfReader, PdfWriter, IndirectPdfDict
# Merge every file found under ./OSTEP into one PDF and write a catalog file
# that lists, for each input file, the page of the merged document on which
# that file starts.
inputs = glob.glob("./OSTEP/*")
outfn = "Operating Systems: Three Easy Pieces.pdf"
catalog = []
writer = PdfWriter()
# 1-based page number in the merged output at which the next input begins.
start_num = 1
# Inputs are appended in sorted path order, so file names decide page order.
for inpfn in sorted(inputs):
    print(inpfn)
    # Catalog title: the file name without its directory and extension.
    title = splitext(basename(inpfn))[0]
    reader = PdfReader(inpfn)
    # Count pages from the parsed page list instead of probing getPage()
    # until it raises: a broad except there would hide real errors and
    # silently shift every following catalog entry.
    page_num = len(reader.pages)
    print(page_num)
    catalog.append(f"{title} {start_num}\n")
    start_num = start_num + page_num
    writer.addpages(reader.pages)

# Write the catalog as UTF-8 so titles with non-ASCII characters do not
# depend on the platform's default encoding.
with open("./catalog.txt", 'w', encoding="utf-8") as f:
    f.writelines(catalog)

# Attach document metadata to the merged file before writing it out.
writer.trailer.Info = IndirectPdfDict(
    Title='Operating Systems: Three Easy Pieces',
    Author='Remzi H. Arpaci-Dusseau and Andrea C. Arpaci-Dusseau'
)
writer.write(outfn)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment