Created
May 7, 2018 12:14
-
-
Save atuyosi/bf279835c49f17fac78e855ab4d4c059 to your computer and use it in GitHub Desktop.
別のスクリプトで生成したTSVファイルからpdftkコマンド用のBookmarkエントリを生成するスクリプト
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/local/bin/python3 | |
# coding: utf-8 | |
import sys | |
import pathlib | |
import csv | |
from natsort import natsorted | |
def find_entry(keyword, filepath):
    """Return the first line of a text file that contains ``keyword``.

    Args:
        keyword: Substring to look for in each line.
        filepath: Location of the file to scan (``str`` or path-like).

    Returns:
        The first line containing ``keyword``, exactly as read (a trailing
        newline is kept), or ``None`` when no line matches.
    """
    # Decode explicitly as UTF-8 instead of relying on the locale default,
    # so non-ASCII keywords match the same way on every platform.
    # NOTE(review): assumes the index files are UTF-8, like this script's
    # declared source encoding -- confirm against the generating script.
    with pathlib.Path(filepath).open(mode='rt', encoding='utf-8') as fp:
        # Iterate lazily: stop reading as soon as the first hit is found.
        for line in fp:
            if keyword in line:
                return line
    return None
def find_article_in_dir(keyword, datadir="."):
    """Collect the first line matching ``keyword`` from each index file.

    Every ``index_*.tsv`` file directly inside ``datadir`` is visited in
    natural sort order and searched with ``find_entry``.

    Args:
        keyword: Substring to search for.
        datadir: Directory holding the ``index_*.tsv`` files.

    Returns:
        A list with one entry per file that had a match: the first matching
        line with trailing whitespace stripped. Empty when nothing matches.
    """
    index_files = pathlib.Path(datadir).glob('index_*.tsv')
    tsv_list = []
    for filepath in natsorted(index_files):
        entry = find_entry(keyword, filepath=filepath)
        if entry:
            tsv_list.append(entry.rstrip())
    # Plain return: the loop never breaks, so a for/else adds nothing and
    # is easy to misread as the else of the inner ``if``.
    return tsv_list
def generate_bookmark_entry(keyword,dirname):
    """Print pdftk-style bookmark records for the articles matching ``keyword``.

    Rows come from ``find_article_in_dir`` and are processed in natural sort
    order. Tab-separated columns 1-3 of each row are read as the title, the
    start page and the end page. The first bookmark points at page 1; each
    following bookmark is advanced by the page count of the previous row.

    Args:
        keyword: Substring used to select rows from the index files.
        dirname: Directory containing the index files.
    """
    rows = find_article_in_dir(keyword=keyword, datadir=dirname)

    next_page = 1
    for row in natsorted(rows):
        columns = row.split("\t")
        raw_title, first_page, last_page = columns[1:4]
        # Keep only the text before the '◆' separator, i.e. the part that
        # precedes the author name of a serialized article.
        heading = raw_title.split('◆')[0]

        record = (
            "BookmarkBegin",
            "BookmarkTitle: {}".format(heading),
            "BookmarkLevel: 1",  # every bookmark is emitted at a fixed level
            "BookmarkPageNumber: {}".format(next_page),
        )
        for record_line in record:
            print(record_line)

        # Inclusive page range -> number of pages this article occupies.
        next_page += int(last_page) - int(first_page) + 1
if __name__ == '__main__':
    # Command-line entry point: the first argument is the keyword to search
    # for; the index files are read from the fixed "data/page_index/" folder.
    args = sys.argv
    if len(args) < 2:
        # Exit with a usage hint instead of an IndexError traceback.
        sys.exit("usage: {} KEYWORD".format(args[0]))
    keyword = args[1]
    generate_bookmark_entry(keyword=keyword, dirname="data/page_index/")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment