Skip to content

Instantly share code, notes, and snippets.

@atuyosi
Created May 7, 2018 12:14
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save atuyosi/bf279835c49f17fac78e855ab4d4c059 to your computer and use it in GitHub Desktop.
Save atuyosi/bf279835c49f17fac78e855ab4d4c059 to your computer and use it in GitHub Desktop.
別のスクリプトで生成したTSVファイルからpdftkコマンド用のBookmarkエントリを生成するスクリプト
#! /usr/local/bin/python3
# coding: utf-8
import sys
import pathlib
import csv
from natsort import natsorted
def find_entry(keyword, filepath):
    """Return the first line of a file that contains a keyword.

    Args:
        keyword: Substring to look for in each line.
        filepath: Path of the text file to scan (string or path object).

    Returns:
        The first line containing ``keyword``, exactly as read (including
        its trailing newline when present), or ``None`` when no line
        contains it.
    """
    path = pathlib.Path(filepath)
    # Decode explicitly instead of relying on the platform's locale default.
    # NOTE(review): assumes the index files are written as UTF-8 -- confirm
    # against the script that generates them.
    with path.open(mode='rt', encoding='utf-8') as fp:
        # Iterate the file lazily so scanning stops at the first match
        # instead of loading every line up front.
        for line in fp:
            if keyword in line:
                return line
    return None
def find_article_in_dir(keyword, datadir="."):
    """Collect the first keyword match from each index file in a directory.

    Files named ``index_*.tsv`` directly under ``datadir`` are visited in
    natural sort order and each one is searched with ``find_entry``.

    Args:
        keyword: Substring to search for.
        datadir: Directory holding the index files; defaults to the
            current directory.

    Returns:
        A list with one entry per file that had a match: the matching line
        with trailing whitespace removed. Empty when nothing matched.
    """
    index_files = natsorted(pathlib.Path(datadir).glob('index_*.tsv'))
    matches = []
    for index_file in index_files:
        hit = find_entry(keyword, filepath=index_file)
        if not hit:
            # No line in this file mentions the keyword.
            continue
        matches.append(hit.rstrip())
    return matches
def generate_bookmark_entry(keyword, dirname):
    """Print bookmark records for every index entry matching a keyword.

    Matching lines are gathered with ``find_article_in_dir``, naturally
    sorted, and written to standard output as four-line
    ``BookmarkBegin`` / ``BookmarkTitle`` / ``BookmarkLevel`` /
    ``BookmarkPageNumber`` records. The page number starts at 1 and, after
    each record, advances by that entry's page count
    (``end_page - start_page + 1``).

    Args:
        keyword: Substring used to select index entries.
        dirname: Directory containing the ``index_*.tsv`` files.

    Raises:
        ValueError: If a matched line does not supply three tab-separated
            fields at positions 1-3, or a page field is not an integer.
    """
    rows = find_article_in_dir(keyword=keyword, datadir=dirname)
    next_page = 1
    for row in natsorted(rows):
        # Fields 1-3 are read as title-with-author, first page, last page
        # (per the names used here; the TSV layout is defined elsewhere).
        raw_title, first_page, last_page = row.split("\t")[1:4]
        # Keep only the part of the title that precedes the author name of
        # a serialized article.
        title = raw_title.partition('◆')[0]
        record = (
            "BookmarkBegin",
            "BookmarkTitle: {}".format(title),
            "BookmarkLevel: 1",  # a fixed level is presumably good enough
            "BookmarkPageNumber: {}".format(next_page),
        )
        for record_line in record:
            print(record_line)
        # The next bookmark starts right after this entry's pages.
        next_page += int(last_page) - int(first_page) + 1
if __name__ == '__main__':
    # The search keyword is the one required command-line argument. Exit
    # with a usage message (non-zero status) rather than an IndexError
    # traceback when it is missing.
    if len(sys.argv) < 2:
        sys.exit("usage: {} KEYWORD".format(sys.argv[0]))
    generate_bookmark_entry(keyword=sys.argv[1], dirname="data/page_index/")
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment