Skip to content

Instantly share code, notes, and snippets.

@OrkoHunter
Created January 8, 2019 12:20
Show Gist options
  • Save OrkoHunter/e3200adeba42e14fa0dd49ca4fe8f809 to your computer and use it in GitHub Desktop.
Save OrkoHunter/e3200adeba42e14fa0dd49ca4fe8f809 to your computer and use it in GitHub Desktop.
Text to PDF creator
#! /usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import reportlab.lib.pagesizes
from reportlab.pdfgen.canvas import Canvas
from reportlab.lib import units
from reportlab.lib.styles import ParagraphStyle
from reportlab.platypus import Paragraph, SimpleDocTemplate, BaseDocTemplate, XPreformatted
from reportlab.lib.colors import red, black, navy, white, green
from reportlab.rl_config import defaultPageSize
(PAGE_WIDTH, PAGE_HEIGHT) = defaultPageSize
from reportlab.lib.units import inch
from reportlab.pdfbase import pdfmetrics
from reportlab.pdfbase.ttfonts import TTFont
import re
import sys
import os
class Margins(object):
    """Page margins kept as raw numbers and scaled by ``units.cm`` on read.

    The constructor stores the four values exactly as given (the command-line
    help in this file describes them as centimetres); each property returns
    the stored value multiplied by ``units.cm``.
    """

    def __init__(self, right, left, top, bottom):
        # Note the argument order: right, left, top, bottom.
        self._right, self._left = right, left
        self._top, self._bottom = top, bottom

    @staticmethod
    def _scaled(value):
        """Return *value* multiplied by ``units.cm``."""
        return value * units.cm

    @property
    def right(self):
        """Right margin, scaled by ``units.cm``."""
        return self._scaled(self._right)

    @property
    def left(self):
        """Left margin, scaled by ``units.cm``."""
        return self._scaled(self._left)

    @property
    def top(self):
        """Top margin, scaled by ``units.cm``."""
        return self._scaled(self._top)

    @property
    def bottom(self):
        """Bottom margin, scaled by ``units.cm``."""
        return self._scaled(self._bottom)

    def adjustLeft(self, width):
        """Reduce the stored left margin by *width* divided by ``units.cm``."""
        self._left = self._left - width / units.cm
class PDFCreator(object):
    """Convert a text file containing printer escape codes into a PDF.

    The input text is read, its escape sequences are translated into inline
    markup tags (``<b>``, ``<i>``, ``<u>``, ``<strong>``, ``<font size=..>``),
    every line is made self-contained (all tags opened on a line are closed on
    that line) and each line is then rendered as one ``XPreformatted``
    flowable through a ``SimpleDocTemplate``.

    NOTE(review): ``lineNumbering``, ``pageNumbering``, ``breakOnBlanks`` and
    ``kerning`` are recorded from the arguments, but ``_generateCustom`` does
    not use them; ``_newpage`` only draws on ``self.canvas``, which this class
    never saves. Confirm whether these options are meant to affect the output.
    """

    appName = "txt2pdf (version 1.0)"

    # Every inline markup tag this class produces or balances. Attribute-less
    # tags are matched exactly; only <font ...> may carry attributes.
    _MARKUP_TAG = re.compile(r"</?(?:strong|b|u|i|font)>|<font\s[^>]*>")

    def __init__(self, args, margins):
        """Store the conversion settings.

        Args:
            args: parsed command-line namespace (see the argparse setup in
                this file for the attributes that are read).
            margins: a ``Margins`` instance; its left margin is reduced in
                place when line numbering is requested.

        Raises:
            KeyError: if ``args.media`` is not a name defined in
                ``reportlab.lib.pagesizes``.
        """
        pageWidth, pageHeight = reportlab.lib.pagesizes.__dict__[args.media]
        if args.landscape:
            pageWidth, pageHeight = reportlab.lib.pagesizes.landscape(
                (pageWidth, pageHeight))
        # Remembered so the document template uses the same page geometry that
        # contentWidth below is derived from.
        self.pageSize = (pageWidth, pageHeight)

        self.author = args.author
        self.title = args.title
        self.keywords = args.keywords
        self.subject = args.subject
        self.output = args.output

        # This canvas is used for string-width measurements (and by _newpage).
        self.canvas = Canvas(args.output, pagesize=self.pageSize)
        self.canvas.setCreator(self.appName)
        if args.author:
            self.canvas.setAuthor(args.author)
        if args.title:
            self.canvas.setTitle(args.title)
        if args.subject:
            self.canvas.setSubject(args.subject)
        if args.keywords:
            self.canvas.setKeywords(args.keywords)

        self.fontSize = args.font_size
        # Compare for equality: the previous ``not in ('Courier')`` was a
        # substring test against the string 'Courier' (so e.g. 'our' or ''
        # were wrongly treated as the built-in font).
        if args.font != 'Courier':
            self.font = 'myFont'
            pdfmetrics.registerFont(TTFont('myFont', args.font))
        else:
            self.font = args.font
        self.kerning = args.kerning
        self.margins = margins
        self.leading = (args.extra_vertical_space + 1.2) * self.fontSize
        self.linesPerPage = int(
            (self.leading + pageHeight
             - margins.top - margins.bottom - self.fontSize) / self.leading)
        self.lppLen = len(str(self.linesPerPage))

        fontWidth = self.canvas.stringWidth(
            ".", fontName=self.font, fontSize=self.fontSize)
        # Width of a single "." for every integer font size from 1 to 599;
        # consulted when a long line has to be scaled to the content width.
        self.font_size_vs_width = {
            size: self.canvas.stringWidth(
                ".", fontName=self.font, fontSize=size)
            for size in range(1, 600)
        }
        # Lines with at least this many visible characters get a fitted size.
        self.max_chars_in_line = 90

        self.lineNumbering = args.line_numbers
        if self.lineNumbering:
            margins.adjustLeft(fontWidth * (self.lppLen + 2))
        self.contentWidth = pageWidth - margins.left - margins.right
        self.top = pageHeight - margins.top - self.fontSize

        self.filename = args.filename
        self.verbose = not args.quiet
        self.breakOnBlanks = args.break_on_blanks
        self.encoding = args.encoding
        self.pageNumbering = args.page_numbers
        if self.pageNumbering:
            self.pageNumberPlacement = (pageWidth / 2, margins.bottom / 2)

    def _update_styling_characters(self, data):
        """Replace escape sequences in *data* with markup tags (or nothing)."""
        replacements = {
            "\x1bG": "<strong>",   # Dodstrike (double strike on)
            "\x1bH": "</strong>",  # Candstrike (double strike off)
            "\x1bE": "<b>",        # Doemph (emphasis on)
            "\x1bF": "</b>",       # Canemph (emphasis off)
            "\x1b4": "<i>",        # Doital (italic on)
            "\x1b5": "</i>",       # Canital (italic off)
            "\x1b-1": "<u>",       # Douline (underline on)
            "\x1b-0": "</u>",      # Canuline (underline off)
            "\x1bP": "",           # P10cpi (characters per inch)
            "\x1bM": "",           # P12cpi
            "\x1bg": "",           # P15cpi
            # chr(14) ("\x0e") is intentionally NOT replaced here; it is
            # turned into a double-size <font> tag by _process_chr_14.
            "\x0f": "",            # chr(15): font size /= 2
            "\x12": "",            # chr(18): cancel chr(14)
            "\x1a": "",            # chr(26)
        }
        for sequence, markup in replacements.items():
            data = data.replace(sequence, markup)
        return data

    def _get_line_length(self, line):
        """Return the number of characters in *line*, not counting markup.

        ``<font ...>`` / ``</font>`` are ignored as well, so the tag inserted
        for chr(14) no longer inflates the length of a line.
        """
        return len(self._MARKUP_TAG.sub("", line))

    @staticmethod
    def _tag_name(tag):
        """Return the element name of an opening or closing tag string."""
        return tag.strip("</>").split()[0]

    def _balance_markup(self, line, carried=()):
        """Make *line* self-contained, properly nested markup.

        Args:
            line: one line of text with inline tags.
            carried: opening tags still in effect from the previous line,
                outermost first.

        Returns:
            ``(balanced_line, still_open)`` where ``still_open`` lists the
            opening tags (outermost first) that were in effect at the end of
            the line; they are closed in ``balanced_line`` and should be
            passed as ``carried`` for the next line.

        Closers are written innermost-first. A closing tag that crosses other
        open tags closes those too and re-opens them afterwards; a closing
        tag with no matching opener is dropped.
        """
        pieces = []
        # (name, opening tag) pairs already written to this line and open.
        emitted = []
        # Tags that are logically open but not written yet; they are only
        # written when content follows, which avoids empty <tag></tag> pairs.
        pending = [(self._tag_name(tag), tag) for tag in carried]

        def write(text):
            for entry in pending:
                pieces.append(entry[1])
                emitted.append(entry)
            del pending[:]
            pieces.append(text)

        def innermost(entries, name):
            for index in range(len(entries) - 1, -1, -1):
                if entries[index][0] == name:
                    return index
            return -1

        position = 0
        for match in self._MARKUP_TAG.finditer(line):
            text = line[position:match.start()]
            position = match.end()
            if text:
                write(text)
            token = match.group(0)
            name = self._tag_name(token)
            if not token.startswith("</"):
                write(token)
                emitted.append((name, token))
                continue
            index = innermost(pending, name)
            if index >= 0:
                # Closes a tag that was never written on this line.
                del pending[index]
                continue
            index = innermost(emitted, name)
            if index < 0:
                continue  # stray closing tag
            crossed = emitted[index + 1:]
            del emitted[index:]
            pieces.extend("</%s>" % entry[0] for entry in reversed(crossed))
            pieces.append(token)
            pending[:0] = crossed
        tail = line[position:]
        if tail:
            write(tail)

        still_open = [entry[1] for entry in emitted + pending]
        pieces.extend("</%s>" % entry[0] for entry in reversed(emitted))
        return "".join(pieces), still_open

    def _close_tags(self, lines):
        """Return *lines* with every line closing the tags it has open.

        Tags left open at the end of one line are closed there and re-opened
        on the following line(s) until their closing tag is seen.
        """
        balanced = []
        carried = []
        for line in lines:
            line, carried = self._balance_markup(line, carried)
            balanced.append(line)
        return balanced

    def _process_chr_14(self, lines):
        """Replace each chr(14) with a double-size ``<font>`` tag.

        The font tag(s) are closed on the same line, correctly nested with
        respect to any other tags on that line. Lines without chr(14) are
        returned unchanged.
        """
        double_size = f"<font size={self.fontSize*2}>"
        processed = []
        for line in lines:
            if "\x0e" in line:
                line, _ = self._balance_markup(
                    line.replace("\x0e", double_size))
            processed.append(line)
        return processed

    def _process(self, data):
        """Turn the raw file contents into a list of markup lines.

        Every returned line has matching opening and closing tags, because
        each line becomes its own paragraph and an unclosed tag would make
        the paragraph markup invalid.

        NOTE(review): the text itself is not escaped, so literal markup-like
        characters in the input reach the paragraph parser as-is. Confirm
        whether that is intended.
        """
        data = self._update_styling_characters(data)
        # Strip line-end characters first so closers are appended after the
        # visible text rather than after a stray '\r'.
        raw_lines = [line.rstrip('\r\n') for line in data.split("\n")]
        lines = self._close_tags(raw_lines)
        return self._process_chr_14(lines)

    def _readDocument(self):
        """Read ``self.filename`` (decoded with ``self.encoding``) into lines."""
        with open(self.filename, 'r', encoding=self.encoding) as f:
            data = f.read()
        return self._process(data)

    def _newpage(self):
        """Return a text object positioned at the top-left of ``self.canvas``.

        Also draws the current page number on the canvas when page numbering
        is enabled.
        """
        textobject = self.canvas.beginText()
        textobject.setFont(self.font, self.fontSize, leading=self.leading)
        textobject.setTextOrigin(self.margins.left, self.top)
        textobject.setCharSpace(self.kerning)
        if self.pageNumbering:
            self.canvas.drawString(
                self.pageNumberPlacement[0],
                self.pageNumberPlacement[1],
                str(self.canvas.getPageNumber()))
        return textobject

    def _scribble(self, text):
        """Write *text* plus a line separator to stderr unless quiet."""
        if self.verbose:
            sys.stderr.write(text + os.linesep)

    def generate(self):
        """Read the input file, build the PDF and report progress on stderr."""
        self._scribble(
            "Writing '%s' with %d max characters per "
            "line and %d lines per page..." %
            (self.filename, self.max_chars_in_line, self.linesPerPage)
        )
        pageno = self._generateCustom(self._readDocument())
        self._scribble("PDF document: %d pages" % pageno)

    def _fitting_font_size(self, length):
        """Return the largest tabulated font size at which *length* characters fit.

        Sizes are tried in ascending order and the search stops at the first
        one that is too wide. The result is never below 1 and never above the
        largest size in ``font_size_vs_width``.
        """
        best = 1
        for size in sorted(self.font_size_vs_width):
            if self.font_size_vs_width[size] * length < self.contentWidth:
                best = size
            else:
                break
        return best

    def _generateCustom(self, data):
        """Build the PDF with one ``XPreformatted`` paragraph per line.

        Args:
            data: list of markup lines as returned by ``_process``.

        Returns:
            The page counter of the document template after the build.

        NOTE(review): paragraphs are rendered in "Courier" even when another
        font was registered in ``__init__`` (which is only used for the width
        table). Left as-is; confirm whether ``self.font`` should be used.
        """
        base_style = ParagraphStyle(
            "parent", fontName="Courier", fontSize=self.fontSize)
        flowables = []
        for line in data:
            length = self._get_line_length(line)
            if length < self.max_chars_in_line:
                style = base_style
            else:
                # Long line: choose the font size from the width table.
                style = ParagraphStyle(
                    'new', parent=base_style,
                    fontSize=self._fitting_font_size(length))
            flowables.append(XPreformatted(line, style=style))

        # Only forward metadata that was actually supplied.
        metadata = {
            name: value
            for name, value in (
                ("author", self.author),
                ("title", self.title),
                ("subject", self.subject),
                ("keywords", self.keywords),
            )
            if value
        }
        template = SimpleDocTemplate(
            self.output,
            pagesize=self.pageSize,
            leftMargin=self.margins.left,
            rightMargin=self.margins.right,
            topMargin=self.margins.top,
            bottomMargin=self.margins.bottom,
            creator=self.appName,
            **metadata)
        template.build(flowables)
        # The template counts pages while building.
        return template.page
# def _generatePlain(self, data):
# pageno = 1
# lineno = 0
# page = self._newpage()
# for _, line in data:
# lineno += 1
# # Handle form feed characters.
# (line, pageBreakCount) = re.subn(r'\f', r'', line)
# if pageBreakCount > 0 and lineno >= args.minimum_page_length:
# for _ in range(pageBreakCount):
# self.canvas.drawText(page)
# self.canvas.showPage()
# lineno = 0
# pageno += 1
# page = self._newpage()
# if args.minimum_page_length > 0:
# break
# page.textLine(line)
# if lineno == self.linesPerPage:
# self.canvas.drawText(page)
# self.canvas.showPage()
# lineno = 0
# pageno += 1
# page = self._newpage()
# self.canvas.drawText(page)
# if lineno > 0:
# self.canvas.drawText(page)
# else:
# pageno -= 1
# self.canvas.save()
# return pageno
# Command-line interface. The arguments are parsed and the PDF is generated
# as soon as this module is executed.
parser = argparse.ArgumentParser()
parser.add_argument('filename')

# Each entry is (option flags, keyword arguments for add_argument); the
# options are registered in exactly this order.
for _flags, _options in (
    (('--font', '-f'),
     dict(default='Courier',
          help='Select a font (True Type format) by its full path')),
    (('--font-size', '-s'),
     dict(type=float, default=9.0, help='Size of the font')),
    (('--extra-vertical-space', '-v'),
     dict(type=float, default=0.0,
          help='Extra vertical space between lines')),
    (('--kerning', '-k'),
     dict(type=float, default=0.0,
          help='Extra horizontal space between characters')),
    (('--media', '-m'),
     dict(default='A4',
          help='Select the size of the page (A4, A3, etc.)')),
    (('--minimum-page-length', '-M'),
     dict(type=int, default=10,
          help='The minimum number of lines before a form feed character will change the page')),
    (('--landscape', '-l'),
     dict(action="store_true", default=False,
          help='Select landscape mode')),
    (('--margin-left', '-L'),
     dict(type=float, default=0.5, help='Left margin (in cm unit)')),
    (('--margin-right', '-R'),
     dict(type=float, default=0.5, help='Right margin (in cm unit)')),
    (('--margin-top', '-T'),
     dict(type=float, default=2.0, help='Top margin (in cm unit)')),
    (('--margin-bottom', '-B'),
     dict(type=float, default=2.0, help='Bottom margin (in cm unit)')),
    (('--output', '-o'),
     dict(default='output.pdf', help='Output file')),
    (('--author',),
     dict(default='', help='Author of the PDF document')),
    (('--title',),
     dict(default='', help='Title of the PDF document')),
    (('--quiet', '-q'),
     dict(action='store_true', default=False,
          help='Hide detailed information')),
    (('--subject',),
     dict(default='', help='Subject of the PDF document')),
    (('--keywords',),
     dict(default='', help='Keywords of the PDF document')),
    (('--break-on-blanks', '-b'),
     dict(action='store_true', default=False,
          help='Only break page on blank lines')),
    (('--encoding', '-e'),
     dict(type=str, default='utf8', help='Input encoding')),
    (('--page-numbers', '-n'),
     dict(action='store_true', help='Add page numbers')),
    (('--line-numbers',),
     dict(action='store_true', help='Add line numbers')),
):
    parser.add_argument(*_flags, **_options)

args = parser.parse_args()

# Margins takes its values in the order right, left, top, bottom.
p = PDFCreator(
    args,
    Margins(
        args.margin_right,
        args.margin_left,
        args.margin_top,
        args.margin_bottom,
    ),
).generate()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment