Last active
August 4, 2017 17:32
-
-
Save gustavohenrique/542dd71502fb44809ce89477414f0088 to your computer and use it in GitHub Desktop.
Convert a txt file to pdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
Installing dependencies:
pip install argparse   (only needed on Python < 2.7 / < 3.2; argparse is in the standard library since then)
pip install reportlab
""" | |
import argparse | |
import reportlab.lib.pagesizes | |
from reportlab.pdfgen.canvas import Canvas | |
from reportlab.lib import units | |
from reportlab.pdfbase import pdfmetrics | |
from reportlab.pdfbase.ttfonts import TTFont | |
import sys | |
import os | |
class Margins(object):
    """Page margins kept in centimetres and exposed in ReportLab units.

    The constructor takes the four margins in centimetres, in the order
    right, left, top, bottom.  Each read-only property returns the stored
    value multiplied by ``units.cm``.
    """

    def __init__(self, right, left, top, bottom):
        # Raw values stay in centimetres; conversion happens on access.
        self._right, self._left = right, left
        self._top, self._bottom = top, bottom

    @property
    def right(self):
        """Right margin, i.e. the stored centimetre value times ``units.cm``."""
        centimetres = self._right
        return centimetres * units.cm

    @property
    def left(self):
        """Left margin, i.e. the stored centimetre value times ``units.cm``."""
        centimetres = self._left
        return centimetres * units.cm

    @property
    def top(self):
        """Top margin, i.e. the stored centimetre value times ``units.cm``."""
        centimetres = self._top
        return centimetres * units.cm

    @property
    def bottom(self):
        """Bottom margin, i.e. the stored centimetre value times ``units.cm``."""
        centimetres = self._bottom
        return centimetres * units.cm

    def adjustLeft(self, width):
        """Shrink the left margin in place by ``width``.

        ``width`` is given in the same units the properties return; it is
        divided by ``units.cm`` before being subtracted from the stored
        centimetre value.
        """
        self._left = self._left - width / units.cm
class PDFCreator(object):
    """Lay out the lines of a text file on PDF pages using a ReportLab canvas.

    The layout is computed once in ``__init__`` from the parsed command-line
    arguments: characters per line are derived from the width of a single
    "." in the selected font, lines per page from the leading and the
    vertical margins.  ``generate()`` then reads the input file, wraps
    over-long lines and writes the document.
    """

    appName = "txt2pdf (version 1.0)"

    def __init__(self, args, margins):
        """Prepare the canvas, font and page geometry.

        Args:
            args: parsed arguments; the attributes read here are ``media``,
                ``landscape``, ``author``, ``title``, ``output``,
                ``font_size``, ``font``, ``kerning``,
                ``extra_vertical_space``, ``line_numbers``, ``filename``,
                ``quiet``, ``break_on_blanks``, ``encoding`` and
                ``page_numbers``.
            margins: object exposing ``left``/``right``/``top``/``bottom``
                and ``adjustLeft(width)``.  Note that it is modified in
                place when line numbering is enabled.

        Raises:
            KeyError: if ``args.media`` is not a name defined in
                ``reportlab.lib.pagesizes``.
            ValueError: if the margins and font size leave room for less
                than one character per line or one line per page.
        """
        pageWidth, pageHeight = reportlab.lib.pagesizes.__dict__[args.media]
        if args.landscape:
            pageWidth, pageHeight = reportlab.lib.pagesizes.landscape(
                (pageWidth, pageHeight))
        self.author = args.author
        self.title = args.title
        self.canvas = Canvas(args.output, pagesize=(pageWidth, pageHeight))
        self.canvas.setCreator(self.appName)
        if args.author:
            self.canvas.setAuthor(args.author)
        if args.title:
            self.canvas.setTitle(args.title)
        self.fontSize = args.font_size
        # Anything other than the built-in 'Courier' is treated as a path to
        # a TrueType file.  The previous test, `not in ('Courier')`, was a
        # substring check against the string 'Courier' (the parentheses do
        # not make a tuple), so values such as '' or 'Cour' were mistaken
        # for the built-in font.
        if args.font != 'Courier':
            self.font = 'myFont'
            pdfmetrics.registerFont(TTFont('myFont', args.font))
        else:
            self.font = args.font
        self.kerning = args.kerning
        self.margins = margins
        self.leading = (args.extra_vertical_space + 1.2) * self.fontSize
        self.linesPerPage = int(
            (self.leading + pageHeight
             - margins.top - margins.bottom - self.fontSize) / self.leading)
        # Number of digits needed to print the largest per-page line number.
        self.lppLen = len(str(self.linesPerPage))
        fontWidth = self.canvas.stringWidth(
            ".", fontName=self.font, fontSize=self.fontSize)
        self.lineNumbering = args.line_numbers
        if self.lineNumbering:
            # Move the text origin left by the width of the "<n>: " prefix
            # (lppLen digits plus two characters).
            margins.adjustLeft(fontWidth * (self.lppLen + 2))
        contentWidth = pageWidth - margins.left - margins.right
        self.charsPerLine = int(
            (contentWidth + self.kerning) / (fontWidth + self.kerning))
        if self.linesPerPage < 1 or self.charsPerLine < 1:
            # With charsPerLine < 1 the wrapping loop in _readDocument would
            # never consume the line; fail early with a clear message.
            raise ValueError(
                "Margins and font size leave no room for text "
                "(%d characters per line, %d lines per page)" %
                (self.charsPerLine, self.linesPerPage))
        self.top = pageHeight - margins.top - self.fontSize
        self.filename = args.filename
        self.verbose = not args.quiet
        self.breakOnBlanks = args.break_on_blanks
        self.encoding = args.encoding
        self.pageNumbering = args.page_numbers
        if self.pageNumbering:
            # Horizontally centred, halfway into the bottom margin.
            self.pageNumberPlacement = \
                (pageWidth / 2, margins.bottom / 2)

    def _process(self, data):
        """Yield ``(is_last, lineno, text)`` for every line of ``data``.

        ``data`` is any iterable of lines (text or bytes).  Bytes are
        decoded with ``self.encoding``; trailing CR/LF characters are
        stripped.  ``lineno`` starts at 1 and ``is_last`` is True only for
        the final line.

        The last line is detected with a one-line lookahead instead of
        comparing a running byte count with the file size: the byte count
        is wrong as soon as newline translation or a different decoding is
        involved, in which case the flag was never set.
        """
        pending = None
        lineno = 0
        for raw in data:
            if isinstance(raw, bytes):
                # Python 2 `str` lines and binary streams end up here.
                raw = raw.decode(self.encoding)
            if pending is not None:
                lineno += 1
                yield False, lineno, pending
            pending = raw.rstrip('\r\n')
        if pending is not None:
            yield True, lineno + 1, pending

    def _readDocument(self):
        """Yield ``(is_last, text)`` pieces no longer than ``charsPerLine``.

        Lines longer than ``self.charsPerLine`` are cut into consecutive
        slices and a warning naming the 1-based source line is emitted via
        ``_scribble``.  Every slice carries the ``is_last`` flag of the
        line it came from.
        """
        # Local import: io.open accepts `encoding` on both Python 2 and 3,
        # so the --encoding option is honoured regardless of the locale.
        import io
        with io.open(self.filename, 'r', encoding=self.encoding) as data:
            for done, lineno, line in self._process(data):
                if len(line) > self.charsPerLine:
                    # lineno is already 1-based; do not add one again.
                    self._scribble(
                        "Warning: wrapping line %d in %s" %
                        (lineno, self.filename))
                    while len(line) > self.charsPerLine:
                        yield done, line[:self.charsPerLine]
                        line = line[self.charsPerLine:]
                yield done, line

    def _newpage(self):
        """Return a text object positioned at the top-left of a new page.

        When page numbering is enabled the current page number is drawn
        directly on the canvas at ``self.pageNumberPlacement``.
        """
        textobject = self.canvas.beginText()
        textobject.setFont(self.font, self.fontSize, leading=self.leading)
        textobject.setTextOrigin(self.margins.left, self.top)
        textobject.setCharSpace(self.kerning)
        if self.pageNumbering:
            self.canvas.drawString(
                self.pageNumberPlacement[0],
                self.pageNumberPlacement[1],
                str(self.canvas.getPageNumber()))
        return textobject

    def _scribble(self, text):
        """Write ``text`` plus a newline to stderr unless running quietly."""
        if self.verbose:
            # "\n" rather than os.linesep: a text-mode stream performs the
            # platform newline translation itself.
            sys.stderr.write(text + "\n")

    def generate(self):
        """Read the input file, write the PDF and report the page count."""
        self._scribble(
            "Writing '%s' with %d characters per "
            "line and %d lines per page..." %
            (self.filename, self.charsPerLine, self.linesPerPage)
        )
        if self.breakOnBlanks:
            pageno = self._generateBob(self._readDocument())
        else:
            pageno = self._generatePlain(self._readDocument())
        self._scribble("PDF document: %d pages" % pageno)

    def _generatePlain(self, data):
        """Write ``data`` filling each page with ``linesPerPage`` lines.

        ``data`` yields ``(flag, text)`` pairs; the flag is ignored here.
        Returns the number of pages that received text.

        NOTE(review): line numbers are not emitted in this mode; only
        ``_generateBob`` goes through ``_writeChunk``.
        """
        pageno = 1
        lineno = 0
        page = self._newpage()
        for _, line in data:
            page.textLine(line)
            lineno += 1
            if lineno == self.linesPerPage:
                self.canvas.drawText(page)
                self.canvas.showPage()
                lineno = 0
                pageno += 1
                page = self._newpage()
        if lineno > 0:
            self.canvas.drawText(page)
        else:
            # The page opened last received no text; do not count it.
            pageno -= 1
        self.canvas.save()
        return pageno

    def _writeChunk(self, page, chunk, lineno):
        """Append the lines of ``chunk`` to the text object ``page``.

        ``lineno`` is the per-page number of the last line in ``chunk``;
        with line numbering enabled each line is prefixed with its number
        right-aligned to ``lppLen`` digits, followed by ": ".
        """
        if self.lineNumbering:
            formatstr = '%%%dd: %%s' % self.lppLen
            for index, line in enumerate(chunk):
                page.textLine(
                    formatstr % (lineno - len(chunk) + index + 1, line))
        else:
            for line in chunk:
                page.textLine(line)

    def _generateBob(self, data):
        """Write ``data`` breaking pages only at blank lines.

        ``data`` yields ``(is_last, text)`` pairs.  Lines are collected
        into a chunk that is written when a blank line (or a line flagged
        as last) arrives; when the page fills up while a chunk is pending,
        the pending lines are carried over to the next page.  Returns the
        page count.
        """
        pageno = 1
        lineno = 0
        page = self._newpage()
        chunk = list()
        for last, line in data:
            if lineno == self.linesPerPage:
                self.canvas.drawText(page)
                self.canvas.showPage()
                # Pending lines move to the new page, so they already
                # occupy its first len(chunk) line slots.
                lineno = len(chunk)
                pageno += 1
                page = self._newpage()
            lineno += 1
            chunk.append(line)
            if last or len(line.strip()) == 0:
                self._writeChunk(page, chunk, lineno)
                chunk = list()
        if lineno > 0:
            self.canvas.drawText(page)
            self.canvas.showPage()
        else:
            pageno -= 1
        if len(chunk) > 0:
            # Input ended without a terminating blank/last line: the
            # remaining lines go on a page of their own.  Previously this
            # page was emitted without the chunk ever being written to it.
            page = self._newpage()
            self._writeChunk(page, chunk, len(chunk))
            self.canvas.drawText(page)
            self.canvas.showPage()
            pageno += 1
        self.canvas.save()
        return pageno
def build_parser():
    """Return the argparse parser describing the command-line interface.

    One positional argument (the input file name) plus options for font,
    spacing, page size and orientation, margins (in cm), output file, PDF
    metadata, verbosity, page-break mode, input encoding and page/line
    numbering.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('filename')
    parser.add_argument('--font', '-f', default='Courier', help='Select a font (True Type format) by its full path')
    parser.add_argument('--font-size', '-s', type=float, default=10.0, help='Size of the font')
    parser.add_argument('--extra-vertical-space', '-v', type=float, default=0.0, help='Extra vertical space between lines')
    parser.add_argument('--kerning', '-k', type=float, default=0.0, help='Extra horizontal space between characters')
    parser.add_argument('--media', '-m', default='A4', help='Select the size of the page (A4, A3, etc.)')
    parser.add_argument('--landscape', '-l', action="store_true", default=False, help='Select landscape mode')
    parser.add_argument('--margin-left', '-L', type=float, default=2.0, help='Left margin (in cm unit)')
    parser.add_argument('--margin-right', '-R', type=float, default=2.0, help='Right margin (in cm unit)')
    parser.add_argument('--margin-top', '-T', type=float, default=2.0, help='Top margin (in cm unit)')
    parser.add_argument('--margin-bottom', '-B', type=float, default=2.0, help='Bottom margin (in cm unit)')
    parser.add_argument('--output', '-o', default='output.pdf', help='Output file')
    parser.add_argument('--author', default='', help='Author of the PDF document')
    parser.add_argument('--title', default='', help='Title of the PDF document')
    # The help text used to be a copy of the --title help.
    parser.add_argument('--quiet', '-q', action='store_true', default=False, help='Do not print progress messages and warnings')
    parser.add_argument('--break-on-blanks', '-b', action='store_true', default=False, help='Only break page on blank lines')
    parser.add_argument('--encoding', '-e', type=str, default='utf8', help='Input encoding')
    parser.add_argument('--page-numbers', '-n', action='store_true', help='Add page numbers')
    parser.add_argument('--line-numbers', action='store_true', help='Add line numbers')
    return parser


if __name__ == '__main__':
    args = build_parser().parse_args()
    # Margins takes its arguments in the order right, left, top, bottom.
    PDFCreator(args, Margins(args.margin_right, args.margin_left, args.margin_top, args.margin_bottom)).generate()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment