Fill pdf form with pdfcpu
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package render | |
import ( | |
"bytes" | |
"fmt" | |
"math" | |
"github.com/pdfcpu/pdfcpu/pkg/api" | |
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu" | |
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate" | |
"github.com/pkg/errors" | |
) | |
type FormField struct { | |
ObjectId int | |
Name string | |
Rect *pdfcpu.Rectangle | |
Type string | |
Dict pdfcpu.Dict | |
} | |
// PDFFormFiller 表单填充辅助类 | |
type PDFFormFiller struct { | |
ctx *pdfcpu.Context | |
fieldMapById map[int]*FormField | |
fieldMapByName map[string]*FormField | |
} | |
func NewPDFFormFiller(template []byte) (r *PDFFormFiller, err error) { | |
conf := pdfcpu.NewDefaultConfiguration() | |
conf.ValidationMode = pdfcpu.ValidationRelaxed | |
// 读取 PDF | |
ctx, err := pdfcpu.Read(bytes.NewReader(template), conf) | |
if err != nil { | |
return nil, err | |
} | |
// 校验 | |
if err = validate.XRefTable(ctx.XRefTable); err != nil { | |
return nil, err | |
} | |
render := &PDFFormFiller{ | |
ctx: ctx, | |
fieldMapById: make(map[int]*FormField), | |
fieldMapByName: make(map[string]*FormField), | |
} | |
err = render.extractFormFields() | |
if err != nil { | |
return nil, err | |
} | |
return render, nil | |
} | |
func (r *PDFFormFiller) WriteToBytes() (data []byte, err error) { | |
buffer := bytes.NewBuffer(nil) | |
err = api.WriteContext(r.ctx, buffer) | |
if err != nil { | |
return nil, err | |
} | |
return buffer.Bytes(), nil | |
} | |
func (r *PDFFormFiller) extractFormFields() error { | |
for objectId, item := range r.ctx.XRefTable.Table { | |
dict, ok := item.Object.(pdfcpu.Dict) | |
if !ok { | |
continue | |
} | |
subtype, ok := dict.Find("Subtype") | |
if !ok || subtype.String() != "Widget" { | |
continue | |
} | |
fieldType, ok := dict.Find("FT") | |
if !ok { | |
continue | |
} | |
name, err := pdfcpu.HexLiteralToString(dict["T"].(pdfcpu.HexLiteral)) | |
if err != nil { | |
return errors.Wrap(err, "Decode T attribute of form field failed") | |
} | |
rect := dict.ArrayEntry("Rect") | |
x, _ := rect.FloatNumber(0) | |
y, _ := rect.FloatNumber(1) | |
x2, _ := rect.FloatNumber(2) | |
y2, _ := rect.FloatNumber(3) | |
field := &FormField{ | |
ObjectId: objectId, | |
Name: name, | |
Type: fieldType.String(), | |
Rect: pdfcpu.Rect(x, y, x2, y2), | |
Dict: dict, | |
} | |
r.fieldMapById[objectId] = field | |
r.fieldMapByName[name] = field | |
} | |
return nil | |
} | |
// FillFormFieldsWithItsIdName 为工具方法, 会将 Form 中所有文本字段 填充上其 ID,便于写业务逻辑 | |
func (r *PDFFormFiller) FillFormFieldsWithItsIdName() { | |
for objectId, field := range r.fieldMapById { | |
label := fmt.Sprintf("#%d %s", objectId, field.Name) | |
r.AddText(1, label, int(field.Rect.LL.X), int(field.Rect.LL.Y)) | |
} | |
} | |
// GetFormDictById 获取某个元素 | |
func (r *PDFFormFiller) GetFormDictById(objectId int) (dict pdfcpu.Dict, err error) { | |
item, ok := r.ctx.XRefTable.Table[objectId] | |
if !ok { | |
return nil, errors.Errorf("field %d not found!", objectId) | |
} | |
dict, ok = item.Object.(pdfcpu.Dict) | |
if !ok { | |
return nil, errors.Errorf("field %d, %s is not Dict type!", objectId, item.Object) | |
} | |
return dict, nil | |
} | |
// SetTextFieldByName 填充文本表单 | |
func (r *PDFFormFiller) SetTextFieldByName(name string, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.SetTextFieldById(formField.ObjectId, value, setReadOnly) | |
} | |
// SetTextFieldById 填充文本表单 | |
// objectId: 为对象编号(可以通过 mupdf 工具 `mutool show some.pdf form` 查看) | |
// value: 为文本内容 | |
func (r *PDFFormFiller) SetTextFieldById(objectId int, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
if formField.Type != "Tx" { | |
return errors.Errorf("type of field %d is %s (expected Tx)", objectId, formField.Type) | |
} | |
formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value))) | |
if setReadOnly { | |
formField.Dict["Ff"] = pdfcpu.Integer(1) | |
} | |
return nil | |
} | |
// SetCheckboxFieldByName 设置 checkbox 表单选项 | |
func (r *PDFFormFiller) SetCheckboxFieldByName(name string, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.SetCheckboxFieldById(formField.ObjectId, value, setReadOnly) | |
} | |
// SetCheckboxFieldById 设置 checkbox 表单选项 | |
// objectId: 为对象编号(可以通过 mupdf 工具 `mutool show some.pdf form` 查看) | |
// value 为表单状态,可选项为 AP 属性定义的选项 | |
func (r *PDFFormFiller) SetCheckboxFieldById(objectId int, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
if formField.Type != "Btn" { | |
return errors.Errorf("type of field %d is %s (expected Btn)", objectId, formField.Type) | |
} | |
// checkbox 通过 AS 控制展示样式, 其选项定义在 AP 中 | |
// https://www.verypdf.com/document/pdf-format-reference/index.htm 612 页 | |
formField.Dict["AS"] = pdfcpu.Name(value) | |
if setReadOnly { | |
formField.Dict["Ff"] = pdfcpu.Integer(1) | |
} | |
return nil | |
} | |
// AddImageOverObjectByName 在某个对象上方添加图片 | |
func (r *PDFFormFiller) AddImageOverObjectByName(name string, image []byte) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.AddImageOverObjectById(formField.ObjectId, image) | |
} | |
func (r *PDFFormFiller) AddImageOverObjectById(objectId int, image []byte) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
// FIXME: 对于超过一页的 PDF,需要计算出 objectId 在哪一页 | |
err = r.AddImage(1, image, int(formField.Rect.LL.X), int(formField.Rect.LL.Y), int(formField.Rect.Width()), int(formField.Rect.Height()), 1) | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
// AddImage 在 PDF 指定区域添加图片 | |
func (r *PDFFormFiller) AddImage(page int, image []byte, x, y, w, h int, scale float64) (err error) { | |
pages := pdfcpu.IntSet{ | |
page: true, | |
} | |
// 计算水印描述字符串 | |
descriptionString := fmt.Sprintf("pos:bl, rot: 0, sc: %.4f abs, off: %d %d", scale, x, y) | |
fmt.Printf("descriptionString %s\n", descriptionString) | |
wm, err := api.ImageWatermarkForReader(bytes.NewReader(image), descriptionString, true, false, pdfcpu.POINTS) | |
if err != nil { | |
return errors.Wrap(err, "Build ImageWatermark failed") | |
} | |
err = r.ctx.AddWatermarks(pages, wm) | |
if err != nil { | |
return errors.Wrap(err, "Add ImageWatermark failed") | |
} | |
return err | |
} | |
// AddText 在 PDF 指定位置添加文字 | |
func (r *PDFFormFiller) AddText(page int, text string, x, y int) (err error) { | |
pages := pdfcpu.IntSet{ | |
page: true, | |
} | |
// 计算水印描述字符串 | |
descriptionString := fmt.Sprintf("points:12, strokec:#E00000, fillc:#E00000, sc: 1 abs, pos:bl, rot:0, off: %d %d", x, y) | |
fmt.Printf("descriptionString %s\n", descriptionString) | |
wm, err := api.TextWatermark(text, descriptionString, true, false, pdfcpu.POINTS) | |
if err != nil { | |
return errors.Wrap(err, "Build TextWatermark failed") | |
} | |
err = r.ctx.AddWatermarks(pages, wm) | |
if err != nil { | |
return errors.Wrap(err, "Add TextWatermark failed") | |
} | |
return err | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package render | |
import ( | |
"fmt" | |
"io/ioutil" | |
"testing" | |
"github.com/stretchr/testify/assert" | |
) | |
func TestPDFFormFiller(t *testing.T) { | |
templateData, _ := ioutil.ReadFile("testdata/fw8ben.pdf") | |
filler, err := NewPDFFormFiller(templateData) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
signatureImage, err := ioutil.ReadFile("testdata/signature.png") | |
// 为所有的表单字段 标准ID 和 名称, 方便调试 | |
filler.FillFormFieldsWithItsIdName() | |
// 使用 ID 设置内容 | |
filler.SetTextFieldById(302, "中文", true) | |
filler.SetCheckboxFieldById(312, "1", true) | |
filler.AddImageOverObjectById(323, signatureImage) | |
// 使用 Name 设置内容 | |
filler.SetTextFieldByName("f_2[0]", "中文2", true) | |
filler.SetCheckboxFieldByName("c1_02[0]", "1", true) | |
filler.AddImageOverObjectByName("Date[0]", signatureImage) | |
// 写入文件 | |
result, err := filler.WriteToBytes() | |
assert.Nil(t, err) | |
assert.True(t, len(result) > 0) | |
ioutil.WriteFile("testdata/fw8ben-labeled.pdf", result, 0644) | |
} |
@dingyaguang117 Wondershare PDFelement is a paid product; I was looking for free alternatives.
As soon as time allows I will test again with this fix you made. Thank you very much.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
@eduardo-mior Thanks for your feedback.
I used
WonderShare PDFelement
to edit PDF files. Many PDF files are not valid; you can use mutool
to fix them like this: mutool clean in.pdf out.pdf
BTW, I fixed the bug where non-ASCII strings did not work well. I don't know whether it will solve your problem.