Skip to content

Instantly share code, notes, and snippets.

@dingyaguang117
Last active October 18, 2022 20:01
Embed
What would you like to do?
Fill pdf form with pdfcpu
package render
import (
"bytes"
"fmt"
"math"
"github.com/pdfcpu/pdfcpu/pkg/api"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate"
"github.com/pkg/errors"
)
// FormField describes one form widget found while scanning the PDF's
// cross-reference table.
type FormField struct {
// ObjectId is the key of the widget's entry in the cross-reference table.
ObjectId int
// Name is the decoded value of the widget's "T" entry.
Name string
// Rect is built from the four numbers of the widget's "Rect" array.
Rect *pdfcpu.Rectangle
// Type is the string form of the widget's "FT" entry; the setters in this
// file compare it against "Tx" and "Btn".
Type string
// Dict is the widget dictionary itself; the setters write their "V", "AS"
// and "Ff" entries into it.
Dict pdfcpu.Dict
}
// PDFFormFiller is a helper for filling in PDF forms.
type PDFFormFiller struct {
// ctx is the pdfcpu context read from the template; it is what WriteToBytes
// serializes.
ctx *pdfcpu.Context
// fieldMapById indexes the discovered form fields by object number.
fieldMapById map[int]*FormField
// fieldMapByName indexes the same fields by their Name.
fieldMapByName map[string]*FormField
}
// NewPDFFormFiller parses template as a PDF document, validates its
// cross-reference table and indexes the form fields it contains.
//
// The document is read with pdfcpu's default configuration, switched to
// relaxed validation. Any error from reading, validating or field extraction
// is returned unchanged together with a nil filler.
func NewPDFFormFiller(template []byte) (r *PDFFormFiller, err error) {
	cfg := pdfcpu.NewDefaultConfiguration()
	cfg.ValidationMode = pdfcpu.ValidationRelaxed

	// Read the PDF.
	pdfCtx, err := pdfcpu.Read(bytes.NewReader(template), cfg)
	if err != nil {
		return nil, err
	}

	// Validate it.
	if err = validate.XRefTable(pdfCtx.XRefTable); err != nil {
		return nil, err
	}

	filler := &PDFFormFiller{
		ctx:            pdfCtx,
		fieldMapById:   map[int]*FormField{},
		fieldMapByName: map[string]*FormField{},
	}
	if err = filler.extractFormFields(); err != nil {
		return nil, err
	}
	return filler, nil
}
// WriteToBytes serializes the filler's PDF context into memory and returns
// the resulting bytes. If api.WriteContext fails, its error is returned
// together with a nil slice.
func (r *PDFFormFiller) WriteToBytes() (data []byte, err error) {
	var out bytes.Buffer
	if err = api.WriteContext(r.ctx, &out); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// extractFormFields walks every entry of the cross-reference table and
// indexes the form widgets it finds, both by object number and by name.
//
// An object is treated as a form field when it is a dictionary whose
// "Subtype" is "Widget" and which carries its own "FT" entry; everything else
// is skipped. The field name comes from the "T" entry, which is stored either
// as a hex literal or as a plain string literal depending on the tool that
// produced the PDF.
//
// It returns an error when a hex-encoded "T" entry cannot be decoded, or when
// "T" has any other type (including being absent).
func (r *PDFFormFiller) extractFormFields() error {
	for objectId, item := range r.ctx.XRefTable.Table {
		dict, ok := item.Object.(pdfcpu.Dict)
		if !ok {
			continue
		}
		subtype, ok := dict.Find("Subtype")
		if !ok || subtype.String() != "Widget" {
			continue
		}
		fieldType, ok := dict.Find("FT")
		if !ok {
			continue
		}

		// An unchecked assertion to HexLiteral panics on PDFs that store the
		// name as a StringLiteral, so dispatch on the concrete type instead.
		var name string
		switch t := dict["T"].(type) {
		case pdfcpu.HexLiteral:
			decoded, err := pdfcpu.HexLiteralToString(t)
			if err != nil {
				return errors.Wrap(err, "Decode T attribute of form field failed")
			}
			name = decoded
		case pdfcpu.StringLiteral:
			// NOTE(review): the literal is taken verbatim; escape sequences
			// and non-ASCII encodings are not decoded here.
			name = string(t)
		default:
			return errors.Errorf("form field %d: unsupported type %T for T attribute", objectId, t)
		}

		// NOTE(review): the second result of FloatNumber is discarded, so a
		// missing or malformed Rect is not reported here.
		rect := dict.ArrayEntry("Rect")
		x, _ := rect.FloatNumber(0)
		y, _ := rect.FloatNumber(1)
		x2, _ := rect.FloatNumber(2)
		y2, _ := rect.FloatNumber(3)

		field := &FormField{
			ObjectId: objectId,
			Name:     name,
			Type:     fieldType.String(),
			Rect:     pdfcpu.Rect(x, y, x2, y2),
			Dict:     dict,
		}
		r.fieldMapById[objectId] = field
		r.fieldMapByName[name] = field
	}
	return nil
}
// FillFormFieldsWithItsIdName is a debugging aid: for every indexed form
// field it stamps the label "#<object id> <name>" onto page 1 at the LL point
// of the field's rectangle, which makes it easier to write business logic
// against the form. Errors returned by AddText are discarded.
func (r *PDFFormFiller) FillFormFieldsWithItsIdName() {
	for id, f := range r.fieldMapById {
		corner := f.Rect.LL
		_ = r.AddText(1, fmt.Sprintf("#%d %s", id, f.Name), int(corner.X), int(corner.Y))
	}
}
// GetFormDictById looks up the object with the given number in the
// cross-reference table and returns it as a dictionary.
//
// An error is returned when the table has no such entry, or when the entry's
// object is not a pdfcpu.Dict.
func (r *PDFFormFiller) GetFormDictById(objectId int) (dict pdfcpu.Dict, err error) {
	entry, found := r.ctx.XRefTable.Table[objectId]
	if !found {
		return nil, errors.Errorf("field %d not found!", objectId)
	}
	if d, isDict := entry.Object.(pdfcpu.Dict); isDict {
		return d, nil
	}
	return nil, errors.Errorf("field %d, %s is not Dict type!", objectId, entry.Object)
}
// SetTextFieldByName fills in the text form field with the given name.
//
// name is looked up in the name index built when the template was loaded;
// value and setReadOnly are passed on to SetTextFieldById. An error is
// returned when no field has that name, or when SetTextFieldById fails.
func (r *PDFFormFiller) SetTextFieldByName(name string, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.SetTextFieldById(formField.ObjectId, value, setReadOnly)
}
// SetTextFieldById fills in a text form field.
//
// objectId is the object number of the field (it can be looked up with the
// mupdf tool: `mutool show some.pdf form`).
// value is the text content; it is stored in the field's "V" entry as a
// UTF-16 encoded hex literal.
// setReadOnly additionally sets the ReadOnly bit in the field's "Ff" flags.
//
// An error is returned when objectId is not an indexed form field, or when
// the field's type is not "Tx".
func (r *PDFFormFiller) SetTextFieldById(objectId int, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}
	if formField.Type != "Tx" {
		return errors.Errorf("type of field %d is %s (expected Tx)", objectId, formField.Type)
	}
	formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value)))
	if setReadOnly {
		// Ff is a bit field and ReadOnly is bit 1: OR it into whatever flags
		// this dictionary already carries rather than replacing them.
		flags := pdfcpu.Integer(1)
		if current, isInt := formField.Dict["Ff"].(pdfcpu.Integer); isInt {
			flags |= current
		}
		formField.Dict["Ff"] = flags
	}
	return nil
}
// SetCheckboxFieldByName sets the state of the checkbox form field with the
// given name.
//
// name is looked up in the name index built when the template was loaded;
// value and setReadOnly are passed on to SetCheckboxFieldById. An error is
// returned when no field has that name, or when SetCheckboxFieldById fails.
func (r *PDFFormFiller) SetCheckboxFieldByName(name string, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.SetCheckboxFieldById(formField.ObjectId, value, setReadOnly)
}
// SetCheckboxFieldById sets the state of a checkbox form field.
//
// objectId is the object number of the field (it can be looked up with the
// mupdf tool: `mutool show some.pdf form`).
// value is the form state; the valid choices are the states defined by the
// field's AP attribute.
// setReadOnly additionally sets the ReadOnly bit in the field's "Ff" flags.
//
// An error is returned when objectId is not an indexed form field, or when
// the field's type is not "Btn".
func (r *PDFFormFiller) SetCheckboxFieldById(objectId int, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}
	if formField.Type != "Btn" {
		return errors.Errorf("type of field %d is %s (expected Btn)", objectId, formField.Type)
	}
	// A checkbox selects its displayed appearance through AS; the available
	// states are defined in AP.
	// https://www.verypdf.com/document/pdf-format-reference/index.htm page 612
	formField.Dict["AS"] = pdfcpu.Name(value)
	if setReadOnly {
		// Ff is a bit field and ReadOnly is bit 1: OR it into whatever flags
		// this dictionary already carries (button fields keep their kind in
		// other bits) rather than replacing them.
		flags := pdfcpu.Integer(1)
		if current, isInt := formField.Dict["Ff"].(pdfcpu.Integer); isInt {
			flags |= current
		}
		formField.Dict["Ff"] = flags
	}
	return nil
}
// AddImageOverObjectByName places an image on top of the form field with the
// given name.
//
// name is looked up in the name index built when the template was loaded and
// image is passed on to AddImageOverObjectById. An error is returned when no
// field has that name, or when AddImageOverObjectById fails.
func (r *PDFFormFiller) AddImageOverObjectByName(name string, image []byte) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.AddImageOverObjectById(formField.ObjectId, image)
}
// AddImageOverObjectById places an image on top of the form field with the
// given object number.
//
// The field's rectangle supplies the position and size passed to AddImage,
// truncated to integers; the scale passed is 1. An error is returned when
// objectId is not an indexed form field, or when AddImage fails.
func (r *PDFFormFiller) AddImageOverObjectById(objectId int, image []byte) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil when its cause is nil, which would report
		// success for an unknown field; create a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}
	rect := formField.Rect
	// FIXME: for PDFs with more than one page, the page that contains
	// objectId needs to be worked out; page 1 is assumed for now.
	return r.AddImage(1, image, int(rect.LL.X), int(rect.LL.Y), int(rect.Width()), int(rect.Height()), 1)
}
// AddImage stamps an image onto one page of the PDF as a pdfcpu image
// watermark.
//
// page is the page number to stamp. image holds the encoded image data.
// x and y become the "off" offset of a watermark anchored with "pos:bl",
// expressed in points. scale is written as an absolute scale factor
// ("sc: … abs"). w and h are accepted for callers' convenience but are
// currently not used when building the watermark.
//
// An error is returned, wrapped with context, when the watermark cannot be
// built or cannot be added to the page.
func (r *PDFFormFiller) AddImage(page int, image []byte, x, y, w, h int, scale float64) (err error) {
	pages := pdfcpu.IntSet{
		page: true,
	}
	// Build the watermark description string.
	descriptionString := fmt.Sprintf("pos:bl, rot: 0, sc: %.4f abs, off: %d %d", scale, x, y)
	wm, err := api.ImageWatermarkForReader(bytes.NewReader(image), descriptionString, true, false, pdfcpu.POINTS)
	if err != nil {
		return errors.Wrap(err, "Build ImageWatermark failed")
	}
	if err = r.ctx.AddWatermarks(pages, wm); err != nil {
		return errors.Wrap(err, "Add ImageWatermark failed")
	}
	return nil
}
// AddText stamps a line of text onto one page of the PDF as a pdfcpu text
// watermark.
//
// page is the page number to stamp and text is the content. x and y become
// the "off" offset of a watermark anchored with "pos:bl", expressed in
// points. The text is drawn at 12 points with stroke and fill colour #E00000.
//
// An error is returned, wrapped with context, when the watermark cannot be
// built or cannot be added to the page.
func (r *PDFFormFiller) AddText(page int, text string, x, y int) (err error) {
	pages := pdfcpu.IntSet{
		page: true,
	}
	// Build the watermark description string.
	descriptionString := fmt.Sprintf("points:12, strokec:#E00000, fillc:#E00000, sc: 1 abs, pos:bl, rot:0, off: %d %d", x, y)
	wm, err := api.TextWatermark(text, descriptionString, true, false, pdfcpu.POINTS)
	if err != nil {
		return errors.Wrap(err, "Build TextWatermark failed")
	}
	if err = r.ctx.AddWatermarks(pages, wm); err != nil {
		return errors.Wrap(err, "Add TextWatermark failed")
	}
	return nil
}
package render
import (
"fmt"
"io/ioutil"
"testing"
"github.com/stretchr/testify/assert"
)
// TestPDFFormFiller exercises the filler end to end against the fw8ben
// template: it labels every field, fills fields by ID and by name, and writes
// the result next to the fixtures for manual inspection.
//
// Setup failures abort the test instead of letting it pass silently, and the
// error returned by every fill step is checked.
func TestPDFFormFiller(t *testing.T) {
	templateData, err := ioutil.ReadFile("testdata/fw8ben.pdf")
	if err != nil {
		t.Fatalf("read template: %v", err)
	}
	filler, err := NewPDFFormFiller(templateData)
	if err != nil {
		t.Fatalf("create filler: %v", err)
	}
	signatureImage, err := ioutil.ReadFile("testdata/signature.png")
	if err != nil {
		t.Fatalf("read signature image: %v", err)
	}

	// Label every form field with its ID and name, which helps debugging.
	filler.FillFormFieldsWithItsIdName()

	// Set content by ID.
	assert.Nil(t, filler.SetTextFieldById(302, "中文", true))
	assert.Nil(t, filler.SetCheckboxFieldById(312, "1", true))
	assert.Nil(t, filler.AddImageOverObjectById(323, signatureImage))

	// Set content by name.
	assert.Nil(t, filler.SetTextFieldByName("f_2[0]", "中文2", true))
	assert.Nil(t, filler.SetCheckboxFieldByName("c1_02[0]", "1", true))
	assert.Nil(t, filler.AddImageOverObjectByName("Date[0]", signatureImage))

	// Write the result to a file.
	result, err := filler.WriteToBytes()
	assert.Nil(t, err)
	assert.True(t, len(result) > 0)

	outPath := "testdata/fw8ben-labeled.pdf"
	assert.Nil(t, ioutil.WriteFile(outPath, result, 0644))
	// Report where the artifact went so it can be opened for inspection.
	fmt.Printf("wrote %d bytes to %s\n", len(result), outPath)
}
@dingyaguang117
Copy link
Author

@eduardo-mior
Copy link

@dingyaguang117 I'm getting the following error
interface conversion: pdfcpu.Object is pdfcpu.StringLiteral, not pdfcpu.HexLiteral

This error is happening on line 80:

name, err := pdfcpu.HexLiteralToString(dict["T"].(pdfcpu.HexLiteral))

I fixed this as follows:

var name string
var err error

dictT := dict["T"]
if dictTHexLiteral, ok := dictT.(pdfcpu.HexLiteral); ok {
	name, err = pdfcpu.HexLiteralToString(dictTHexLiteral)
	if err != nil {
		return errors.Wrap(err, "Decode T attribute of form field failed")
	}
} else if dictTStringLiteral, ok := dictT.(pdfcpu.StringLiteral); ok {
	name = string(dictTStringLiteral)
} else {
	panic("dict os not HexLiteral and not StringLiteral")
}

@eduardo-mior
Copy link

@dingyaguang117 What program or website do you use to edit your PDFs and create forms?

I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.

I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.

@dingyaguang117
Copy link
Author

@eduardo-mior Thanks for your feedback.

I used Wondershare PDFelement to edit PDF files. Many PDF files are not valid; you can use mutool to fix them like this:

mutool clean in.pdf out.pdf

BTW, I fixed a bug where non-ASCII strings were not handled correctly. I don't know whether it will solve your problem.

-	formField.Dict["V"] = pdfcpu.StringLiteral(pdfcpu.EncodeUTF16String(value))
+	formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value)))

@dingyaguang117 What program or website do you use to edit your PDFs and create forms?

I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.

I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.

@eduardo-mior
Copy link

@dingyaguang117 Wondershare PDFelement is paid software; I was looking for free alternatives.

As soon as time allows I will test again with this fix you made. Thank you very much.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment