Skip to content

Instantly share code, notes, and snippets.

@dingyaguang117
Last active April 19, 2023 18:29
Show Gist options
  • Star 2 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save dingyaguang117/28352dfd98364fd52cec71bd6b365efa to your computer and use it in GitHub Desktop.
Save dingyaguang117/28352dfd98364fd52cec71bd6b365efa to your computer and use it in GitHub Desktop.
Fill pdf form with pdfcpu
package render
import (
"bytes"
"fmt"
"math"
"github.com/pdfcpu/pdfcpu/pkg/api"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu"
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate"
"github.com/pkg/errors"
)
// FormField describes one form widget found in the PDF's cross-reference table.
type FormField struct {
	// ObjectId is the key of the widget's entry in the cross-reference table.
	ObjectId int
	// Name is the decoded value of the widget's "T" entry.
	Name string
	// Rect is built from the four numbers of the widget's "Rect" array.
	Rect *pdfcpu.Rectangle
	// Type is the string form of the widget's "FT" entry; the setters in
	// this file compare it against "Tx" and "Btn".
	Type string
	// Dict is the widget's dictionary; the setters write into it directly.
	Dict pdfcpu.Dict
}
// PDFFormFiller is a helper for filling in PDF forms.
type PDFFormFiller struct {
	// ctx is the pdfcpu context read from the template.
	ctx *pdfcpu.Context
	// fieldMapById indexes the extracted form fields by object number.
	fieldMapById map[int]*FormField
	// fieldMapByName indexes the same fields by their decoded name.
	fieldMapByName map[string]*FormField
}
// NewPDFFormFiller builds a PDFFormFiller from the raw bytes of a PDF template.
//
// The template is read with pdfcpu's default configuration switched to
// relaxed validation, its cross-reference table is validated, and the form
// fields are then extracted into the filler's lookup maps.
//
// It returns a nil filler and the underlying error when reading, validating
// or extracting fails.
func NewPDFFormFiller(template []byte) (r *PDFFormFiller, err error) {
	conf := pdfcpu.NewDefaultConfiguration()
	conf.ValidationMode = pdfcpu.ValidationRelaxed

	// Read the PDF.
	ctx, err := pdfcpu.Read(bytes.NewReader(template), conf)
	if err != nil {
		return nil, err
	}

	// Validate it.
	if err = validate.XRefTable(ctx.XRefTable); err != nil {
		return nil, err
	}

	filler := &PDFFormFiller{
		ctx:            ctx,
		fieldMapById:   make(map[int]*FormField),
		fieldMapByName: make(map[string]*FormField),
	}
	if err = filler.extractFormFields(); err != nil {
		return nil, err
	}
	return filler, nil
}
// WriteToBytes serializes the filler's current PDF context through
// api.WriteContext and returns the resulting bytes.
//
// On failure it returns nil data together with the error from api.WriteContext.
func (r *PDFFormFiller) WriteToBytes() (data []byte, err error) {
	var out bytes.Buffer
	if err = api.WriteContext(r.ctx, &out); err != nil {
		return nil, err
	}
	return out.Bytes(), nil
}
// extractFormFields scans every entry of the cross-reference table and
// registers each form-field widget in fieldMapById and fieldMapByName.
//
// An entry is registered only when it is a dictionary whose "Subtype" prints
// as "Widget" and which has an "FT" entry. A widget whose "T" entry is missing
// or is neither a hex literal nor a string literal is skipped; previously an
// unchecked type assertion panicked on such widgets.
//
// It returns an error when a hex-literal name cannot be decoded.
func (r *PDFFormFiller) extractFormFields() error {
	for objectId, item := range r.ctx.XRefTable.Table {
		dict, ok := item.Object.(pdfcpu.Dict)
		if !ok {
			continue
		}
		subtype, ok := dict.Find("Subtype")
		if !ok || subtype.String() != "Widget" {
			continue
		}
		fieldType, ok := dict.Find("FT")
		if !ok {
			continue
		}

		// Resolve the field name without assuming how "T" is encoded.
		var name string
		switch t := dict["T"].(type) {
		case pdfcpu.HexLiteral:
			decoded, err := pdfcpu.HexLiteralToString(t)
			if err != nil {
				return errors.Wrap(err, "Decode T attribute of form field failed")
			}
			name = decoded
		case pdfcpu.StringLiteral:
			// NOTE(review): the literal is used as stored, without
			// unescaping or UTF-16 decoding — confirm this is sufficient
			// for the templates in use.
			name = string(t)
		default:
			// No usable name: the field cannot be addressed, so skip it.
			continue
		}

		// Coordinate lookup errors are ignored, as before.
		// NOTE(review): presumably a failed lookup leaves the value at 0 — confirm.
		rect := dict.ArrayEntry("Rect")
		x, _ := rect.FloatNumber(0)
		y, _ := rect.FloatNumber(1)
		x2, _ := rect.FloatNumber(2)
		y2, _ := rect.FloatNumber(3)

		field := &FormField{
			ObjectId: objectId,
			Name:     name,
			Type:     fieldType.String(),
			Rect:     pdfcpu.Rect(x, y, x2, y2),
			Dict:     dict,
		}
		r.fieldMapById[objectId] = field
		r.fieldMapByName[name] = field
	}
	return nil
}
// FillFormFieldsWithItsIdName is a debugging aid: for every extracted form
// field it draws a "#<object id> <name>" label on page 1, positioned at the
// lower-left corner of the field's rectangle (coordinates truncated to int).
// This makes it easy to see which ID or name to use in business logic.
//
// Errors returned by AddText are discarded.
func (r *PDFFormFiller) FillFormFieldsWithItsIdName() {
	for id, f := range r.fieldMapById {
		x, y := int(f.Rect.LL.X), int(f.Rect.LL.Y)
		_ = r.AddText(1, fmt.Sprintf("#%d %s", id, f.Name), x, y)
	}
}
// GetFormDictById returns the dictionary stored under objectId in the
// cross-reference table.
//
// It returns an error when the table has no entry for objectId, or when the
// entry's object is not a pdfcpu.Dict.
func (r *PDFFormFiller) GetFormDictById(objectId int) (dict pdfcpu.Dict, err error) {
	entry, found := r.ctx.XRefTable.Table[objectId]
	if !found {
		return nil, errors.Errorf("field %d not found!", objectId)
	}
	if d, isDict := entry.Object.(pdfcpu.Dict); isDict {
		return d, nil
	}
	return nil, errors.Errorf("field %d, %s is not Dict type!", objectId, entry.Object)
}
// SetTextFieldByName fills the text form field called name with value.
//
// The field is looked up by name and the work is delegated to
// SetTextFieldById. An error is returned when no field has that name, or
// when SetTextFieldById fails.
func (r *PDFFormFiller) SetTextFieldByName(name string, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.SetTextFieldById(formField.ObjectId, value, setReadOnly)
}
// SetTextFieldById fills a text form field.
//
// objectId is the object number of the field (it can be inspected with the
// mupdf tool: `mutool show some.pdf form`).
// value is the text content; it is stored in the field's "V" entry as a
// UTF-16 encoded hex literal.
// setReadOnly, when true, sets the read-only bit (value 1) in the field's
// "Ff" flags.
//
// An error is returned when objectId is not a known form field or when the
// field's type is not "Tx".
//
// NOTE(review): only "V" is written; the widget's appearance is not
// regenerated here — confirm that target viewers render the new value.
func (r *PDFFormFiller) SetTextFieldById(objectId int, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}
	if formField.Type != "Tx" {
		return errors.Errorf("type of field %d is %s (expected Tx)", objectId, formField.Type)
	}

	formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value)))

	if setReadOnly {
		// "Ff" is a bit field: keep any flags already stored directly on
		// the field instead of overwriting them with just the read-only bit.
		flags := pdfcpu.Integer(1)
		if current, isInt := formField.Dict["Ff"].(pdfcpu.Integer); isInt {
			flags |= current
		}
		formField.Dict["Ff"] = flags
	}
	return nil
}
// SetCheckboxFieldByName sets the state of the checkbox form field called name.
//
// The field is looked up by name and the work is delegated to
// SetCheckboxFieldById. An error is returned when no field has that name, or
// when SetCheckboxFieldById fails.
func (r *PDFFormFiller) SetCheckboxFieldByName(name string, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.SetCheckboxFieldById(formField.ObjectId, value, setReadOnly)
}
// SetCheckboxFieldById sets the state of a checkbox form field.
//
// objectId is the object number of the field (it can be inspected with the
// mupdf tool: `mutool show some.pdf form`).
// value is the form state; the valid choices are the ones defined by the
// field's AP attribute.
// setReadOnly, when true, sets the read-only bit (value 1) in the field's
// "Ff" flags.
//
// An error is returned when objectId is not a known form field or when the
// field's type is not "Btn".
//
// NOTE(review): only "AS" is changed; the field's "V" entry is left
// untouched — confirm whether it should be kept in sync.
func (r *PDFFormFiller) SetCheckboxFieldById(objectId int, value string, setReadOnly bool) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}
	if formField.Type != "Btn" {
		return errors.Errorf("type of field %d is %s (expected Btn)", objectId, formField.Type)
	}

	// A checkbox's displayed appearance is selected through AS; the
	// available states are defined in AP.
	// https://www.verypdf.com/document/pdf-format-reference/index.htm page 612
	formField.Dict["AS"] = pdfcpu.Name(value)

	if setReadOnly {
		// "Ff" is a bit field: keep any flags already stored directly on
		// the field instead of overwriting them with just the read-only bit.
		flags := pdfcpu.Integer(1)
		if current, isInt := formField.Dict["Ff"].(pdfcpu.Integer); isInt {
			flags |= current
		}
		formField.Dict["Ff"] = flags
	}
	return nil
}
// AddImageOverObjectByName places an image on top of the form field called name.
//
// The field is looked up by name and the work is delegated to
// AddImageOverObjectById. An error is returned when no field has that name,
// or when AddImageOverObjectById fails.
func (r *PDFFormFiller) AddImageOverObjectByName(name string, image []byte) (err error) {
	formField, ok := r.fieldMapByName[name]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found Field: %s", name)
	}
	return r.AddImageOverObjectById(formField.ObjectId, image)
}
// AddImageOverObjectById places an image on top of the form field with the
// given object number.
//
// The image is passed to AddImage for page 1 with a scale of 1, using the
// field rectangle's lower-left corner, width and height. Each value is
// rounded to the nearest integer rather than truncated, so the placement is
// off by at most half a unit.
//
// An error is returned when objectId is not a known form field, or when
// AddImage fails.
func (r *PDFFormFiller) AddImageOverObjectById(objectId int, image []byte) (err error) {
	formField, ok := r.fieldMapById[objectId]
	if !ok {
		// errors.Wrapf returns nil for a nil cause, which made a missing
		// field look like success; build a fresh error instead.
		return errors.Errorf("Can not found objectId %d", objectId)
	}

	rect := formField.Rect
	x := int(math.Round(rect.LL.X))
	y := int(math.Round(rect.LL.Y))
	w := int(math.Round(rect.Width()))
	h := int(math.Round(rect.Height()))

	// FIXME: for PDFs with more than one page, the page that contains
	// objectId has to be determined instead of assuming page 1.
	return r.AddImage(1, image, x, y, w, h, 1)
}
// AddImage adds an image to the given page of the PDF as an image watermark.
//
// page is the page number the watermark is added to.
// image holds the raw image bytes.
// x and y are written into the watermark description as the offset, with the
// position set to "bl".
// w and h are accepted but not used by the current implementation.
// scale is written into the description as an absolute scale factor.
//
// An error is returned when the watermark cannot be built or cannot be added.
func (r *PDFFormFiller) AddImage(page int, image []byte, x, y, w, h int, scale float64) (err error) {
	pages := pdfcpu.IntSet{
		page: true,
	}

	// Build the watermark description string.
	descriptionString := fmt.Sprintf("pos:bl, rot: 0, sc: %.4f abs, off: %d %d", scale, x, y)

	wm, err := api.ImageWatermarkForReader(bytes.NewReader(image), descriptionString, true, false, pdfcpu.POINTS)
	if err != nil {
		return errors.Wrap(err, "Build ImageWatermark failed")
	}
	if err = r.ctx.AddWatermarks(pages, wm); err != nil {
		return errors.Wrap(err, "Add ImageWatermark failed")
	}
	return nil
}
// AddText adds text to the given page of the PDF as a text watermark.
//
// page is the page number the watermark is added to.
// text is the string to draw.
// x and y are written into the watermark description as the offset, with the
// position set to "bl". The description also fixes the font size at 12
// points and the stroke and fill colours at #E00000.
//
// An error is returned when the watermark cannot be built or cannot be added.
func (r *PDFFormFiller) AddText(page int, text string, x, y int) (err error) {
	pages := pdfcpu.IntSet{
		page: true,
	}

	// Build the watermark description string.
	descriptionString := fmt.Sprintf("points:12, strokec:#E00000, fillc:#E00000, sc: 1 abs, pos:bl, rot:0, off: %d %d", x, y)

	wm, err := api.TextWatermark(text, descriptionString, true, false, pdfcpu.POINTS)
	if err != nil {
		return errors.Wrap(err, "Build TextWatermark failed")
	}
	if err = r.ctx.AddWatermarks(pages, wm); err != nil {
		return errors.Wrap(err, "Add TextWatermark failed")
	}
	return nil
}
package render
import (
"fmt"
"io/ioutil"
"testing"
"github.com/stretchr/testify/assert"
)
// TestPDFFormFiller fills the fw8ben.pdf template by object ID and by field
// name, overlays a signature image, and writes the labelled result back to
// testdata. Setup failures abort the test instead of letting it pass silently.
func TestPDFFormFiller(t *testing.T) {
	templateData, err := ioutil.ReadFile("testdata/fw8ben.pdf")
	if err != nil {
		t.Fatal(fmt.Errorf("read template: %w", err))
	}
	filler, err := NewPDFFormFiller(templateData)
	if err != nil {
		t.Fatal(fmt.Errorf("create form filler: %w", err))
	}
	signatureImage, err := ioutil.ReadFile("testdata/signature.png")
	if err != nil {
		t.Fatal(fmt.Errorf("read signature image: %w", err))
	}

	// Label every form field with its ID and name to ease debugging.
	filler.FillFormFieldsWithItsIdName()

	// Set content by object ID.
	assert.NoError(t, filler.SetTextFieldById(302, "中文", true))
	assert.NoError(t, filler.SetCheckboxFieldById(312, "1", true))
	assert.NoError(t, filler.AddImageOverObjectById(323, signatureImage))

	// Set content by field name.
	assert.NoError(t, filler.SetTextFieldByName("f_2[0]", "中文2", true))
	assert.NoError(t, filler.SetCheckboxFieldByName("c1_02[0]", "1", true))
	assert.NoError(t, filler.AddImageOverObjectByName("Date[0]", signatureImage))

	// Write the result to a file.
	result, err := filler.WriteToBytes()
	assert.Nil(t, err)
	assert.True(t, len(result) > 0)
	assert.NoError(t, ioutil.WriteFile("testdata/fw8ben-labeled.pdf", result, 0644))
}
@eduardo-mior
Copy link

@dingyaguang117 What program or website do you use to edit your PDFs and create forms?

I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.

I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.

@dingyaguang117
Copy link
Author

@eduardo-mior Thanks for your feedback.

I used WonderShare PDFelement to edit the PDF files. Many PDF files are not valid; you can use mutool to repair them like this:

mutool clean in.pdf out.pdf

BTW, I fixed a bug where non-ASCII strings were not handled correctly. I don't know whether it will solve your problem.

-	formField.Dict["V"] = pdfcpu.StringLiteral(pdfcpu.EncodeUTF16String(value))
+	formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value)))

@dingyaguang117 What program or website do you use to edit your PDFs and create forms?

I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.

I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.

@eduardo-mior
Copy link

@dingyaguang117 WonderShare Element is paid, I was looking for free alternatives.

As soon as time allows I will test again with this fix you made. Thank you very much.

@T3OnlineServices
Copy link

T3OnlineServices commented Apr 19, 2023

@dingyaguang117 WonderShare Element is paid, I was looking for free alternatives.

As soon as time allows I will test again with this fix you made. Thank you very much.

I understand your frustration with the sites you've tried for editing and filling PDF forms. It can be challenging to find the right tool for the job. Have you considered using PDFSimpli? It's a great program that allows you to easily edit and fill PDF forms. With PDFSimpli, you can quickly and easily add text, images, and signatures to your PDFs, and you can fill out and sign forms online without any hassle.

What's great about https://pdfsimpli.com/pdf-editor is that it's user-friendly and offers a range of features to make editing and filling PDF forms a breeze. Plus, it's secure and reliable, so you can be confident that your documents are safe and protected. Give it a try and see if it's the right fit for your needs.

With the Delayed Option you can access a document free after 24 hours. If you want the doc immediately, you would be charged today between $1.45 and $1.95 for the 14-Day Trial (depending on which plan you choose) and then be billed the $39.95 on day 14. The policy states they will refund any payments within the last 30 days, no questions asked. This means you can have a full refund if you follow these terms.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment