package render | |
import ( | |
"bytes" | |
"fmt" | |
"math" | |
"github.com/pdfcpu/pdfcpu/pkg/api" | |
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu" | |
"github.com/pdfcpu/pdfcpu/pkg/pdfcpu/validate" | |
"github.com/pkg/errors" | |
) | |
type FormField struct { | |
ObjectId int | |
Name string | |
Rect *pdfcpu.Rectangle | |
Type string | |
Dict pdfcpu.Dict | |
} | |
// PDFFormFiller 表单填充辅助类 | |
type PDFFormFiller struct { | |
ctx *pdfcpu.Context | |
fieldMapById map[int]*FormField | |
fieldMapByName map[string]*FormField | |
} | |
func NewPDFFormFiller(template []byte) (r *PDFFormFiller, err error) { | |
conf := pdfcpu.NewDefaultConfiguration() | |
conf.ValidationMode = pdfcpu.ValidationRelaxed | |
// 读取 PDF | |
ctx, err := pdfcpu.Read(bytes.NewReader(template), conf) | |
if err != nil { | |
return nil, err | |
} | |
// 校验 | |
if err = validate.XRefTable(ctx.XRefTable); err != nil { | |
return nil, err | |
} | |
render := &PDFFormFiller{ | |
ctx: ctx, | |
fieldMapById: make(map[int]*FormField), | |
fieldMapByName: make(map[string]*FormField), | |
} | |
err = render.extractFormFields() | |
if err != nil { | |
return nil, err | |
} | |
return render, nil | |
} | |
func (r *PDFFormFiller) WriteToBytes() (data []byte, err error) { | |
buffer := bytes.NewBuffer(nil) | |
err = api.WriteContext(r.ctx, buffer) | |
if err != nil { | |
return nil, err | |
} | |
return buffer.Bytes(), nil | |
} | |
func (r *PDFFormFiller) extractFormFields() error { | |
for objectId, item := range r.ctx.XRefTable.Table { | |
dict, ok := item.Object.(pdfcpu.Dict) | |
if !ok { | |
continue | |
} | |
subtype, ok := dict.Find("Subtype") | |
if !ok || subtype.String() != "Widget" { | |
continue | |
} | |
fieldType, ok := dict.Find("FT") | |
if !ok { | |
continue | |
} | |
name, err := pdfcpu.HexLiteralToString(dict["T"].(pdfcpu.HexLiteral)) | |
if err != nil { | |
return errors.Wrap(err, "Decode T attribute of form field failed") | |
} | |
rect := dict.ArrayEntry("Rect") | |
x, _ := rect.FloatNumber(0) | |
y, _ := rect.FloatNumber(1) | |
x2, _ := rect.FloatNumber(2) | |
y2, _ := rect.FloatNumber(3) | |
field := &FormField{ | |
ObjectId: objectId, | |
Name: name, | |
Type: fieldType.String(), | |
Rect: pdfcpu.Rect(x, y, x2, y2), | |
Dict: dict, | |
} | |
r.fieldMapById[objectId] = field | |
r.fieldMapByName[name] = field | |
} | |
return nil | |
} | |
// FillFormFieldsWithItsIdName 为工具方法, 会将 Form 中所有文本字段 填充上其 ID,便于写业务逻辑 | |
func (r *PDFFormFiller) FillFormFieldsWithItsIdName() { | |
for objectId, field := range r.fieldMapById { | |
label := fmt.Sprintf("#%d %s", objectId, field.Name) | |
r.AddText(1, label, int(field.Rect.LL.X), int(field.Rect.LL.Y)) | |
} | |
} | |
// GetFormDictById 获取某个元素 | |
func (r *PDFFormFiller) GetFormDictById(objectId int) (dict pdfcpu.Dict, err error) { | |
item, ok := r.ctx.XRefTable.Table[objectId] | |
if !ok { | |
return nil, errors.Errorf("field %d not found!", objectId) | |
} | |
dict, ok = item.Object.(pdfcpu.Dict) | |
if !ok { | |
return nil, errors.Errorf("field %d, %s is not Dict type!", objectId, item.Object) | |
} | |
return dict, nil | |
} | |
// SetTextFieldByName 填充文本表单 | |
func (r *PDFFormFiller) SetTextFieldByName(name string, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.SetTextFieldById(formField.ObjectId, value, setReadOnly) | |
} | |
// SetTextFieldById 填充文本表单 | |
// objectId: 为对象编号(可以通过 mupdf 工具 `mutool show some.pdf form` 查看) | |
// value: 为文本内容 | |
func (r *PDFFormFiller) SetTextFieldById(objectId int, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
if formField.Type != "Tx" { | |
return errors.Errorf("type of field %d is %s (expected Tx)", objectId, formField.Type) | |
} | |
formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value))) | |
if setReadOnly { | |
formField.Dict["Ff"] = pdfcpu.Integer(1) | |
} | |
return nil | |
} | |
// SetCheckboxFieldByName 设置 checkbox 表单选项 | |
func (r *PDFFormFiller) SetCheckboxFieldByName(name string, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.SetCheckboxFieldById(formField.ObjectId, value, setReadOnly) | |
} | |
// SetCheckboxFieldById 设置 checkbox 表单选项 | |
// objectId: 为对象编号(可以通过 mupdf 工具 `mutool show some.pdf form` 查看) | |
// value 为表单状态,可选项为 AP 属性定义的选项 | |
func (r *PDFFormFiller) SetCheckboxFieldById(objectId int, value string, setReadOnly bool) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
if formField.Type != "Btn" { | |
return errors.Errorf("type of field %d is %s (expected Btn)", objectId, formField.Type) | |
} | |
// checkbox 通过 AS 控制展示样式, 其选项定义在 AP 中 | |
// https://www.verypdf.com/document/pdf-format-reference/index.htm 612 页 | |
formField.Dict["AS"] = pdfcpu.Name(value) | |
if setReadOnly { | |
formField.Dict["Ff"] = pdfcpu.Integer(1) | |
} | |
return nil | |
} | |
// AddImageOverObjectByName 在某个对象上方添加图片 | |
func (r *PDFFormFiller) AddImageOverObjectByName(name string, image []byte) (err error) { | |
formField, ok := r.fieldMapByName[name] | |
if !ok { | |
return errors.Wrapf(err, "Can not found Field: %s", name) | |
} | |
return r.AddImageOverObjectById(formField.ObjectId, image) | |
} | |
func (r *PDFFormFiller) AddImageOverObjectById(objectId int, image []byte) (err error) { | |
formField, ok := r.fieldMapById[objectId] | |
if !ok { | |
return errors.Wrapf(err, "Can not found objectId %d", objectId) | |
} | |
// FIXME: 对于超过一页的 PDF,需要计算出 objectId 在哪一页 | |
err = r.AddImage(1, image, int(formField.Rect.LL.X), int(formField.Rect.LL.Y), int(formField.Rect.Width()), int(formField.Rect.Height()), 1) | |
if err != nil { | |
return err | |
} | |
return nil | |
} | |
// AddImage 在 PDF 指定区域添加图片 | |
func (r *PDFFormFiller) AddImage(page int, image []byte, x, y, w, h int, scale float64) (err error) { | |
pages := pdfcpu.IntSet{ | |
page: true, | |
} | |
// 计算水印描述字符串 | |
descriptionString := fmt.Sprintf("pos:bl, rot: 0, sc: %.4f abs, off: %d %d", scale, x, y) | |
fmt.Printf("descriptionString %s\n", descriptionString) | |
wm, err := api.ImageWatermarkForReader(bytes.NewReader(image), descriptionString, true, false, pdfcpu.POINTS) | |
if err != nil { | |
return errors.Wrap(err, "Build ImageWatermark failed") | |
} | |
err = r.ctx.AddWatermarks(pages, wm) | |
if err != nil { | |
return errors.Wrap(err, "Add ImageWatermark failed") | |
} | |
return err | |
} | |
// AddText 在 PDF 指定位置添加文字 | |
func (r *PDFFormFiller) AddText(page int, text string, x, y int) (err error) { | |
pages := pdfcpu.IntSet{ | |
page: true, | |
} | |
// 计算水印描述字符串 | |
descriptionString := fmt.Sprintf("points:12, strokec:#E00000, fillc:#E00000, sc: 1 abs, pos:bl, rot:0, off: %d %d", x, y) | |
fmt.Printf("descriptionString %s\n", descriptionString) | |
wm, err := api.TextWatermark(text, descriptionString, true, false, pdfcpu.POINTS) | |
if err != nil { | |
return errors.Wrap(err, "Build TextWatermark failed") | |
} | |
err = r.ctx.AddWatermarks(pages, wm) | |
if err != nil { | |
return errors.Wrap(err, "Add TextWatermark failed") | |
} | |
return err | |
} |
package render | |
import ( | |
"fmt" | |
"io/ioutil" | |
"testing" | |
"github.com/stretchr/testify/assert" | |
) | |
func TestPDFFormFiller(t *testing.T) { | |
templateData, _ := ioutil.ReadFile("testdata/fw8ben.pdf") | |
filler, err := NewPDFFormFiller(templateData) | |
if err != nil { | |
fmt.Println(err) | |
return | |
} | |
signatureImage, err := ioutil.ReadFile("testdata/signature.png") | |
// 为所有的表单字段 标准ID 和 名称, 方便调试 | |
filler.FillFormFieldsWithItsIdName() | |
// 使用 ID 设置内容 | |
filler.SetTextFieldById(302, "中文", true) | |
filler.SetCheckboxFieldById(312, "1", true) | |
filler.AddImageOverObjectById(323, signatureImage) | |
// 使用 Name 设置内容 | |
filler.SetTextFieldByName("f_2[0]", "中文2", true) | |
filler.SetCheckboxFieldByName("c1_02[0]", "1", true) | |
filler.AddImageOverObjectByName("Date[0]", signatureImage) | |
// 写入文件 | |
result, err := filler.WriteToBytes() | |
assert.Nil(t, err) | |
assert.True(t, len(result) > 0) | |
ioutil.WriteFile("testdata/fw8ben-labeled.pdf", result, 0644) | |
} |
@dingyaguang117 I'm getting the following error
interface conversion: pdfcpu.Object is pdfcpu.StringLiteral, not pdfcpu.HexLiteral
This error is happening on line 80:
name, err := pdfcpu.HexLiteralToString(dict["T"].(pdfcpu.HexLiteral))
I fixed this as follows:
var name string
var err error
dictT := dict["T"]
if dictTHexLiteral, ok := dictT.(pdfcpu.HexLiteral); ok {
name, err = pdfcpu.HexLiteralToString(dictTHexLiteral)
if err != nil {
return errors.Wrap(err, "Decode T attribute of form field failed")
}
} else if dictTStringLiteral, ok := dictT.(pdfcpu.StringLiteral); ok {
name = string(dictTStringLiteral)
} else {
panic("dict os not HexLiteral and not StringLiteral")
}
@dingyaguang117 What program or website do you use to edit your PDFs and create forms?
I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.
I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.
@eduardo-mior Thanks for your feedback.
I used WonderShare PDFelement
to edit PDF files. Many PDF files are not valid; you can use mutool
to fix them like this:
mutool clean in.pdf out.pdf
BTW, I fixed the bug where non-ASCII strings did not work well. I don't know if it'll solve your problem.
- formField.Dict["V"] = pdfcpu.StringLiteral(pdfcpu.EncodeUTF16String(value))
+ formField.Dict["V"] = pdfcpu.NewHexLiteral([]byte(pdfcpu.EncodeUTF16String(value)))
@dingyaguang117 What program or website do you use to edit your PDFs and create forms?
I tried using this site https://www.sejda.com/en/pdf-forms but it seems that the fields are not detected or filled by your package.
I tried using this site https://www.pdfescape.com/ but your package doesn't fill the forms. Your package finds the fields (fields) but doesn't fill them.
@dingyaguang117 WonderShare Element is paid, I was looking for free alternatives.
As soon as time allows I will test again with this fix you made. Thank you very much.
Download template:
https://www.irs.gov/pub/irs-pdf/fw8ben.pdf