Skip to content

Instantly share code, notes, and snippets.

@3ace
Created October 3, 2023 07:45
Show Gist options
  • Save 3ace/91309575457dfea43f09aff6441afa37 to your computer and use it in GitHub Desktop.
Save 3ace/91309575457dfea43f09aff6441afa37 to your computer and use it in GitHub Desktop.
Get the total number of lines in a PDF text field using unipdf
package main
import (
"fmt"
"log"
"os"
"github.com/unidoc/unipdf/v3/common/license"
"github.com/unidoc/unipdf/v3/contentstream"
"github.com/unidoc/unipdf/v3/core"
"github.com/unidoc/unipdf/v3/model"
)
// init registers the UniDoc metered license key before the library is used.
// The key is read from the UNIDOC_LICENSE_API_KEY environment variable; if you
// need one, a free key can be created at https://cloud.unidoc.io.
// The program panics when license.SetMeteredKey reports an error.
func init() {
	apiKey := os.Getenv(`UNIDOC_LICENSE_API_KEY`)
	if err := license.SetMeteredKey(apiKey); err != nil {
		panic(err)
	}
}
// main opens the PDF named by the first command-line argument, walks every
// field of its AcroForm and hands each terminal text field to processTextField.
//
// The program exits through log.Fatal when the argument is missing, the file
// cannot be opened or parsed, or the document carries no AcroForm.
func main() {
	// Indexing os.Args[1] without this guard panics when no path is supplied.
	if len(os.Args) < 2 {
		log.Fatal("usage: pass the path of the input PDF as the first argument")
	}
	inputPath := os.Args[1]

	f, err := os.Open(inputPath)
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	pdfReader, err := model.NewPdfReader(f)
	if err != nil {
		log.Fatal(err)
	}

	acroForm := pdfReader.AcroForm
	if acroForm == nil {
		log.Fatal(" No formdata present\n")
	}

	for idx, field := range acroForm.AllFields() {
		fmt.Printf("=====\n")
		fmt.Printf("Field %d\n", idx+1)
		if !field.IsTerminal() {
			fmt.Printf("- Skipping over non-terminal field\n")
			continue
		}

		// Only text fields are inspected; every other field type is ignored.
		textField, ok := field.GetContext().(*model.PdfFieldText)
		if !ok {
			continue
		}

		// Report the failure instead of dropping it, then carry on with the
		// remaining fields so one bad field does not hide the others.
		if err := processTextField(textField); err != nil {
			fmt.Printf("- Error processing text field: %v\n", err)
		}
	}
}
// processTextField prints the decoded value of textField (when its V entry is
// a string) and, for every widget annotation that has a normal appearance
// stream (the "N" entry of the AP dictionary), prints how many lines that
// appearance contains.
//
// Lines are counted as the number of "Td" text-positioning operators whose
// second operand (the vertical offset) is non-zero. Annotations without an AP
// dictionary or without an "N" stream are skipped.
//
// It returns an error when an appearance stream cannot be decoded or its
// content cannot be parsed; annotations after the failing one are not processed.
func processTextField(textField *model.PdfFieldText) error {
	if str, ok := core.GetString(textField.V); ok {
		fmt.Printf(" - Decoded: '%s'\n", str.Decoded())
	}

	for _, wa := range textField.Annotations {
		apDict, ok := core.GetDict(wa.AP)
		if !ok {
			continue
		}
		stream, ok := core.GetStream(apDict.Get("N"))
		if !ok {
			continue
		}

		decoded, err := core.DecodeStream(stream)
		if err != nil {
			// Printed here as well as returned, so the failure stays visible
			// even to a caller that discards the returned error.
			fmt.Printf("Decoding error: %v\n", err)
			return fmt.Errorf("decoding appearance stream: %w", err)
		}

		ops, err := contentstream.NewContentStreamParser(string(decoded)).Parse()
		if err != nil {
			return fmt.Errorf("parsing appearance stream: %w", err)
		}

		lines := 0
		for _, op := range *ops {
			// Td takes two operands (tx, ty); skip malformed operators rather
			// than panicking on a missing second operand.
			if op.Operand != "Td" || len(op.Params) < 2 {
				continue
			}
			// Compare numerically so that a real-valued zero such as "0.0"
			// is not mistaken for a vertical move.
			ty, err := core.GetNumberAsFloat(op.Params[1])
			if err != nil || ty == 0 {
				continue
			}
			lines++
		}
		fmt.Printf("- Total line: %d\n", lines)
	}
	return nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment