Skip to content

Instantly share code, notes, and snippets.

@giggiu16

giggiu16/main.go Secret

Created April 17, 2019 15:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save giggiu16/a17fa87ed1ca0dbb3a22b670a623c18c to your computer and use it in GitHub Desktop.
Save giggiu16/a17fa87ed1ca0dbb3a22b670a623c18c to your computer and use it in GitHub Desktop.
package main
import (
"fmt"
"os"
unicommon "github.com/unidoc/unidoc/common"
pdfcore "github.com/unidoc/unidoc/pdf/core"
"github.com/unidoc/unidoc/pdf/creator"
"github.com/unidoc/unidoc/pdf/extractor"
pdf "github.com/unidoc/unidoc/pdf/model"
pdfmodel "github.com/unidoc/unidoc/pdf/model"
)
// main enables debug logging in the unidoc library, validates the command
// line and runs extractText on the PDF named by the first argument.
//
// Usage and error diagnostics are written to standard error so they do not
// mix with the regular progress output on standard output; the process exits
// with status 1 in either case.
func main() {
	unicommon.SetLogger(unicommon.NewConsoleLogger(unicommon.LogLevelDebug))

	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "Syntax: go run main.go input.pdf")
		os.Exit(1)
	}

	inputPath := os.Args[1]
	fmt.Printf("Input file: %s\n", inputPath)

	if err := extractText(inputPath); err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		os.Exit(1)
	}
}
// extractText reads the first page of the PDF at inputPath, extracts its text
// components and redraws them, at their extracted positions, onto a new
// single-page document that is written to "new.pdf" in the working directory.
//
// At most maxComponents text components are redrawn. For each component the
// font reported by the extractor is registered on the new page (keyed by its
// base font name) and applied to the paragraph when it can be loaded; font
// problems are reported on standard output and the component is still drawn
// with the paragraph's default font.
//
// It returns nil without writing anything when the document has no pages, and
// the first error encountered while opening, parsing, drawing or writing
// otherwise.
func extractText(inputPath string) error {
	// Upper bound on redrawn components, for brevity of the output.
	const maxComponents = 1000

	f, err := os.Open(inputPath)
	if err != nil {
		return err
	}
	defer f.Close()

	pdfReader, err := pdf.NewPdfReader(f)
	if err != nil {
		return err
	}

	numPages, err := pdfReader.GetNumPages()
	if err != nil {
		return err
	}
	if numPages < 1 {
		return nil
	}

	page, err := pdfReader.GetPage(1)
	if err != nil {
		return err
	}

	// Resolve the media box through the accessor rather than the raw field:
	// the field is nil when the box is inherited from a parent node, which
	// would otherwise panic on Height() below.
	mbox, err := page.GetMediaBox()
	if err != nil {
		return err
	}
	pageHeight := mbox.Height()

	e, err := extractor.New(page)
	if err != nil {
		return err
	}
	texts, _, _, err := e.ExtractPageText()
	if err != nil {
		return err
	}

	// Named c (not creator) so the imported creator package is not shadowed.
	c := creator.New()
	newpage := pdf.NewPdfPage()
	newpage.MediaBox = mbox
	if err := c.AddPage(newpage); err != nil {
		return err
	}

	count := 0
	for _, m := range texts.TextComponents() {
		// Only redraw the first maxComponents components for brevity.
		if count >= maxComponents {
			break
		}
		count++

		p := c.NewParagraph(m.Text)

		// Register the component's font on the new page under its base font
		// name, unless a font with that name is already present.
		fontName := pdfcore.PdfObjectName(m.Font.BaseFont())
		if !newpage.HasFontByName(fontName) {
			fmt.Printf("%s font not found in page, adding it\n", fontName)
			// A failed registration is detected by the lookup just below
			// (found == false), so the result is not inspected here.
			newpage.AddFont(fontName, m.Font.ToPdfObject())
		}

		if font, found := newpage.Resources.GetFontByName(fontName); found {
			newFont, err := pdfmodel.NewPdfFontFromPdfObject(font)
			if err != nil {
				// Best effort: keep the paragraph's default font.
				fmt.Println("error is: ", err)
			} else {
				p.SetFont(newFont)
				fmt.Println("font set:", newFont.BaseFont())
			}
		}

		// Flip the vertical coordinate: the paragraph position is measured
		// from the top of the page, so subtract the component's top edge
		// (m.Y + m.Height) from the page height.
		// NOTE(review): assumes m.Y is measured from the bottom of the page —
		// confirm against the extractor.
		p.SetPos(m.X, pageHeight-(m.Y+m.Height))

		if err := c.Draw(p); err != nil {
			return err
		}
	}

	return c.WriteToFile("new.pdf")
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment