Skip to content

Instantly share code, notes, and snippets.

@akkuman
Created April 2, 2020 09:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save akkuman/703910946c8f01bcf38d254d513fe935 to your computer and use it in GitHub Desktop.
Save akkuman/703910946c8f01bcf38d254d513fe935 to your computer and use it in GitHub Desktop.
获取pdf中的文字
package main
import (
"fmt"
"os"
"github.com/ledongthuc/pdf"
)
// main extracts the text of the PDF file named by the first command-line
// argument and prints it to standard output.
//
// It exits with status 2 and a usage message when no path is supplied, and
// panics if the file cannot be read as a PDF.
func main() {
	// Guard the argument access: indexing os.Args[1] without this check
	// panics with an index-out-of-range error when no path is given.
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "usage: <program> <file.pdf>")
		os.Exit(2)
	}

	content, err := readPdf(os.Args[1]) // Read local pdf file
	if err != nil {
		panic(err)
	}
	fmt.Println(content)
}
// readPdf opens the PDF file at path and returns the text of all its pages
// concatenated in page order, with no separator between words, rows or pages.
//
// Extraction is best-effort: pages whose value is null, and pages whose rows
// cannot be extracted, are skipped silently. The returned error is non-nil
// only when the file itself cannot be opened, in which case text is empty.
func readPdf(path string) (text string, err error) {
	f, r, err := pdf.Open(path)
	if err != nil {
		// The reader must not be used when Open reports an error, so bail out
		// before calling any method on r.
		return "", err
	}
	defer f.Close()

	// Pages are addressed starting at 1, up to and including NumPage().
	totalPage := r.NumPage()
	for pageIndex := 1; pageIndex <= totalPage; pageIndex++ {
		p := r.Page(pageIndex)
		if p.V.IsNull() {
			continue
		}

		// Use a distinct name so the named result err is not shadowed; a page
		// that fails to parse is skipped rather than aborting the whole file.
		rows, rowErr := p.GetTextByRow()
		if rowErr != nil {
			continue
		}

		for _, row := range rows {
			for _, word := range row.Content {
				text += word.S
			}
		}
	}
	return text, nil
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment