Created
April 26, 2020 20:38
-
-
Save dejadejade/75149a85be36da0d05e1ec9c6b2f5297 to your computer and use it in GitHub Desktop.
images to pdf
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"archive/zip" | |
"bytes" | |
"flag" | |
"fmt" | |
"image" | |
"image/color" | |
_ "image/jpeg" | |
_ "image/png" | |
"io" | |
"io/ioutil" | |
"log" | |
"os" | |
"path" | |
"path/filepath" | |
"sort" | |
"strings" | |
"unsafe" | |
) | |
//#cgo LDFLAGS: libjbig2enc.a -lpng -llept | |
//#include <stdlib.h> | |
//#include <stdio.h> | |
// | |
//struct Pix; | |
//typedef struct Pix PIX; | |
//void pixDestroy ( PIX **ppix ); | |
//PIX * pixReadMemPng ( const uint8_t*filedata, size_t filesize ); | |
//PIX * pixRemoveColormap ( PIX *pixs, int type ); | |
// | |
//struct jbig2ctx; | |
//typedef struct jbig2ctx JB2; | |
//JB2 *jb2Init(float thresh, float weight, int xres, int yres, int full_headers, int refine_level); | |
//void jb2Destroy(JB2 *ctx); | |
//void jb2AddPage(JB2 *ctx, PIX *bw); | |
//uint8_t *jb2ProducePage(JB2 *ctx, int page_no, int xres, int yres, int *const length); | |
//uint8_t *jb2PagesComplete(JB2 *ctx, int *const length); | |
import "C" | |
// ref renders the indirect reference to object number x with generation 0,
// for example "7 0 R".
func ref(x int) string {
	return fmt.Sprint(x) + " 0 R"
}
// Dict is a PDF dictionary. Keys are entry names without the leading slash;
// values are stored already serialized (e.g. "/Page", "3 0 R", "[ 0 0 1 1 ]").
type Dict map[string]string

// Set stores val under key, replacing any previous value.
func (d Dict) Set(key string, val string) {
	d[key] = val
}

// String serializes the dictionary as "<< /Key value\n/Key value\n>>\n".
//
// Entries are written in sorted key order. Ranging over the map directly
// would use Go's randomized iteration order, making the generated document
// differ from run to run for identical input.
func (d Dict) String() string {
	keys := make([]string, 0, len(d))
	for k := range d {
		keys = append(keys, k)
	}
	sort.Strings(keys)

	var b strings.Builder
	b.WriteString("<< ")
	for _, k := range keys {
		b.WriteString("/")
		b.WriteString(k)
		b.WriteString(" ")
		b.WriteString(d[k])
		b.WriteString("\n")
	}
	b.WriteString(">>\n")
	return b.String()
}
type Obj struct { | |
id int | |
dict Dict | |
stream *bytes.Buffer | |
} | |
func NewObj(id int, dict Dict, stream *bytes.Buffer) *Obj { | |
obj := &Obj{id: id, dict: dict, stream: stream} | |
if obj.stream != nil { | |
obj.Set("Length", fmt.Sprintf("%d", stream.Len())) | |
} | |
return obj | |
} | |
func (o *Obj) String() string { | |
var s []string | |
s = append(s, o.dict.String()) | |
if o.stream != nil { | |
s = append(s, "stream\n", o.stream.String(), "\nendstream\n") | |
} | |
s = append(s, "endobj\n") | |
return strings.Join(s, "") | |
} | |
func (o *Obj) Set(key string, val string) { | |
o.dict.Set(key, val) | |
} | |
type Doc struct { | |
next_id int | |
objs []*Obj | |
pages []*Obj | |
pages_obj *Obj | |
} | |
func NewDoc() *Doc { | |
return &Doc{next_id: 1} | |
} | |
func (d *Doc) NextID() int { | |
id := d.next_id | |
d.next_id += 1 | |
return id | |
} | |
func (d *Doc) AddObject(obj *Obj) *Obj { | |
d.objs = append(d.objs, obj) | |
return obj | |
} | |
func (d *Doc) AddNewObject(m Dict, stream *bytes.Buffer) *Obj { | |
return d.AddObject(NewObj(d.NextID(), m, stream)) | |
} | |
func (d *Doc) String() string { | |
var a []string | |
var offsets []int | |
var j0 int | |
add := func(x string) { | |
a = append(a, x) | |
j0 += len(x) + 1 | |
} | |
add("%PDF-1.4") | |
for _, o := range d.objs { | |
offsets = append(offsets, j0) | |
add(fmt.Sprintf("%d 0 obj", o.id)) | |
add(o.String()) | |
} | |
xrefstart := j0 | |
a = append(a, "xref") | |
a = append(a, fmt.Sprintf("0 %d", len(offsets)+1)) | |
a = append(a, "0000000000 65535 f ") | |
for _, o := range offsets { | |
a = append(a, fmt.Sprintf("%010d 00000 n ", o)) | |
} | |
a = append(a, "") | |
a = append(a, "trailer") | |
a = append(a, fmt.Sprintf("<< /Size %d\n/Root 1 0 R >>", len(offsets)+1)) | |
a = append(a, "startxref") | |
a = append(a, fmt.Sprintf("%d", xrefstart)) | |
a = append(a, "%%EOF") | |
return strings.Join(a, "\n") | |
} | |
// Image describes one picture to be embedded as a PDF image XObject.
type Image struct {
	name, format       string        // display name and source kind ("jbig2" or "jpeg")
	width, height      int           // size in pixels
	color_space        string        // PDF color space name without the leading slash
	bits_per_component int           // value for /BitsPerComponent
	filter             string        // PDF stream filter name without the leading slash
	decode_params      string        // serialized /DecodeParms value, empty when unused
	data               *bytes.Buffer // stream payload
	jbig2_page_no      int           // 1-based page number in the JBIG2 encoder, 0 when unset
}

// NewJBIG2Image returns a 1-bit DeviceGray image description that uses the
// JBIG2Decode filter.
func NewJBIG2Image(name string, width, height int, data *bytes.Buffer) *Image {
	img := new(Image)
	img.name, img.format = name, "jbig2"
	img.width, img.height = width, height
	img.color_space = "DeviceGray"
	img.bits_per_component = 1
	img.filter = "JBIG2Decode"
	img.data = data
	return img
}

// NewJPEGImage returns an 8-bit image description in color space cs that uses
// the DCTDecode filter.
func NewJPEGImage(name string, width, height int, cs string, data *bytes.Buffer) *Image {
	img := new(Image)
	img.name, img.format = name, "jpeg"
	img.width, img.height = width, height
	img.color_space = cs
	img.bits_per_component = 8
	img.filter = "DCTDecode"
	img.data = data
	return img
}
func (d *Doc) AddImagePage(img *Image) *Obj { | |
var xres, yres float32 | |
xres = 72.0 | |
yres = 72.0 | |
width := float32(img.width*72) / xres | |
height := float32(img.height*72) / yres | |
xobj := d.AddNewObject(Dict{"Type": "/XObject", "Subtype": "/Image", | |
"Width": fmt.Sprintf("%d", img.width), | |
"Height": fmt.Sprintf("%d", img.height), | |
"ColorSpace": "/" + img.color_space, | |
"BitsPerComponent": fmt.Sprintf("%d", img.bits_per_component), | |
"Filter": "/" + img.filter, | |
}, img.data) | |
if img.decode_params != "" { | |
xobj.Set("DecodeParms", img.decode_params) | |
} | |
s := fmt.Sprintf("q %f 0 0 %f 0 0 cm /Im1 Do Q", width, height) | |
contents := d.AddNewObject(Dict{}, bytes.NewBufferString(s)) | |
resources := d.AddNewObject(Dict{"ProcSet": "[/PDF /ImageB]", | |
"XObject": fmt.Sprintf("<< /Im1 %d 0 R >>", xobj.id)}, nil) | |
page := d.AddNewObject(Dict{"Type": "/Page", | |
"Parent": "3 0 R", | |
"MediaBox": fmt.Sprintf("[ 0 0 %f %f ]", width, height), | |
"Contents": ref(contents.id), | |
"Resources": ref(resources.id)}, nil) | |
d.pages = append(d.pages, page) | |
return page | |
} | |
func (d *Doc) Start() { | |
d.AddNewObject(Dict{"Type": "/Catalog", "Outlines": ref(2), "Pages": ref(3)}, nil) | |
d.AddNewObject(Dict{"Type": "/Outlines", "Count": "0"}, nil) | |
d.pages_obj = d.AddNewObject(Dict{"Type": "/Pages"}, nil) | |
} | |
func (d *Doc) Finish() { | |
d.pages_obj.Set("Count", fmt.Sprintf("%d", len(d.pages))) | |
var pids []string | |
for _, page := range d.pages { | |
pids = append(pids, ref(page.id)) | |
} | |
d.pages_obj.Set("Kids", fmt.Sprintf("[%s]", strings.Join(pids, " "))) | |
} | |
// convert a list of image files to pdf | |
func NewPDF(files []string, sortNames bool) (*Doc, error) { | |
var images []*Image | |
for _, file := range files { | |
var data bytes.Buffer | |
r, err := os.Open(file) | |
if err != nil { | |
log.Printf("Failed to open %s, %v\n", file, err) | |
return nil, err | |
} | |
if _, err = data.ReadFrom(r); err != nil { | |
log.Printf("Failed to read from %s, %v\n", file, err) | |
return nil, err | |
} | |
r.Close() | |
bs := data.Bytes() | |
config, format, err := image.DecodeConfig(bytes.NewReader(bs)) | |
if err != nil { | |
log.Printf("Failed to decode header for %s, %v\n", file, err) | |
continue | |
} | |
name := path.Base(file) | |
var image *Image | |
if format == "jpeg" { | |
var cs string | |
switch config.ColorModel { | |
case color.GrayModel: | |
cs = "DeviceGray" | |
case color.YCbCrModel: | |
cs = "DeviceRGB" | |
case color.CMYKModel: | |
cs = "DeviceCMYK" | |
default: | |
} | |
// log.Printf("new JPEG page %s: %d\n", name, data.Len()) | |
image = NewJPEGImage(name, config.Width, config.Height, cs, &data) | |
} else if format == "png" { | |
if pal, ok := config.ColorModel.(color.Palette); ok && len(pal) == 2 { | |
image = NewJBIG2Image(name, config.Width, config.Height, &data) | |
} else { | |
log.Printf("Unsupported PNG: %s\n", file) | |
return nil, fmt.Errorf("Unsupported PNG %s", file) | |
} | |
} else { | |
log.Printf("Unsupported format: %s, %s\n", file, format) | |
return nil, fmt.Errorf("Unsupported format %s (%s)", file, format) | |
} | |
if image == nil { | |
log.Printf("No image found for %s\n", file) | |
continue | |
} | |
images = append(images, image) | |
} | |
if sortNames { | |
sort.Sort(ByImageName(images)) | |
} | |
var numPages int | |
ctx := C.jb2Init(0.85, 0.5, 0, 0, 0, -1) | |
log.Printf("Processing %d images...\n", len(images)) | |
// adding all pages to jbig2 encoder | |
for _, img := range images { | |
if img.format != "jbig2" { | |
continue | |
} | |
bs := img.data.Bytes() | |
pix := C.pixReadMemPng((*C.uint8_t)(unsafe.Pointer(&bs[0])), C.size_t(len(bs))) | |
if pix == nil { | |
log.Printf("Failed to decode png: %s\n", img.name) | |
return nil, fmt.Errorf("Failed to decode png: %s\n", img.name) | |
} | |
pixl := C.pixRemoveColormap(pix, 4) | |
if pixl == nil { | |
log.Printf("Failed to remove colormap: %s\n", img.name) | |
return nil, fmt.Errorf("Failed to remove colormap: %s\n", img.name) | |
} | |
C.pixDestroy(&pix) | |
C.jb2AddPage(ctx, pixl) | |
C.pixDestroy(&pixl) | |
log.Printf("adding page %d %s\n", numPages, img.name) | |
img.jbig2_page_no = numPages + 1 | |
numPages += 1 | |
} | |
log.Printf("Added %d images...\n", numPages) | |
var sym []byte | |
if numPages > 0 { | |
var symlen C.int = 0 | |
csym := C.jb2PagesComplete(ctx, &symlen) | |
if csym == nil { | |
log.Printf("No sym found") | |
return nil, fmt.Errorf("No sym") | |
} | |
log.Printf("symlen %d\n", symlen) | |
sym = C.GoBytes(unsafe.Pointer(csym), symlen) | |
C.free(unsafe.Pointer(csym)) | |
for _, img := range images { | |
if img.jbig2_page_no == 0 { | |
continue | |
} | |
var datalen C.int = 0 | |
data := C.jb2ProducePage(ctx, C.int(img.jbig2_page_no-1), -1, -2, &datalen) | |
if data == nil { | |
log.Printf("Failed to produce page for %s\n", img.name) | |
return nil, fmt.Errorf("Failed to produce page for %s\n", img.name) | |
} | |
img.data = bytes.NewBuffer(C.GoBytes(unsafe.Pointer(data), datalen)) | |
// log.Printf("page %s len %d\n", img.name, datalen) | |
C.free(unsafe.Pointer(data)) | |
} | |
} | |
doc := NewDoc() | |
doc.Start() | |
var symid int | |
if len(sym) > 0 { | |
symd := doc.AddNewObject(Dict{}, bytes.NewBuffer(sym)) | |
symid = symd.id | |
} | |
// output pages | |
for _, img := range images { | |
if img.format == "jbig2" { | |
img.decode_params = fmt.Sprintf(" << /JBIG2Globals %d 0 R >>", symid) | |
} | |
doc.AddImagePage(img) | |
} | |
doc.Finish() | |
return doc, nil | |
} | |
const NameLen = len("000001.pdg") | |
var ss = flag.Bool("ss", false, "enable ssreader mode") | |
func main() { | |
flag.Parse() | |
args := flag.Args() | |
if len(args) == 0 { | |
log.Printf("Usage: %s dir/zipfile/imagefile\n", os.Args[0]) | |
return | |
} | |
target := args[0] | |
info, err := os.Stat(target) | |
if err != nil { | |
log.Fatal(err) | |
} | |
var files []string | |
if info.IsDir() { | |
fis, err := ioutil.ReadDir(target) | |
if err != nil { | |
log.Fatal(err) | |
} | |
for _, fi := range fis { | |
if fi.Mode().IsRegular() { | |
files = append(files, filepath.Join(target, fi.Name())) | |
} | |
} | |
} else if info.Mode().IsRegular() { | |
ext := strings.ToLower(filepath.Ext(target)) | |
if ext == "zip" { | |
log.Printf("Extract files from %s\n", target) | |
dir := path.Base(target) + "_tmp" | |
err = os.Mkdir(dir, 0755) | |
if err != nil { | |
log.Fatal(err) | |
} | |
files, err = extractZipFiles(target, dir) | |
if err != nil { | |
log.Fatal(err) | |
} | |
defer os.RemoveAll(dir) | |
} else { | |
files = append(files, target) | |
} | |
} | |
var images []string | |
for _, file := range files { | |
if *ss && len(path.Base(file)) != NameLen { | |
log.Printf("Skipping file %s\n", file) | |
continue | |
} | |
images = append(images, file) | |
} | |
log.Printf("Processing %d files from %s\n", len(images), target) | |
sortNames := true | |
doc, err := NewPDF(images, sortNames) | |
if err != nil { | |
log.Fatal(err) | |
} | |
outfile := strings.TrimSuffix(path.Base(target), filepath.Ext(target)) + ".pdf" | |
log.Printf("Outputing file to %s\n", outfile) | |
ioutil.WriteFile(outfile, []byte(doc.String()), 0644) | |
} | |
//pdg

// tag maps the first two characters of a page file name to a rank used for
// ordering pages; lower ranks sort first. Names shorter than two characters
// and unrecognized prefixes get rank 10.
func tag(name string) int {
	if len(name) >= 2 {
		switch prefix := name[:2]; prefix {
		case "bo": // bok
			return 1
		case "co": // cov
			return 2
		case "fo": // fow
			return 3
		case "!0": // toc
			return 4
		case "00", "01", "02":
			return 5
		case "le": // leg
			return 7
		}
	}
	return 10
}
type ByImageName []*Image | |
func (a ByImageName) Len() int { return len(a) } | |
func (a ByImageName) Swap(i, j int) { a[i], a[j] = a[j], a[i] } | |
func (a ByImageName) Less(i, j int) bool { | |
ti, tj := tag(a[i].name), tag(a[j].name) | |
if ti != tj { | |
return ti < tj | |
} | |
return a[i].name < a[j].name | |
} | |
//zip

// extractZipFiles extracts every non-directory entry of zipfile directly into
// dir, discarding the entries' own directory components, and returns the
// paths written in archive order.
//
// On failure the paths collected so far (including the one being written) are
// returned together with the error.
func extractZipFiles(zipfile, dir string) ([]string, error) {
	var files []string
	r, err := zip.OpenReader(zipfile)
	if err != nil {
		return files, err
	}
	defer r.Close()

	for _, f := range r.File {
		if f.FileInfo().IsDir() {
			continue
		}
		// Zip entry names are slash-separated, so path (not filepath) is the
		// right package for taking the base name.
		fpath := filepath.Join(dir, path.Base(f.Name))
		files = append(files, fpath)
		if err := extractZipEntry(f, fpath); err != nil {
			return files, err
		}
	}
	return files, nil
}

// extractZipEntry writes the contents of f to fpath, creating or truncating
// the file with the entry's mode. Both handles are closed before it returns.
func extractZipEntry(f *zip.File, fpath string) error {
	rc, err := f.Open()
	if err != nil {
		return err
	}
	defer rc.Close()

	outFile, err := os.OpenFile(fpath, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, f.Mode())
	if err != nil {
		return err
	}
	if _, err := io.Copy(outFile, rc); err != nil {
		outFile.Close()
		return err
	}
	// A failed Close can mean the data never reached disk, so report it.
	return outFile.Close()
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment