Skip to content

Instantly share code, notes, and snippets.

@klauspost
Created February 19, 2015 14:41
Show Gist options
  • Save klauspost/71fc98a9aa0dcd2676df to your computer and use it in GitHub Desktop.
Save klauspost/71fc98a9aa0dcd2676df to your computer and use it in GitHub Desktop.
Integer version of the scaler, with fast path for RGBA.
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package draw
import (
"image"
"image/color"
"math"
)
// Scale resamples the sr region of src into the dr region of dst using the
// interpolator q.
//
// It builds a Scaler from the sizes of the two rectangles and runs it once,
// with the rectangles' Min corners as the destination and source start points.
func Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, q Interpolator) {
	s := NewScaler(dr.Size(), sr.Size(), q)
	s.Scale(dst, dr.Min, src, sr.Min)
}
// Scaler scales part of a source image, starting from sp, and writes to a
// destination image, starting from dp. The destination and source widths and
// heights are pre-determined, as part of the Scaler.
//
// A Scaler is safe to use concurrently.
type Scaler interface {
// Scale reads src starting at sp and writes the scaled pixels to dst
// starting at dp.
Scale(dst Image, dp image.Point, src image.Image, sp image.Point)
}
// TODO: should Scale and NewScaler also take an Op argument?
// NewScaler returns a Scaler that scales a source region of size srcSize to a
// destination region of size dstSize using the interpolator q.
//
// If any of the four dimensions is zero or negative the returned Scaler does
// nothing. The nearest interpolator gets a dedicated nearest-neighbor scaler;
// every other interpolator gets a kernel-based scaler whose horizontal and
// vertical weight tables are computed here, once.
func NewScaler(dstSize, srcSize image.Point, q Interpolator) Scaler {
	dstW, dstH := int32(dstSize.X), int32(dstSize.Y)
	srcW, srcH := int32(srcSize.X), int32(srcSize.Y)
	if dstW <= 0 || dstH <= 0 || srcW <= 0 || srcH <= 0 {
		return nopScaler{}
	}

	if _, isNearest := q.(nearest); isNearest {
		return &nnScaler{dw: dstW, dh: dstH, sw: srcW, sh: srcH}
	}

	return &scaler{
		dw:         dstW,
		dh:         dstH,
		sw:         srcW,
		sh:         srcH,
		horizontal: newDistrib(dstW, srcW, q),
		vertical:   newDistrib(dstH, srcH, q),
	}
}
// nopScaler is the Scaler that NewScaler returns when a source or destination
// dimension is not positive.
type nopScaler struct{}
// Scale does nothing.
func (nopScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {}
// nnScaler implements a nearest-neighbor image scaler.
type nnScaler struct {
// dw and dh are the destination width and height; sw and sh are the source
// width and height. NewScaler fills them from dstSize and srcSize.
dw, dh, sw, sh int32
}
// Scale fills the dw x dh region of dst starting at dp. Each destination
// pixel takes the color of the source pixel that the center of the
// destination pixel maps onto, within the sw x sh region of src starting at
// sp.
func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
	// A single RGBA64 value is reused for every pixel and handed to dst.Set
	// through an interface that holds its address.
	px := new(color.RGBA64)
	var pxColor color.Color = px

	srcW, srcH := uint64(z.sw), uint64(z.sh)
	dstW2, dstH2 := 2*uint64(z.dw), 2*uint64(z.dh)

	for row := int32(0); row < z.dh; row++ {
		// (2*row+1)/(2*dh) is the vertical center of the destination row as
		// a fraction of the image height; scaling by sh gives the source row.
		srcY := sp.Y + int((2*uint64(row)+1)*srcH/dstH2)
		dstY := dp.Y + int(row)
		for col := int32(0); col < z.dw; col++ {
			srcX := sp.X + int((2*uint64(col)+1)*srcW/dstW2)
			r, g, b, a := src.At(srcX, srcY).RGBA()
			*px = color.RGBA64{R: uint16(r), G: uint16(g), B: uint16(b), A: uint16(a)}
			dst.Set(dp.X+int(col), dstY, pxColor)
		}
	}
}
// scaler implements a kernel-based image scaler. NewScaler uses it for every
// Interpolator other than nearest.
type scaler struct {
// dw and dh are the destination width and height; sw and sh are the source
// width and height.
dw, dh, sw, sh int32
// horizontal and vertical are the weight tables built by newDistrib for
// the x axis (dw from sw) and the y axis (dh from sh).
horizontal, vertical distrib
}
// Scale resizes in two passes through an intermediate buffer: scaleX resamples
// every source row to the destination width, then scaleY resamples the
// buffer's columns to the destination height and writes them into dst.
func (z *scaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {
	// The intermediate image has one [4]int32 per pixel and is dw pixels wide
	// by sh pixels tall.
	//
	// TODO: is it worth having a sync.Pool for this temporary buffer?
	intermediate := make([][4]int32, z.dw*z.sh)
	z.scaleX(intermediate, src, sp)
	z.scaleY(dst, dp, intermediate)
}
// source describes one destination column or row. In a distrib returned by
// newDistrib, i and j delimit the half-open range contribs[i:j] of the
// contributions that feed it.
//
// While newDistrib is building the table it temporarily stores the inclusive
// source-coordinate bounds in i and j and the kernel center in
// invTotalWeight. The finished entries leave both float fields at zero;
// invTotalWeightFFFF is not referenced by the code visible in this file.
type source struct {
i, j int32
invTotalWeight float64
invTotalWeightFFFF float64
}
// contrib is the weight of a single source column or row.
type contrib struct {
// coord is the zero-based index of the source column or row.
coord int32
// weight is the normalized kernel weight in fixed point, where scaleVal
// represents 1.0.
weight int32
}
// distrib measures how source pixels are distributed over destination pixels
// along one axis.
type distrib struct {
// sources has one entry per destination column or row; entry s selects
// the contributions contribs[sources[s].i:sources[s].j].
sources []source
// contribs are the contributions indexed by sources[s].i and sources[s].j.
contribs []contrib
}
// Fixed-point parameters shared by the integer scaling passes.
const (
	// scaleBits is the number of fractional bits in a kernel weight.
	scaleBits = 14
	// scaleVal is the fixed-point representation of a weight of 1.0.
	scaleVal = 1 << scaleBits
	// scaleRounder is half of scaleVal; it is added before shifting right by
	// scaleBits so that the result is rounded rather than truncated.
	scaleRounder = scaleVal >> 1
)
// newDistrib returns a distrib that distributes sw source columns (or rows)
// over dw destination columns (or rows) using the kernel f.
//
// The result has one source entry per destination column or row. Each entry
// selects a run of contribs whose weights are normalized so that they sum to
// (approximately, because of truncation) scaleVal.
func newDistrib(dw, sw int32, f Interpolator) distrib {
	scale := float64(sw) / float64(dw)
	halfWidth, kernelArgScale := float64(f.Support()), 1.0
	if scale > 1 {
		// When downscaling, widen the kernel so that it covers every source
		// pixel that falls under one destination pixel.
		halfWidth *= scale
		kernelArgScale = 1 / scale
	}

	// First pass: make one source per destination column or row and
	// temporarily use its fields as scratch space. invTotalWeight holds the
	// kernel center in source coordinates, and i and j hold the inclusive
	// range of source columns or rows inside the kernel window, clipped to
	// the source bounds.
	n, maxSpan := int32(0), int32(0)
	sources := make([]source, dw)
	for x := range sources {
		center := (float64(x)+0.5)*scale - 0.5
		i := int32(math.Floor(center - halfWidth))
		if i < 0 {
			i = 0
		}
		j := int32(math.Ceil(center + halfWidth))
		if j >= sw {
			j = sw - 1
			if j < i {
				j = i
			}
		}
		sources[x] = source{i: i, j: j, invTotalWeight: center}
		span := j - i + 1
		n += span
		if span > maxSpan {
			maxSpan = span
		}
	}

	// Second pass: evaluate the kernel, drop zero weights, and store the
	// normalized fixed-point weights.
	contribs := make([]contrib, 0, n)
	// weights is scratch space for the raw kernel values of one window. Its
	// size depends on the window width, not on dw: when downscaling a window
	// can hold far more taps than there are destination pixels.
	weights := make([]float64, 0, maxSpan)
	for k, b := range sources {
		first := int32(len(contribs))
		weights = weights[:0]
		totalWeight := 0.0
		for coord := b.i; coord <= b.j; coord++ {
			w := f.F((b.invTotalWeight - float64(coord)) * kernelArgScale)
			if w == 0 {
				continue
			}
			totalWeight += w
			contribs = append(contribs, contrib{coord: coord})
			weights = append(weights, w)
		}
		invTotal := 1 / totalWeight
		for c, w := range weights {
			contribs[int(first)+c].weight = int32(invTotal * scaleVal * w)
		}
		// Replace the scratch bounds with the half-open range of this
		// entry's contribs.
		sources[k] = source{
			i: first,
			j: int32(len(contribs)),
		}
	}
	return distrib{sources: sources, contribs: contribs}
}
// testFast gates the *image.RGBA fast paths: scaleX and scaleY only dispatch
// to scaleXRGBA and scaleYRGBA while it is true.
var testFast = true
// scaleX distributes the source image's columns over the temporary image.
//
// tmp receives dw pixels for each of the sh source rows, row after row. Each
// pixel is the weighted sum of the source pixels listed in the horizontal
// distrib, rounded back from fixed point to the 16-bit-per-channel range
// returned by color.Color.RGBA. A *image.RGBA source is handed to scaleXRGBA
// while testFast is set.
func (z *scaler) scaleX(tmp [][4]int32, src image.Image, sp image.Point) {
	if rgba, ok := src.(*image.RGBA); ok && testFast {
		z.scaleXRGBA(tmp, rgba, sp)
		return
	}

	h := &z.horizontal
	out := 0
	for row := int32(0); row < z.sh; row++ {
		srcY := sp.Y + int(row)
		for _, span := range h.sources {
			var sumR, sumG, sumB, sumA int32
			for _, c := range h.contribs[span.i:span.j] {
				pr, pg, pb, pa := src.At(sp.X+int(c.coord), srcY).RGBA()
				sumR += int32(pr) * c.weight
				sumG += int32(pg) * c.weight
				sumB += int32(pb) * c.weight
				sumA += int32(pa) * c.weight
			}
			px := &tmp[out]
			px[0] = (sumR + scaleRounder) >> scaleBits
			px[1] = (sumG + scaleRounder) >> scaleBits
			px[2] = (sumB + scaleRounder) >> scaleBits
			px[3] = (sumA + scaleRounder) >> scaleBits
			out++
		}
	}
}
// scaleXRGBA is the *image.RGBA fast path of scaleX: it distributes the source
// image's columns over the temporary image by reading src.Pix directly
// instead of going through src.At.
//
// tmp receives dw pixels for each of the sh source rows, row after row, in
// the same 16-bit-per-channel range that scaleX produces.
func (z *scaler) scaleXRGBA(tmp [][4]int32, src *image.RGBA, sp image.Point) {
	t := 0
	atPoint := src.Pix[src.PixOffset(sp.X, sp.Y):]
	for y := int32(0); y < z.sh; y++ {
		// The row slice only depends on y, so it is taken once per row
		// rather than once per destination column.
		line := atPoint[int(y)*src.Stride:]
		for _, s := range z.horizontal.sources {
			var r, g, b, a int32
			for _, c := range z.horizontal.contribs[s.i:s.j] {
				coord := int(c.coord) * 4
				// Multiplying an 8-bit sample by 0x101 widens it to 16 bits,
				// which matches what color.RGBA.RGBA returns; folding the
				// factor into the weight does it once per tap.
				weight := c.weight * 0x101
				r += int32(line[coord]) * weight
				g += int32(line[coord+1]) * weight
				b += int32(line[coord+2]) * weight
				a += int32(line[coord+3]) * weight
			}
			tmp[t] = [4]int32{
				(r + scaleRounder) >> scaleBits,
				(g + scaleRounder) >> scaleBits,
				(b + scaleRounder) >> scaleBits,
				(a + scaleRounder) >> scaleBits,
			}
			t++
		}
	}
}
// scaleY distributes the temporary image's rows over the destination image.
//
// tmp is the output of scaleX: dw pixels per row, one row per source row.
// Each destination pixel is the weighted sum of the tmp pixels in its column
// that the vertical distrib lists, rounded back from fixed point and clamped
// to 16 bits per channel. A *image.RGBA destination is handed to scaleYRGBA
// while testFast is set; every other destination is written through dst.Set.
func (z *scaler) scaleY(dst Image, dp image.Point, tmp [][4]int32) {
	if d, ok := dst.(*image.RGBA); ok && testFast {
		z.scaleYRGBA(d, dp, tmp)
		return
	}

	// A single RGBA64 value is reused for every pixel and handed to dst.Set
	// through an interface that holds its address.
	dstColorRGBA64 := &color.RGBA64{}
	dstColor := color.Color(dstColorRGBA64)
	for x := int32(0); x < z.dw; x++ {
		for y, s := range z.vertical.sources {
			var r, g, b, a int32
			for _, c := range z.vertical.contribs[s.i:s.j] {
				p := &tmp[c.coord*z.dw+x]
				r += p[0] * c.weight
				g += p[1] * c.weight
				b += p[2] * c.weight
				a += p[3] * c.weight
			}
			dstColorRGBA64.R = clamp((r + scaleRounder) >> scaleBits)
			dstColorRGBA64.G = clamp((g + scaleRounder) >> scaleBits)
			dstColorRGBA64.B = clamp((b + scaleRounder) >> scaleBits)
			dstColorRGBA64.A = clamp((a + scaleRounder) >> scaleBits)
			dst.Set(dp.X+int(x), dp.Y+y, dstColor)
		}
	}
}
// scaleYRGBA is the *image.RGBA fast path of scaleY: it distributes the
// temporary image's rows over the destination image by writing dst.Pix
// directly instead of going through dst.Set.
//
// tmp holds 16-bit-per-channel values, so the weighted sums are shifted by an
// extra 8 bits (with matching rounding) to produce 8-bit samples, which are
// then clamped.
func (z *scaler) scaleYRGBA(dst *image.RGBA, dp image.Point, tmp [][4]int32) {
	const (
		shift = scaleBits + 8
		half  = scaleRounder * 256
	)

	origin := dst.Pix[dst.PixOffset(dp.X, dp.Y):]
	v := &z.vertical
	for col := int32(0); col < z.dw; col++ {
		column := origin[int(col)*4:]
		for row, span := range v.sources {
			var sumR, sumG, sumB, sumA int32
			for _, c := range v.contribs[span.i:span.j] {
				px := &tmp[c.coord*z.dw+col]
				sumR += px[0] * c.weight
				sumG += px[1] * c.weight
				sumB += px[2] * c.weight
				sumA += px[3] * c.weight
			}
			off := row * dst.Stride
			column[off] = clamp8((sumR + half) >> shift)
			column[off+1] = clamp8((sumG + half) >> shift)
			column[off+2] = clamp8((sumB + half) >> shift)
			column[off+3] = clamp8((sumA + half) >> shift)
		}
	}
}
// Interpolator defines the quality and speed of the resize operation.
//
// There are built-in types that have different speed/quality tradeoffs.
// In order from fastest to slowest are:
//
// * NearestNeighbor
// * Linear
// * CatmullRom
// * Lanczos
type Interpolator interface {
// F returns the kernel weight for a sample at the given distance from the
// kernel center.
F(float64) float64
// Support returns the kernel half-width; newDistrib uses it as the
// distance from the center within which source pixels are sampled.
Support() uint
}
// NearestNeighbor returns the fastest resample filter, with no antialiasing
// at all. It should only be used if speed is essential.
func NearestNeighbor() Interpolator {
return nearest{}
}
// nearest is a special case: NewScaler recognizes the type and returns a
// dedicated nearest-neighbor scaler, so the methods only exist to satisfy the
// Interpolator interface.
type nearest struct{}
// F always returns 0.
func (f nearest) F(t float64) float64 { return 0 }
// Support always returns 0.
func (f nearest) Support() uint { return 0 }
// Linear returns the bilinear interpolation filter. It produces reasonably
// good, smooth output and is faster than the cubic filters.
func Linear() Interpolator {
return linear{}
}
// linear is the triangle ("tent") kernel used for bilinear interpolation.
type linear struct{}

// F returns 1-|x| for |x| < 1 and 0 everywhere else.
func (linear) F(x float64) float64 {
	if dist := math.Abs(x); dist < 1.0 {
		return 1.0 - dist
	}
	return 0
}

// Support returns the kernel half-width, which is 1.
func (linear) Support() uint { return 1 }
// CatmullRom returns the Catmull-Rom kernel.
//
// It is an instance of the more general cubic BC-spline kernel with parameters
// B=0 and C=0.5. See Mitchell and Netravali, "Reconstruction Filters in
// Computer Graphics", Computer Graphics, Vol. 22, No. 4, pp. 221-228.
func CatmullRom() Interpolator {
return catmullRom{}
}
// catmullRom is the Catmull-Rom cubic kernel.
type catmullRom struct{}

// F evaluates the piecewise cubic at distance |t| from the center: one
// polynomial for |t| < 1, another for 1 <= |t| < 2, and 0 beyond that.
func (catmullRom) F(t float64) float64 {
	if t < 0 {
		t = -t
	}
	switch {
	case t < 1:
		return (1.5*t-2.5)*t*t + 1
	case t < 2:
		return ((-0.5*t+2.5)*t-4)*t + 2
	}
	return 0
}

// Support returns the kernel half-width, which is 2.
func (catmullRom) Support() uint { return 2 }
// Lanczos returns the Lanczos interpolator with 3 lobes.
//
// Probably the best resampling filter for photographic images yielding sharp
// results, but it's slower than cubic filters.
func Lanczos() Interpolator {
	const defaultLobes = 3
	return LanczosLobes(defaultLobes)
}
// LanczosLobes returns a Lanczos interpolator with a custom number of lobes.
//
// It is recommended to use between 3 and 7 lobes.
func LanczosLobes(lobes uint) Interpolator {
	n := float64(lobes)
	return lanczos{
		Lobes:      lobes,
		FloatLobes: n,
		InvLobes:   1.0 / n,
	}
}
// lanczos is the Lanczos windowed-sinc kernel.
type lanczos struct {
	// Lobes is the number of lobes; Support reports it as the half-width.
	Lobes uint
	// FloatLobes is Lobes as a float64, used as the cut-off distance in F.
	FloatLobes float64
	// InvLobes is 1/Lobes, used to stretch the sinc window in F.
	InvLobes float64
}

// F returns sinc(|x|) * sinc(|x|/Lobes) for |x| < Lobes and 0 otherwise.
func (f lanczos) F(x float64) float64 {
	if dist := math.Abs(x); dist < f.FloatLobes {
		return sinc(dist) * sinc(dist*f.InvLobes)
	}
	return 0
}

// Support returns the kernel half-width, which is the number of lobes.
func (f lanczos) Support() uint { return f.Lobes }
// sinc returns the normalized sinc function sin(pi*x)/(pi*x), with the value
// 1 at x == 0 where the quotient itself is undefined.
func sinc(x float64) float64 {
	if x != 0.0 {
		return math.Sin(math.Pi*x) / (math.Pi * x)
	}
	return 1.0
}
// clamp converts i to a uint16, saturating at 0 for non-positive values and
// at 0xffff for values above the 16-bit range.
func clamp(i int32) uint16 {
	switch {
	case i <= 0:
		return 0
	case i > 0xffff:
		return 0xffff
	}
	return uint16(i)
}
// clamp8 converts i to a uint8, saturating at 0 for non-positive values and
// at 0xff for values above the 8-bit range.
func clamp8(i int32) uint8 {
	switch {
	case i <= 0:
		return 0
	case i > 0xff:
		return 0xff
	}
	return uint8(i)
}
// ftou maps a float in the nominal range [0, 1] to a 16-bit value: it scales
// by 0xffff, adds 0.5 before the integer conversion, and saturates the result
// to [0, 0xffff].
func ftou(f float64) uint16 {
	scaled := int32(0xffff*f + 0.5)
	switch {
	case scaled <= 0:
		return 0
	case scaled > 0xffff:
		return 0xffff
	}
	return uint16(scaled)
}
@klauspost
Copy link
Author

Comparison RGBA->RGBA:

benchmark                old ns/op     new ns/op     delta
BenchmarkScaleDownNN     2348798       2507318       +6.75%
BenchmarkScaleDownBL     5604045       2237284       -60.08%
BenchmarkScaleDownCR     34474378      3477441       -89.91%
BenchmarkScaleUpNN       76634730      81910405      +6.88%
BenchmarkScaleUpBL       183123250     18677372      -89.80%
BenchmarkScaleUpCR       154769660     24703136      -84.04%

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment