Created
February 19, 2015 14:41
-
-
Save klauspost/71fc98a9aa0dcd2676df to your computer and use it in GitHub Desktop.
Integer version of the scaler, with fast path for RGBA.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Copyright 2014 The Go Authors. All rights reserved. | |
// Use of this source code is governed by a BSD-style | |
// license that can be found in the LICENSE file. | |
package draw | |
import ( | |
"image" | |
"image/color" | |
"math" | |
) | |
// Scale scales the part of the source image defined by src and sr and writes | |
// to the part of the destination image defined by dst and dr. | |
func Scale(dst Image, dr image.Rectangle, src image.Image, sr image.Rectangle, q Interpolator) { | |
NewScaler(dr.Size(), sr.Size(), q).Scale(dst, dr.Min, src, sr.Min) | |
} | |
// Scaler scales part of a source image, starting from sp, and writes to a
// destination image, starting from dp. The destination and source width and
// heights are pre-determined, as part of the Scaler.
//
// A Scaler is safe to use concurrently.
type Scaler interface {
	// Scale reads the source region whose top-left corner is sp in src and
	// writes the scaled result to dst with its top-left corner at dp.
	Scale(dst Image, dp image.Point, src image.Image, sp image.Point)
}
// TODO: should Scale and NewScaler also take an Op argument? | |
// NewScaler returns a Scaler that scales a source image of the given size to a | |
// destination image of the given size. | |
func NewScaler(dstSize, srcSize image.Point, q Interpolator) Scaler { | |
dw := int32(dstSize.X) | |
dh := int32(dstSize.Y) | |
sw := int32(srcSize.X) | |
sh := int32(srcSize.Y) | |
if dw <= 0 || dh <= 0 || sw <= 0 || sh <= 0 { | |
return nopScaler{} | |
} | |
switch q.(type) { | |
case nearest: | |
return &nnScaler{ | |
dw: dw, | |
dh: dh, | |
sw: sw, | |
sh: sh, | |
} | |
default: | |
return &scaler{ | |
dw: dw, | |
dh: dh, | |
sw: sw, | |
sh: sh, | |
horizontal: newDistrib(dw, sw, q), | |
vertical: newDistrib(dh, sh, q), | |
} | |
} | |
} | |
// nopScaler is the Scaler that NewScaler returns when any of the requested
// dimensions is not positive.
type nopScaler struct{}

// Scale implements Scaler. It ignores its arguments and leaves dst untouched.
func (nopScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) {}
// nnScaler is the nearest-neighbor Scaler. It only needs to remember the
// destination and source dimensions it was built for.
type nnScaler struct {
	dw, dh int32 // destination width and height
	sw, sh int32 // source width and height
}
func (z *nnScaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) { | |
dstColorRGBA64 := &color.RGBA64{} | |
dstColor := color.Color(dstColorRGBA64) | |
for dy := int32(0); dy < z.dh; dy++ { | |
sy := (2*uint64(dy) + 1) * uint64(z.sh) / (2 * uint64(z.dh)) | |
for dx := int32(0); dx < z.dw; dx++ { | |
sx := (2*uint64(dx) + 1) * uint64(z.sw) / (2 * uint64(z.dw)) | |
pr, pg, pb, pa := src.At(sp.X+int(sx), sp.Y+int(sy)).RGBA() | |
dstColorRGBA64.R = uint16(pr) | |
dstColorRGBA64.G = uint16(pg) | |
dstColorRGBA64.B = uint16(pb) | |
dstColorRGBA64.A = uint16(pa) | |
dst.Set(dp.X+int(dx), dp.Y+int(dy), dstColor) | |
} | |
} | |
} | |
// scaler implements a Catmull-Rom image scaler. | |
type scaler struct { | |
dw, dh, sw, sh int32 | |
horizontal, vertical distrib | |
} | |
func (z *scaler) Scale(dst Image, dp image.Point, src image.Image, sp image.Point) { | |
// TODO: is it worth having a sync.Pool for this temporary buffer? | |
tmp := make([][4]int32, z.dw*z.sh) | |
z.scaleX(tmp, src, sp) | |
z.scaleY(dst, dp, tmp) | |
} | |
// source describes the contributions feeding one destination column or row.
//
// In a finished distrib, i and j delimit the half-open range contribs[i:j].
// While newDistrib is still building the table it temporarily stores the
// inclusive source coordinate bounds in i and j, and the sample center in
// invTotalWeight. newDistrib leaves both float fields zero in its result.
type source struct {
	i int32
	j int32

	invTotalWeight, invTotalWeightFFFF float64
}
// contrib is one weighted source column or row: coord is its index in the
// source and weight is its fixed-point weight (scaled by scaleVal in
// newDistrib). The field order matters because newDistrib builds values
// positionally.
type contrib struct {
	coord, weight int32
}
// distrib measures how source pixels are distributed over destination pixels.
//
// It is a flattened table: sources has one entry per destination column (or
// row), and each entry selects a run of contribs.
type distrib struct {
	// sources holds, for each destination column or row, the half-open range
	// [i, j) of contribs that feed it.
	sources []source
	// contribs are the contributions indexed by sources[s].i and sources[s].j.
	contribs []contrib
}
// Fixed-point parameters for the integer scaler. Kernel weights are stored
// multiplied by scaleVal; after summing weighted samples, adding scaleRounder
// and shifting right by scaleBits rounds the result back to sample range.
const (
	scaleBits    = 14
	scaleVal     = 1 << scaleBits
	scaleRounder = scaleVal >> 1
)
// newDistrib returns a distrib that distributes sw source columns (or rows) | |
// over dw destination columns (or rows). | |
func newDistrib(dw, sw int32, f Interpolator) distrib { | |
scale := float64(sw) / float64(dw) | |
halfWidth, kernelArgScale := float64(f.Support()), 1.0 | |
if scale > 1 { | |
halfWidth *= scale | |
kernelArgScale = 1 / scale | |
} | |
// Make the sources slice, one source for each column or row, and temporarily | |
// appropriate its elements' fields so that invTotalWeight is the scaled | |
// co-ordinate of the source column or row, and i and j are the lower and | |
// upper bounds of the range of destination columns or rows affected by the | |
// source column or row. | |
n, sources := int32(0), make([]source, dw) | |
for x := range sources { | |
center := (float64(x)+0.5)*scale - 0.5 | |
i := int32(math.Floor(center - halfWidth)) | |
if i < 0 { | |
i = 0 | |
} | |
j := int32(math.Ceil(center + halfWidth)) | |
if j >= sw { | |
j = sw - 1 | |
if j < i { | |
j = i | |
} | |
} | |
sources[x] = source{i: i, j: j, invTotalWeight: center} | |
n += j - i + 1 | |
} | |
contribs := make([]contrib, 0, n) | |
p := make([]float64, dw) | |
for k, b := range sources { | |
totalWeight := 0.0 | |
l := int32(len(contribs)) | |
cc := 0 | |
for coord := b.i; coord <= b.j; coord++ { | |
weight := f.F((b.invTotalWeight - float64(coord)) * kernelArgScale) | |
if weight == 0 { | |
continue | |
} | |
totalWeight += weight | |
contribs = append(contribs, contrib{coord, 0}) | |
p[cc] = weight | |
cc++ | |
} | |
totalWeight = 1 / totalWeight | |
for c := range contribs[l:] { | |
contribs[c+int(l)].weight = int32(totalWeight * scaleVal * p[c]) | |
} | |
sources[k] = source{ | |
i: l, | |
j: int32(len(contribs)), | |
} | |
} | |
return distrib{sources, contribs} | |
} | |
// testFast gates the *image.RGBA fast paths: scaleX and scaleY only dispatch
// to scaleXRGBA and scaleYRGBA while it is true.
// NOTE(review): presumably toggled by tests to exercise the generic paths -
// confirm against the test files.
var testFast = true
// scaleX distributes the source image's columns over the temporary image. | |
func (z *scaler) scaleX(tmp [][4]int32, src image.Image, sp image.Point) { | |
switch s := src.(type) { | |
case *image.RGBA: | |
if testFast { | |
z.scaleXRGBA(tmp, s, sp) | |
return | |
} | |
} | |
t := 0 | |
for y := int32(0); y < z.sh; y++ { | |
for _, s := range z.horizontal.sources { | |
var r, g, b, a int32 | |
for _, c := range z.horizontal.contribs[s.i:s.j] { | |
rr, gg, bb, aa := src.At(sp.X+int(c.coord), sp.Y+int(y)).RGBA() | |
r += int32(rr) * c.weight | |
g += int32(gg) * c.weight | |
b += int32(bb) * c.weight | |
a += int32(aa) * c.weight | |
} | |
tmp[t] = [4]int32{ | |
(r + scaleRounder) >> scaleBits, | |
(g + scaleRounder) >> scaleBits, | |
(b + scaleRounder) >> scaleBits, | |
(a + scaleRounder) >> scaleBits, | |
} | |
t++ | |
} | |
} | |
} | |
// scaleX distributes the source image's columns over the temporary image. | |
func (z *scaler) scaleXRGBA(tmp [][4]int32, src *image.RGBA, sp image.Point) { | |
t := 0 | |
atPoint := src.Pix[src.PixOffset(sp.X, sp.Y):] | |
for y := int32(0); y < z.sh; y++ { | |
for _, s := range z.horizontal.sources { | |
var r, g, b, a int32 | |
line := atPoint[int(y)*src.Stride:] | |
for _, c := range z.horizontal.contribs[s.i:s.j] { | |
coord := int(c.coord) * 4 | |
weight := c.weight * 0x101 | |
r += int32(line[coord]) * weight | |
g += int32(line[coord+1]) * weight | |
b += int32(line[coord+2]) * weight | |
a += int32(line[coord+3]) * weight | |
} | |
tmp[t] = [4]int32{ | |
(r + scaleRounder) >> scaleBits, | |
(g + scaleRounder) >> scaleBits, | |
(b + scaleRounder) >> scaleBits, | |
(a + scaleRounder) >> scaleBits, | |
} | |
t++ | |
} | |
} | |
} | |
// scaleY distributes the temporary image's rows over the destination image. | |
func (z *scaler) scaleY(dst Image, dp image.Point, tmp [][4]int32) { | |
switch d := dst.(type) { | |
case *image.RGBA: | |
if testFast { | |
z.scaleYRGBA(d, dp, tmp) | |
return | |
} | |
} | |
panic("dieY") | |
dstColorRGBA64 := &color.RGBA64{} | |
dstColor := color.Color(dstColorRGBA64) | |
for x := int32(0); x < z.dw; x++ { | |
for y, s := range z.vertical.sources { | |
var r, g, b, a int32 | |
for _, c := range z.vertical.contribs[s.i:s.j] { | |
p := &tmp[c.coord*z.dw+x] | |
r += p[0] * c.weight | |
g += p[1] * c.weight | |
b += p[2] * c.weight | |
a += p[3] * c.weight | |
} | |
dstColorRGBA64.R = clamp((r + scaleRounder) >> scaleBits) | |
dstColorRGBA64.G = clamp((g + scaleRounder) >> scaleBits) | |
dstColorRGBA64.B = clamp((b + scaleRounder) >> scaleBits) | |
dstColorRGBA64.A = clamp((a + scaleRounder) >> scaleBits) | |
dst.Set(dp.X+int(x), dp.Y+y, dstColor) | |
} | |
} | |
} | |
// scaleY distributes the temporary image's rows over the destination image. | |
func (z *scaler) scaleYRGBA(dst *image.RGBA, dp image.Point, tmp [][4]int32) { | |
dstPoint := dst.Pix[dst.PixOffset(dp.X, dp.Y):] | |
for x := int32(0); x < z.dw; x++ { | |
xpos := dstPoint[int(x)*4:] | |
for y, s := range z.vertical.sources { | |
var r, g, b, a int32 | |
for _, c := range z.vertical.contribs[s.i:s.j] { | |
p := &tmp[c.coord*z.dw+x] | |
r += p[0] * c.weight | |
g += p[1] * c.weight | |
b += p[2] * c.weight | |
a += p[3] * c.weight | |
} | |
ypos := y * dst.Stride | |
xpos[ypos] = clamp8((r + scaleRounder*256) >> (scaleBits + 8)) | |
xpos[ypos+1] = clamp8((g + scaleRounder*256) >> (scaleBits + 8)) | |
xpos[ypos+2] = clamp8((b + scaleRounder*256) >> (scaleBits + 8)) | |
xpos[ypos+3] = clamp8((a + scaleRounder*256) >> (scaleBits + 8)) | |
} | |
} | |
} | |
// The Interpolator defines the quality and speed of the resize operation.
//
// There are built-in types that have different speed/quality tradeoffs.
// In order from fastest to slowest are:
//
// * NearestNeighbor
// * Linear
// * CatmullRom
// * Lanczos
type Interpolator interface {
	// F evaluates the kernel. newDistrib calls it with the distance between
	// a sample center and a source coordinate, scaled down by the scale
	// factor when downscaling; its result is that coordinate's weight.
	F(float64) float64
	// Support returns the kernel's half-width. newDistrib uses it to bound
	// which source columns or rows may contribute to a destination pixel.
	Support() uint
}
// NearestNeighbor returns the fastest resample filter, with no antialiasing
// at all. This should only be used if speed is essential.
//
// The returned value is recognized by NewScaler, which selects the dedicated
// nearest-neighbor scaler for it.
func NearestNeighbor() Interpolator {
	return nearest{}
}
// nearest marks nearest-neighbor scaling. NewScaler special-cases the type,
// so its methods exist only to satisfy Interpolator.
type nearest struct{}

// F always returns 0.
func (nearest) F(float64) float64 { return 0 }

// Support always returns 0.
func (nearest) Support() uint { return 0 }
// Linear returns the bilinear interpolation filter, which produces reasonably
// good, smooth output. It's faster than cubic filters.
func Linear() Interpolator {
	return linear{}
}
// linear is the triangle (tent) kernel behind Linear.
type linear struct{}

// F returns 1-|x| for |x| < 1 and 0 everywhere else.
func (linear) F(x float64) float64 {
	if dist := math.Abs(x); dist < 1.0 {
		return 1.0 - dist
	}
	return 0
}

// Support reports a half-width of one pixel.
func (linear) Support() uint { return 1 }
// CatmullRom returns the Catmull-Rom kernel.
//
// It is an instance of the more general cubic BC-spline kernel with parameters
// B=0 and C=0.5. See Mitchell and Netravali, "Reconstruction Filters in
// Computer Graphics", Computer Graphics, Vol. 22, No. 4, pp. 221-228.
func CatmullRom() Interpolator {
	return catmullRom{}
}
// catmullRom is the piecewise-cubic kernel behind CatmullRom.
type catmullRom struct{}

// F evaluates the kernel at t. The kernel is symmetric, uses one cubic on
// [0, 1) and another on [1, 2), and is zero from 2 onwards.
func (catmullRom) F(t float64) float64 {
	if t < 0 {
		t = -t
	}
	switch {
	case t < 1:
		return (1.5*t-2.5)*t*t + 1
	case t < 2:
		return ((-0.5*t+2.5)*t-4)*t + 2
	default:
		return 0
	}
}

// Support reports a half-width of two pixels.
func (catmullRom) Support() uint { return 2 }
// Lanczos Interpolator | |
// | |
// Probably the best resampling filter for photographic images yielding sharp results, | |
// but it's slower than cubic filters. | |
func Lanczos() Interpolator { | |
var lobes uint = 3 | |
return lanczos{Lobes: lobes, FloatLobes: float64(lobes), InvLobes: 1.0 / float64(lobes)} | |
} | |
// Lanczos Interpolator with custom number of lobes | |
// | |
// It is recommended to use between 3 and 7 lobes. | |
func LanczosLobes(lobes uint) Interpolator { | |
return lanczos{Lobes: lobes, FloatLobes: float64(lobes), InvLobes: 1.0 / float64(lobes)} | |
} | |
type lanczos struct { | |
Lobes uint | |
FloatLobes float64 | |
InvLobes float64 | |
} | |
func (f lanczos) F(x float64) float64 { | |
x = math.Abs(x) | |
if x < f.FloatLobes { | |
return sinc(x) * sinc(x*f.InvLobes) | |
} | |
return 0 | |
} | |
func (f lanczos) Support() uint { | |
return f.Lobes | |
} | |
// sinc is the normalized sinc function sin(pi*x)/(pi*x), with the value 1 at
// x == 0 where the quotient itself is undefined.
func sinc(x float64) float64 {
	if x == 0.0 {
		return 1.0
	}
	arg := math.Pi * x
	return math.Sin(arg) / arg
}
// clamp saturates i to the uint16 range: values above 0xffff become 0xffff
// and values at or below zero become 0.
func clamp(i int32) uint16 {
	switch {
	case i > 0xffff:
		return 0xffff
	case i > 0:
		return uint16(i)
	}
	return 0
}
// clamp8 saturates i to the uint8 range: values above 0xff become 0xff and
// values at or below zero become 0.
func clamp8(i int32) uint8 {
	switch {
	case i > 0xff:
		return 0xff
	case i > 0:
		return uint8(i)
	}
	return 0
}
// ftou converts f, nominally in [0, 1], to a 16-bit channel value by scaling
// it by 0xffff and rounding half up. Inputs that scale beyond the range
// saturate to 0 or 0xffff; NaN yields 0.
//
// The range check is done on the float value, before any integer conversion:
// converting an out-of-range float to an integer gives an
// implementation-dependent result in Go, so checking afterwards could turn a
// very large f into 0 instead of 0xffff on some platforms.
func ftou(f float64) uint16 {
	v := 0xffff*f + 0.5
	switch {
	case v >= 0x10000:
		return 0xffff
	case v >= 1:
		// v is within [1, 0x10000), so the truncating conversion is exact.
		return uint16(v)
	}
	// Covers values below 1 as well as NaN, for which every comparison is false.
	return 0
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Comparison RGBA->RGBA: