Created
March 16, 2020 12:19
-
-
Save AllanChen/c0cb96bf46d5959e0d2d274ed484cc83 to your computer and use it in GitHub Desktop.
yuv test
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
//
//  ViewCtrl_CPUEasyImageProcessing.m
//  demo_ios
//
//  Created by yyuser on 2018/10/19.
//
#import "ViewCtrl_CPUEasyImageProcessing.h" | |
#import "vn_core.h" | |
#import "vn_yuvconverter.h" | |
#import "VN_Tensor.h" | |
@interface ViewCtrl_CPUEasyImageProcessing ()

/// Raw pointer to the C++ YUV converter. ARC does not manage it: it is
/// created with `new` in -videoCaptureCallback: and destroyed with `delete`.
@property (nonatomic, assign) Venus::VenusYUVToolKit *yuvConverter;

/// Image view that displays the most recently converted frame; built lazily
/// by its getter.
@property (nonatomic, strong) UIImageView *imgView;

@end
@implementation ViewCtrl_CPUEasyImageProcessing | |
- (void)dealloc {
    // The converter is a plain C++ object allocated with `new`, so ARC will
    // not release it for us.
    if (_yuvConverter != nullptr) {
        delete _yuvConverter;
    }
}
/// Builds the default root view and installs the preview image view in it.
- (void)loadView {
    [super loadView];

    UIView *rootView = self.view;
    [rootView addSubview:self.imgView];
}
/// Standard view life-cycle hook.
///
/// `_yuvConverter` is intentionally NOT reset here. Objective-C zero-fills
/// ivars at allocation time, so it already starts out as nullptr, and
/// overwriting it would leak a converter that -videoCaptureCallback: may have
/// created before the view finished loading.
- (void)viewDidLoad {
    [super viewDidLoad];
    // Do any additional setup after loading the view.
}
/// Lazily creates the preview image view.
///
/// The view starts out showing `UIResources/icon.png` from the main bundle,
/// is positioned from the project's SCREEN_* / ACTUAL_SCREEN_* macros, has
/// rounded, clipped corners and scales its image to fit.
///
/// The return type matches the declared property type (`UIImageView *`); the
/// accessor was previously declared as returning `UIView *`, which disagrees
/// with the property declaration.
- (UIImageView *)imgView {
    if (_imgView) {
        return _imgView;
    }

    NSString *resourceDir = [[[NSBundle mainBundle] bundlePath] stringByAppendingPathComponent:@"UIResources"];
    NSString *iconPath = [resourceDir stringByAppendingPathComponent:@"icon.png"];
    _imgView = [[UIImageView alloc] initWithImage:[UIImage imageWithContentsOfFile:iconPath]];

    [_imgView setFrame:CGRectMake(SCREEN_WIDTH * 1 / 9.0 + (ACTUAL_SCREEN_WIDTH - SCREEN_WIDTH) / 2,
                                  SCREEN_HEIGHT * 2 / 16.0 + (ACTUAL_SCREEN_HEIGHT - SCREEN_HEIGHT) / 2,
                                  SCREEN_WIDTH * 7.0 / 9.0,
                                  SCREEN_HEIGHT * 12.0 / 16.0)];
    _imgView.layer.cornerRadius = 8;
    _imgView.layer.masksToBounds = YES;
    _imgView.contentMode = UIViewContentModeScaleAspectFit;

    return _imgView;
}
/// Builds a UIImage from a tightly packed 4-bytes-per-pixel buffer.
///
/// The pixels are copied row by row straight into the backing store of a
/// UIKit bitmap context whose width is `imW` rounded up to a multiple of 4
/// (so the returned image can be up to 3 pixels wider than `imW`).
///
/// @param imH      Image height in pixels.
/// @param imW      Image width in pixels.
/// @param data_u8  Source pixels, `imW * 4` bytes per row with no padding.
///                 NOTE(review): the parameter name says BGRA; this relies on
///                 the UIKit bitmap context using that byte order — confirm.
/// @return The image captured from the context, or nil when the dimensions
///         are not positive. If `data_u8` is NULL the context is left
///         untouched and its (blank) contents are returned.
- (UIImage *)getUIImage_With_Height:(int)imH
                              Width:(int)imW
                         BGRADataU8:(u_char *)data_u8 {
    if (imH <= 0 || imW <= 0) {
        return nil;
    }

    const int alignedWidth = (imW + 3) / 4 * 4;
    UIGraphicsBeginImageContext(CGSizeMake(alignedWidth, imH));
    CGContextRef ctx = UIGraphicsGetCurrentContext();

    u_char *dstRow = (ctx != NULL) ? (u_char *)CGBitmapContextGetData(ctx) : NULL;
    const u_char *srcRow = data_u8;
    if (dstRow != NULL && srcRow != NULL) {
        // Ask CoreGraphics for the real row stride instead of assuming it is
        // exactly alignedWidth * 4: the context is free to pad its rows.
        const size_t srcStride = (size_t)imW * 4;
        const size_t dstStride = CGBitmapContextGetBytesPerRow(ctx);
        const size_t rowBytes = MIN(srcStride, dstStride);
        // Never write more rows than the context actually holds.
        const size_t rows = MIN((size_t)imH, CGBitmapContextGetHeight(ctx));

        for (size_t row = 0; row < rows; row++) {
            memcpy(dstRow, srcRow, rowBytes);
            dstRow += dstStride;
            srcRow += srcStride;
        }
    }

    UIImage *img = UIGraphicsGetImageFromCurrentImageContext();
    UIGraphicsEndImageContext();
    return img;
}
/// Copies `rows` rows of `rowBytes` bytes each from a strided source buffer
/// into a strided destination buffer.
static void vn_copyRows(unsigned char *dst, size_t dstStride,
                        const unsigned char *src, size_t srcStride,
                        size_t rowBytes, size_t rows)
{
    for (size_t r = 0; r < rows; r++) {
        memcpy(dst + r * dstStride, src + r * srcStride, rowBytes);
    }
}

/// Converts one captured frame to BGRA and shows it in the image view.
///
/// Steps:
///   1. Copy the pixel buffer into a tightly packed heap buffer, honouring
///      the per-row stride of the source (rows may be padded).
///   2. (Re)create the Venus YUV converter whenever the frame size changes.
///      The converter itself is currently not invoked (see the commented-out
///      alternatives), but its life-cycle is kept as before.
///   3. Produce packed BGRA bytes: bi-planar YUV goes through convertToRGBA(),
///      non-planar input is treated as already being BGRA and copied as is.
///   4. Wrap the bytes in a UIImage and hand it to the main queue.
///
/// Fixes relative to the previous implementation:
///   * buffers are allocated with calloc/malloc so that free() is a matching
///     deallocator (they used to be allocated with new[]);
///   * the BGRA copy no longer ignores CVPixelBufferGetBytesPerRow;
///   * the conversion uses the real frame size instead of a hard-coded
///     720x1280, which overran the buffers for any other size;
///   * the converter output is packed 8-bit BGRA and is used as such — it was
///     previously re-interpreted as planar float data;
///   * BGRA input is no longer pushed through the YUV converter.
///
/// @param pixelBuffer Frame to display. Non-planar buffers are assumed to be
///        BGRA; planar buffers are assumed to be bi-planar 4:2:0 (Y plane
///        followed by an interleaved chroma plane), as before.
- (void)videoCaptureCallback:(CVPixelBufferRef)pixelBuffer
{
    if (pixelBuffer == NULL) {
        return;
    }

    VN_Image input;
    input.ori_fmt = VN_ORIENT_FMT_DEFAULT;
    input.data = NULL;

    // ---- 1. Copy the frame out of the pixel buffer -------------------------
    CVPixelBufferLockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);

    const int iWidth = (int)CVPixelBufferGetWidth(pixelBuffer);
    const int iHeight = (int)CVPixelBufferGetHeight(pixelBuffer);
    input.width = iWidth;
    input.height = iHeight;

    if (iWidth > 0 && iHeight > 0) {
        if (CVPixelBufferGetPlaneCount(pixelBuffer) == 0) {
            // Packed 4-bytes-per-pixel frame.
            const size_t srcStride = CVPixelBufferGetBytesPerRow(pixelBuffer);
            const size_t dstStride = (size_t)iWidth * 4;
            const unsigned char *src = (const unsigned char *)CVPixelBufferGetBaseAddress(pixelBuffer);
            unsigned char *packed = (src != NULL) ? (unsigned char *)calloc((size_t)iHeight, dstStride) : NULL;
            if (packed != NULL) {
                vn_copyRows(packed, dstStride, src, srcStride, MIN(dstStride, srcStride), (size_t)iHeight);
            }
            input.channels = 4;
            input.pix_fmt = VN_PIX_FMT_BGRA8888;
            input.data = packed;
        }
        else {
            // Bi-planar frame: plane 0 is luma, plane 1 is interleaved chroma.
            const size_t yStride = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 0);
            const size_t uvStride = CVPixelBufferGetBytesPerRowOfPlane(pixelBuffer, 1);
            const unsigned char *yPlane = (const unsigned char *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 0);
            const unsigned char *uvPlane = (const unsigned char *)CVPixelBufferGetBaseAddressOfPlane(pixelBuffer, 1);

            const size_t rowBytes = (size_t)iWidth;
            // convertToRGBA() reads chroma row (i / 2) for every luma row i,
            // so an odd height needs one extra chroma row in the buffer.
            const size_t uvRowsNeeded = ((size_t)iHeight + 1) / 2;
            const size_t uvRowsToCopy = MIN(uvRowsNeeded, CVPixelBufferGetHeightOfPlane(pixelBuffer, 1));

            unsigned char *packed = NULL;
            if (yPlane != NULL && uvPlane != NULL) {
                // calloc: any chroma row we could not copy stays zeroed.
                packed = (unsigned char *)calloc((size_t)iHeight + uvRowsNeeded, rowBytes);
            }
            if (packed != NULL) {
                vn_copyRows(packed, rowBytes, yPlane, yStride,
                            MIN(rowBytes, yStride), (size_t)iHeight);
                vn_copyRows(packed + rowBytes * (size_t)iHeight, rowBytes, uvPlane, uvStride,
                            MIN(rowBytes, uvStride), uvRowsToCopy);
            }
            input.channels = 0;
            input.pix_fmt = VN_PIX_FMT_YUV420F;
            input.data = packed;
        }
    }

    CVPixelBufferUnlockBaseAddress(pixelBuffer, kCVPixelBufferLock_ReadOnly);

    if (input.data == NULL) {
        // Empty frame, missing base address or allocation failure.
        return;
    }

    // ---- 2. Describe the output image --------------------------------------
    const size_t outBytes = (size_t)iWidth * (size_t)iHeight * 4;
    VN_Image outCvt; {
        outCvt.channels = 4;
        outCvt.width = input.width;
        outCvt.height = input.height;
        outCvt.pix_fmt = VN_PIX_FMT_BGRA8888;   // what convertToRGBA() writes
        outCvt.ori_fmt = VN_ORIENT_FMT_DEFAULT;
        outCvt.data = malloc(outBytes);
    }
    if (outCvt.data == NULL) {
        free(input.data);
        return;
    }

    // ---- 3. Keep the Venus converter in sync with the frame size -----------
    if ((!_yuvConverter) ||
        (_yuvConverter->_height_Y != input.height || _yuvConverter->_width_Y != input.width) ||
        (_yuvConverter->_height_RGB != outCvt.height || _yuvConverter->_width_RGB != outCvt.width)
        ) {
        if (_yuvConverter) {
            delete _yuvConverter;
            _yuvConverter = nullptr;
        }
        _yuvConverter = new Venus::VenusYUVToolKit(
                                                   input.width,
                                                   input.height,
                                                   input.width / 2,
                                                   input.height / 2,
                                                   outCvt.width,
                                                   outCvt.height
                                                   );
        _yuvConverter->setCvtMat(
                                 Venus::ConvertMatrixFromYUV(
                                                             1.0f, 0.0f, 1.57481f,
                                                             1.0f, -0.18732f, -0.46813f,
                                                             1.0f, 1.8556f, 0.0f,
                                                             -201.57568f,
                                                             83.897598f,
                                                             -237.5168f
                                                             )
                                 );
    }

    // ---- 4. Convert to packed BGRA -----------------------------------------
    double tic = CACurrentMediaTime();
    // Alternative converters that were tried here:
    //   _yuvConverter->convert(input, outCvt);
    //   Venus::VenusYUVToolKit::Convert(input, outCvt, Venus::ConvertMatrixFromYUV(
    //       1.0f, 0.0f, 1.57481f,
    //       1.0f, -0.18732f, -0.46813f,
    //       1.0f, 1.8556f, 0.0f,
    //       -201.57568f,
    //       83.897598f,
    //       -237.5168f));
    //   nv12_to_rgb_fast_asm_ios((unsigned char *)input.data, w, h, (unsigned char *)outCvt.data);
    if (input.pix_fmt == VN_PIX_FMT_BGRA8888) {
        // Already in the display format: nothing to convert.
        memcpy(outCvt.data, input.data, outBytes);
    }
    else {
        convertToRGBA((unsigned char *)input.data, input.width, input.height, (int *)outCvt.data);
    }
    double toc = CACurrentMediaTime();
    LOGV("yuv Convert cost %f ms", 1000 * (toc - tic));

    // ---- 5. Display ---------------------------------------------------------
    UIImage *img = [self getUIImage_With_Height:outCvt.height
                                          Width:outCvt.width
                                     BGRADataU8:(u_char *)outCvt.data];
    free(input.data);
    free(outCvt.data);

    dispatch_async(dispatch_get_main_queue(), ^{
        self->_imgView.image = img;
    });
}
/// Shifts a Q6 fixed-point value down to an integer and saturates it to
/// [0, 255]. This mirrors what `sqshrun` / `vqshrun.s16 ..., #6` do in the
/// NEON paths below.
static inline unsigned char nv12_saturate_q6(int value_q6)
{
    const int value = value_q6 >> 6;
    if (value < 0) {
        return 0;
    }
    if (value > 255) {
        return 255;
    }
    return (unsigned char)value;
}

/// Converts a tightly packed semi-planar 4:2:0 image to packed 24-bit RGB.
///
/// Layout expected in `yuv420sp`: `w * h` luma bytes followed by the
/// interleaved chroma plane, read as (U, V) pairs. The image is processed in
/// 2x2 pixel quads, so `w` and `h` are expected to be even.
///
/// Fixed-point formula (6 fractional bits), identical in every code path:
///     R = ((Y << 6) + 90 * (V - 128)) >> 6
///     G = ((Y << 6) - 46 * (V - 128) - 22 * (U - 128)) >> 6
///     B = ((Y << 6) + 113 * (U - 128)) >> 6
/// with the result saturated to [0, 255]. The scalar tail used to truncate
/// with a plain cast, so out-of-range values wrapped around instead of
/// saturating like the NEON paths do.
///
/// @param yuv420sp Source image.
/// @param w        Width in pixels.
/// @param h        Height in pixels.
/// @param rgb      Destination, `w * h * 3` bytes, written as R, G, B.
void nv12_to_rgb_fast_asm_ios(const unsigned char* yuv420sp, int w, int h, unsigned char* rgb){
    const unsigned char* yptr = yuv420sp;
    const unsigned char* uvptr = yuv420sp + w * h;
#if __ARM_NEON
    // 128 does not fit in int8 and wraps to -128; subtracting it is the same
    // as subtracting 128 modulo 256, which is what the code needs.
    int8x8_t _v128 = vdup_n_s8(128);
    int8x8_t _v90 = vdup_n_s8(90);
    int8x8_t _v46 = vdup_n_s8(46);
    int8x8_t _v22 = vdup_n_s8(22);
    int8x8_t _v113 = vdup_n_s8(113);
#endif
    // Two image rows share one chroma row, so walk the image two rows at a time.
    for (int y=0; y<h; y+=2)
    {
        const unsigned char* yptr0 = yptr;
        const unsigned char* yptr1 = yptr + w;
        unsigned char* rgb0 = rgb;
        unsigned char* rgb1 = rgb + w*3;
#if __ARM_NEON
        int nn = w >> 3;                // number of 8-pixel groups
        int remain = w - (nn << 3);     // pixels left for the scalar tail
#else
        int remain = w;
#endif // __ARM_NEON
#if __ARM_NEON
#if __aarch64__
        // Intrinsics version of the assembly loop below, kept for reference.
        // Tested; no problems so far.
        // for (; nn>0; nn--)
        // {
        // int16x8_t _yy0 = vreinterpretq_s16_u16(vshll_n_u8(vld1_u8(yptr0), 6));
        // int16x8_t _yy1 = vreinterpretq_s16_u16(vshll_n_u8(vld1_u8(yptr1), 6));
        //
        // int8x8_t _uuvv = vsub_s8(vreinterpret_s8_u8(vld1_u8(uvptr)), _v128); //uv - 128
        // int8x8x2_t _uuuuvvvv = vtrn_s8(_uuvv, _uuvv);
        // int8x8_t _uu = _uuuuvvvv.val[0];
        // int8x8_t _vv = _uuuuvvvv.val[1];
        //
        // int16x8_t _r0 = vmlal_s8(_yy0, _vv, _v90);
        // int16x8_t _g0 = vmlsl_s8(_yy0, _vv, _v46);
        // _g0 = vmlsl_s8(_g0, _uu, _v22);
        // int16x8_t _b0 = vmlal_s8(_yy0, _uu, _v113);
        //
        // int16x8_t _r1 = vmlal_s8(_yy1, _vv, _v90);
        // int16x8_t _g1 = vmlsl_s8(_yy1, _vv, _v46);
        // _g1 = vmlsl_s8(_g1, _uu, _v22);
        // int16x8_t _b1 = vmlal_s8(_yy1, _uu, _v113);
        //
        // uint8x8x3_t _rgb0;
        // _rgb0.val[0] = vqshrun_n_s16(_r0, 6);
        // _rgb0.val[1] = vqshrun_n_s16(_g0, 6);
        // _rgb0.val[2] = vqshrun_n_s16(_b0, 6);
        //
        // uint8x8x3_t _rgb1;
        // _rgb1.val[0] = vqshrun_n_s16(_r1, 6);
        // _rgb1.val[1] = vqshrun_n_s16(_g1, 6);
        // _rgb1.val[2] = vqshrun_n_s16(_b1, 6);
        //
        // vst3_u8(rgb0, _rgb0);
        // vst3_u8(rgb1, _rgb1);
        //
        // yptr0 += 8;
        // yptr1 += 8;
        // uvptr += 8;
        // rgb0 += 24;
        // rgb1 += 24;
        // }
        if (nn > 0)
        {
            asm volatile(
                         "0: \n"
                         "ld1 {v2.8b}, [%3], #8 \n" //uv
                         "sub v2.8b, v2.8b, %12.8b \n" //uv - 128
                         "ld1 {v0.8b}, [%1], #8 \n" //yptr----r0
                         "ld1 {v1.8b}, [%2], #8 \n" //yptr----r1
                         "ushll v4.8h, v0.8b, #6 \n" //r0---y<<6
                         "orr v3.8b, v2.8b, v2.8b \n" //copy of uv
                         "ushll v5.8h, v1.8b, #6 \n" //r1---y<<6
                         "orr v9.16b, v4.16b, v4.16b \n" //copy of r0---y<<6
                         "trn1 v14.8b, v2.8b, v3.8b \n" //v14 = u
                         "trn2 v13.8b, v2.8b, v3.8b \n" //v13 = v
                         "orr v11.16b, v5.16b, v5.16b \n" //copy of r1---y<<6
                         "smlsl v9.8h, v13.8b, %14.8b \n" // r0---- (y << 6) - v * 46
                         "orr v8.16b, v4.16b, v4.16b \n" //copy of r0---y<<6
                         "smlsl v11.8h, v13.8b, %14.8b \n" // r1---- (y << 6) - v * 46
                         "orr v10.16b, v5.16b, v5.16b \n" //copy of r1---y<<6
                         "smlal v8.8h, v13.8b, %13.8b \n" //r0--- r = (y<<6) + v * 90
                         "smlal v4.8h, v14.8b, %16.8b \n" //r0--- b = (y<<6) + u * 113
                         "smlal v10.8h, v13.8b, %13.8b \n" //r1--- r = (y<<6) + v * 90
                         "smlsl v9.8h, v14.8b, %15.8b \n" //r0--- g = (y << 6) - v * 46 - u * 22
                         "smlal v5.8h, v14.8b, %16.8b \n" //r1--- b = (y<<6) + u * 113
                         "smlsl v11.8h, v14.8b, %15.8b \n" //r1--- g = (y << 6) - v * 46 - u * 22
                         "sqshrun v15.8b, v8.8h, #6 \n" //r0--- r
                         "sqshrun v17.8b, v4.8h, #6 \n" //r0--- b
                         "sqshrun v18.8b, v10.8h, #6 \n" //r1--- r
                         "sqshrun v16.8b, v9.8h, #6 \n" //r0--- g
                         "sqshrun v20.8b, v5.8h, #6 \n" //r1--- b
                         "sqshrun v19.8b, v11.8h, #6 \n" //r1--- g
                         "subs %w0, %w0, #1 \n"
                         "st3 {v15.8b, v16.8b, v17.8b}, [%4], #24 \n"
                         "st3 {v18.8b, v19.8b, v20.8b}, [%5], #24 \n"
                         "bne 0b \n"
                         : "=r"(nn), // %0
                         "=r"(yptr0), // %1
                         "=r"(yptr1), // %2
                         "=r"(uvptr), // %3
                         "=r"(rgb0), // %4
                         "=r"(rgb1) // %5
                         : "0"(nn),
                         "1"(yptr0),
                         "2"(yptr1),
                         "3"(uvptr),
                         "4"(rgb0),
                         "5"(rgb1),
                         "w"(_v128), // %12
                         "w"(_v90), // %13
                         "w"(_v46), // %14
                         "w"(_v22), // %15
                         "w"(_v113) // %16
                         : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v8", "v9", "v10", "v11", "v12", "v13",
                         "v14", "v15", "v16", "v17", "v18", "v19", "v20"
                         );
        }
#else
        if (nn > 0)
        {
            asm volatile(
                         "0: \n"
                         "pld [%3, #128] \n"
                         "vld1.u8 {d2}, [%3]! \n" //uv
                         "vsub.s8 d2, d2, %12 \n" //uv - 128
                         "pld [%1, #128] \n"
                         "vld1.u8 {d0}, [%1]! \n" //yptr----r0
                         "pld [%2, #128] \n"
                         "vld1.u8 {d1}, [%2]! \n" //yptr----r1
                         "vshll.u8 q2, d0, #6 \n" //r0---y
                         "vorr d3, d2, d2 \n"
                         "vshll.u8 q3, d1, #6 \n" //r1---y
                         "vorr q9, q2, q2 \n" //r0---y
                         "vtrn.s8 d2, d3 \n" //d2 = u, d3 = v
                         "vorr q11, q3, q3 \n" //r1---y
                         "vmlsl.s8 q9, d3, %14 \n" //r0---- y - v * 46
                         "vorr q8, q2, q2 \n" //r0---y
                         "vmlsl.s8 q11, d3, %14 \n" //r1---- y - v * 46
                         "vorr q10, q3, q3 \n" //r1---y
                         "vmlal.s8 q8, d3, %13 \n" //r0----r = y + v * 90
                         "vmlal.s8 q2, d2, %16 \n" //r0----b = y + u * 113
                         "vmlal.s8 q10, d3, %13 \n" //r1----r = y + v * 90
                         "vmlsl.s8 q9, d2, %15 \n" //r0----g = (y - v * 46) - u * 22
                         "vmlal.s8 q3, d2, %16 \n" //r1----b = y + u * 113
                         "vmlsl.s8 q11, d2, %15 \n" //r1----g = (y - v * 46) - u * 22
                         "vqshrun.s16 d24, q8, #6 \n" // r0---r
                         "vqshrun.s16 d26, q2, #6 \n" // r0---b
                         "vqshrun.s16 d4, q10, #6 \n" // r1---r
                         "vqshrun.s16 d25, q9, #6 \n" // r0---g
                         "vqshrun.s16 d6, q3, #6 \n" // r1---b
                         "vqshrun.s16 d5, q11, #6 \n" // r1---g
                         "subs %0, #1 \n"
                         "vst3.u8 {d24-d26}, [%4]! \n"
                         //"vsub.s8 d2, d2, %12 \n"
                         "vst3.u8 {d4-d6}, [%5]! \n"
                         "bne 0b \n"
                         : "=r"(nn), // %0
                         "=r"(yptr0), // %1
                         "=r"(yptr1), // %2
                         "=r"(uvptr), // %3
                         "=r"(rgb0), // %4
                         "=r"(rgb1) // %5
                         : "0"(nn),
                         "1"(yptr0),
                         "2"(yptr1),
                         "3"(uvptr),
                         "4"(rgb0),
                         "5"(rgb1),
                         "w"(_v128), // %12
                         "w"(_v90), // %13
                         "w"(_v46), // %14
                         "w"(_v22), // %15
                         "w"(_v113) // %16
                         : "cc", "memory", "q0", "q1", "q2", "q3", "q8", "q9", "q10", "q11", "q12", "d26"
                         );
        }
#endif // __aarch64__
#endif // __ARM_NEON
        // remain = w;
        // Scalar tail: one chroma pair drives a 2x2 block of pixels.
        for (; remain>0; remain = remain - 2)
        {
            // Derivation of the integer coefficients:
            // R = 1.164 * yy + 1.596 * vv
            // G = 1.164 * yy - 0.813 * vv - 0.391 * uu
            // B = 1.164 * yy + 2.018 * uu
            // R = Y + (1.370705 * (V-128))
            // G = Y - (0.698001 * (V-128)) - (0.337633 * (U-128))
            // B = Y + (1.732446 * (U-128))
            // R = ((Y << 6) + 87.72512 * (V-128)) >> 6
            // G = ((Y << 6) - 44.672064 * (V-128) - 21.608512 * (U-128)) >> 6
            // B = ((Y << 6) + 110.876544 * (U-128)) >> 6
            // R = ((Y << 6) + 90 * (V-128)) >> 6
            // G = ((Y << 6) - 46 * (V-128) - 22 * (U-128)) >> 6
            // B = ((Y << 6) + 113 * (U-128)) >> 6
            // R = (yy + 90 * vv) >> 6
            // G = (yy - 46 * vv - 22 * uu) >> 6
            // B = (yy + 113 * uu) >> 6
            int u = uvptr[0] - 128;
            int v = uvptr[1] - 128;
            int ruv = 90 * v;
            int guv = -46 * v + -22 * u;
            int buv = 113 * u;

            int y00 = yptr0[0] << 6;
            rgb0[0] = nv12_saturate_q6(y00 + ruv);
            rgb0[1] = nv12_saturate_q6(y00 + guv);
            rgb0[2] = nv12_saturate_q6(y00 + buv);

            int y01 = yptr0[1] << 6;
            rgb0[3] = nv12_saturate_q6(y01 + ruv);
            rgb0[4] = nv12_saturate_q6(y01 + guv);
            rgb0[5] = nv12_saturate_q6(y01 + buv);

            int y10 = yptr1[0] << 6;
            rgb1[0] = nv12_saturate_q6(y10 + ruv);
            rgb1[1] = nv12_saturate_q6(y10 + guv);
            rgb1[2] = nv12_saturate_q6(y10 + buv);

            int y11 = yptr1[1] << 6;
            rgb1[3] = nv12_saturate_q6(y11 + ruv);
            rgb1[4] = nv12_saturate_q6(y11 + guv);
            rgb1[5] = nv12_saturate_q6(y11 + buv);

            yptr0 += 2;
            yptr1 += 2;
            uvptr += 2;
            rgb0 += 6;
            rgb1 += 6;
        }
        yptr += 2*w;
        rgb += 2*3*w;
    }
}
/// Converts one pixel to B, G, R, A bytes (alpha fixed at 0xFF) using 7-bit
/// fixed-point coefficients and clamps every channel to [0, 255].
///
/// Both chroma contributions are subtracted from the green channel. The
/// previous scalar code computed `43*u - 91*v`, i.e. it added the V term,
/// which disagrees with the NEON path above it (two `vmls` instructions).
static inline void vn_store_bgra_pixel(unsigned char *dst, int luma, int u, int v)
{
    int r = luma + ((179 * v) >> 7);
    int g = luma - ((43 * u + 91 * v) >> 7);
    int b = luma + ((227 * u) >> 7);
    r = r < 0 ? 0 : r; r = r > 255 ? 255 : r;
    g = g < 0 ? 0 : g; g = g > 255 ? 255 : g;
    b = b < 0 ? 0 : b; b = b > 255 ? 255 : b;
    dst[0] = (unsigned char)b;
    dst[1] = (unsigned char)g;
    dst[2] = (unsigned char)r;
    dst[3] = 0xFF;
}

/// Converts a tightly packed semi-planar 4:2:0 image to 32-bit pixels.
///
/// Despite the name, the bytes written per pixel are B, G, R, A.
///
/// Layout expected in `yuv`: `w * h` luma bytes, then one chroma row of `w`
/// bytes (interleaved U, V pairs) for every two luma rows.
///
/// @param yuv  Source image.
/// @param w    Width in pixels.
/// @param h    Height in pixels.
/// @param rgba Destination, `w * h` 32-bit pixels.
void convertToRGBA(unsigned char* yuv, int w, int h, int* rgba)
{
    for (int i=0; i<h; ++i)
    {
        unsigned char* dst = (unsigned char*)(rgba + w*i);
        unsigned char* y = yuv + w*i;
        unsigned char* uv = yuv + w*h + w*(i/2);
        int count = w;
#ifdef HAS_NEON
        /* Process 16 pixels per iteration. */
        int c = count/16;
        asm volatile(
                     "movs r4, %[c]\t\n"
                     "beq 2f\t\n"
                     "vmov.u8 d7, #255\t\n"//Alpha
                     "vmov.u8 d3, #255\t\n"//Alpha
                     "vmov.s16 q11, #90\t\n"
                     "vmov.s16 q12, #128\t\n"
                     "vmov.s16 q13, #21\t\n"
                     "vmov.s16 q14, #46\t\n"
                     "vmov.s16 q15, #113\t\n"
                     "1:\t\n"
                     /* Y1 and Y2 are two interleaved groups of luma samples;
                      * each group lines up one-to-one with the U/V values. */
                     "vld2.8 {d8, d9}, [%[y]]!\t\n"//Y1, Y2
                     /* De-interleave the chroma samples into U and V. */
                     "vld2.8 {d0, d1}, [%[uv]]!\t\n"//u, v
                     "vmovl.u8 q5, d0\t\n"
                     "vmovl.u8 q6, d1\t\n"
                     "vsub.i16 q5,q5, q12\t\n"//U
                     "vsub.i16 q6,q6, q12\t\n"//V
                     //First RGBA
                     "vshll.u8 q7, d8, #6\t\n"
                     "vshll.u8 q8, d8, #6\t\n"
                     "vshll.u8 q9, d8, #6\t\n"
                     "vmla.i16 q7, q6, q11\t\n"
                     "vmls.i16 q8, q5, q13\t\n"
                     "vmls.i16 q8, q6, q14\t\n"
                     "vmla.i16 q9, q5, q15\t\n"
                     "vshr.s16 q7, q7, #6\t\n"
                     "vshr.s16 q8, q8, #6\t\n"
                     "vshr.s16 q9, q9, #6\t\n"
                     "vmov.s16 q10, #0\t\n"
                     "vmax.s16 q7, q7, q10\t\n"
                     "vmax.s16 q8, q8, q10\t\n"
                     "vmax.s16 q9, q9, q10\t\n"
                     "vmov.u16 q10, #255\t\n"
                     "vmin.u16 q7, q7, q10\t\n"
                     "vmin.u16 q8, q8, q10\t\n"
                     "vmin.u16 q9, q9, q10\t\n"
                     "vmovn.s16 d2, q7\t\n"
                     "vmovn.s16 d1, q8\t\n"
                     "vmovn.s16 d0, q9\t\n"
                     //Second RGBA
                     "vshll.u8 q7, d9, #6\t\n"
                     "vshll.u8 q8, d9, #6\t\n"
                     "vshll.u8 q9, d9, #6\t\n"
                     "vmla.i16 q7, q6, q11\t\n"
                     "vmls.i16 q8, q5, q13\t\n"
                     "vmls.i16 q8, q6, q14\t\n"
                     "vmla.i16 q9, q5, q15\t\n"
                     "vshr.s16 q7, q7, #6\t\n"
                     "vshr.s16 q8, q8, #6\t\n"
                     "vshr.s16 q9, q9, #6\t\n"
                     "vmov.s16 q10, #0\t\n"
                     "vmax.s16 q7, q7, q10\t\n"
                     "vmax.s16 q8, q8, q10\t\n"
                     "vmax.s16 q9, q9, q10\t\n"
                     "vmov.u16 q10, #255\t\n"
                     "vmin.u16 q7, q7, q10\t\n"
                     "vmin.u16 q8, q8, q10\t\n"
                     "vmin.u16 q9, q9, q10\t\n"
                     "vmovn.s16 d6, q7\t\n"
                     "vmovn.s16 d5, q8\t\n"
                     "vmovn.s16 d4, q9\t\n"
                     /* The two groups of channel values are still split into
                      * even and odd pixels, for example:
                      *   d0 : (g0 g2 g4 g6 g8 g10 g12 g14)
                      *   d4 : (g1 g3 g5 g7 g9 g11 g13 g15)
                      * They have to be interleaved before storing, giving:
                      *   d0 : (g0 g1 g2 g3 g4 g5 g6 g7)
                      *   d4 : (g8 g9 g10 g11 g12 g13 g14 g15)
                      */
                     "vtrn.8 d2,d6\t\n"
                     "vtrn.16 d2,d6\t\n"
                     "vtrn.32 d2,d6\t\n"
                     "vtrn.8 d1,d5\t\n"
                     "vtrn.16 d1,d5\t\n"
                     "vtrn.32 d1,d5\t\n"
                     "vtrn.8 d0,d4\t\n"
                     "vtrn.16 d0,d4\t\n"
                     "vtrn.32 d0,d4\t\n"
                     "vst4.8 {d0-d3}, [%[dst]]!\t\n"
                     "vst4.8 {d4-d7}, [%[dst]]!\t\n"
                     "subs r4, r4, #1\t\n"
                     "bne 1b\t\n"
                     "2:\t\n"
                     : [dst] "+r" (dst), [y] "+r" (y), [uv] "+r" (uv), [c] "+r" (c)
                     :
                     : "r4", "cc","memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7","d8","d9","d10","d11","d12","d13","d14","d15","d16","d17","d18","d19","d20","d21","d22","d23","d24","d25","d26","d27","d28","d29","d30","d31"
                     );
        count%=16;
#endif
        /* Scalar handling of the leftover pixels: two pixels share one U/V pair. */
        while (count > 1)
        {
            const int u = uv[0] - 128;
            const int v = uv[1] - 128;
            vn_store_bgra_pixel(dst, y[0], u, v);
            vn_store_bgra_pixel(dst + 4, y[1], u, v);
            y += 2;
            uv += 2;
            dst += 8;
            count -= 2;
        }
        /* Odd width: one last pixel with its own U/V pair. */
        if (count > 0)
        {
            vn_store_bgra_pixel(dst, y[0], uv[0] - 128, uv[1] - 128);
        }
    }
}
@end |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment