Skip to content

Instantly share code, notes, and snippets.

@lydonchandra
lydonchandra / malloc-arm64.s
Last active August 29, 2015 13:56
arm64 basic malloc
//extern uint32_t* test_strb1_x64(uint32_t* wordPtr, uint32_t numWords);
//extern uint32_t* test_malloc3(uint32_t* wordPtr, uint32_t numWords);
//extern void test_malloc4();
//extern uint32_t* test_malloc5(uint32_t intToSet);
.private_extern _test_strb1_x64
.globl _test_strb1_x64
.align 2
_test_strb1_x64:
@lydonchandra
lydonchandra / _test_sum_loop_arm64.s
Created February 15, 2014 19:05
_test_sum_loop_arm64
//int _test_sum_loop(int array[], int num)
// for(int idx=0; idx<num; idx++)
// sum += array[idx]
// return sum
//x0 <- array
//x1 <- num
.private_extern _test_sum_loop
.globl _test_sum_loop
.align 2
_test_sum_loop:
@lydonchandra
lydonchandra / copy_raw_image3.s
Last active August 29, 2015 13:56
copy bytes to *destImageBytesPtr from sourceImageBytes
//TODO: use neon instructions
//void copy_raw_image3(GLubyte ** destImageBytesPtr, GLubyte *sourceImageBytes,
// size_t imageWidth, size_t imageHeight,
// size_t widthScaleFactor, size_t heightScaleFactor);
// x0 <- destImageBytesPtr
// x1 <- sourceImageBytes
// x2 <- imageWidth
// x3 <- imageHeight
// x4 <- widthScaleFactor
// x5 <- heightScaleFactor
@lydonchandra
lydonchandra / _neon_double_bytes_ld1.s
Last active August 29, 2015 13:56
test neon arm64
//objective-c test file
//test.m
// Routine under test; only declared here, its implementation is not shown in this file.
// It is declared to return int, but the test below ignores the return value.
extern int neon_double_bytes_ld1( unsigned char * byteArray, unsigned char* sum );
// Calls neon_double_bytes_ld1 on a fixed 8-byte array, then logs the reported sum
// and the first array element.
// NOTE(review): the expected values noted on the NSLog lines (152 = 2 * 76, and 1 -> 2)
// suggest the routine doubles each byte in place and stores the total of the doubled
// bytes in `sum` -- confirm against the assembly.
+(void) test_asm {
unsigned char input_array1[] = { 1, 3, 5, 7, 11, 13, 17, 19 }; // elements add up to 76
unsigned char sum; // passed by address as the routine's second argument; not initialized here
neon_double_bytes_ld1( input_array1, &sum );
NSLog(@"sum=%d", sum); // expected log: sum=152
NSLog(@"input_array[0]=%d", input_array1[0] ); // expected log: input_array[0]=2
}
@lydonchandra
lydonchandra / v_copy_raw_image3.s
Created February 22, 2014 22:13
copy_raw_image using NEON arm64 v8
//void v_copy_raw_image3_simd(GLubyte ** destImageBytesPtr, GLubyte *sourceImageBytes,
// size_t imageWidth, size_t imageHeight,
// size_t widthScaleFactor, size_t heightScaleFactor);
// x0 <- destImageBytesPtr
// x1 <- sourceImageBytes
// x2 <- imageWidth
// x3 <- imageHeight
// x4 <- widthScaleFactor
// x5 <- heightScaleFactor
.private_extern _v_copy_raw_image3
@lydonchandra
lydonchandra / arm64_ld1_16b___tbl_16b.s
Created February 27, 2014 16:05
How to use arm64 NEON ld1.16b and tbl.16b to convert 64 bytes of RGBA to 48 bytes of RGB
Test case:
unsigned char inputRgb[] = { 0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7 };
unsigned char *outputRgb;
unsigned char inputRgba[] = {
10,10,10,10, 11,11,11,11, 12,12,12,12, 13,13,13,13, 14,14,14,14, 15,15,15,15, 16,16,16,16, 17,17,17,17, //32bytes
0,0,0,0, 1,1,1,1, 2,2,2,2, 3,3,3,3, 4,4,4,4, 5,5,5,5, 6,6,6,6, 7,7,7,7, //32bytes
};
unsigned char tblIndex[] = {
<system.webServer>
<rewrite>
<!--
Tile Rewrite Section
-->
<rules>
<!-- If tile is already saved onto disk, serve it -->
<rule name="Map Tile to TileStatic" stopProcessing="false">
<conditions logicalGrouping="MatchAny">
<add input="C:/xxx/Maps/Tile/{R:1}/{R:2}/{R:3}/{R:4}" matchType="IsFile" />
@lydonchandra
lydonchandra / multiply8bit_using16bit.c
Created July 21, 2014 15:24
Emulate 8-bit multiplication using 16-bit multiplication, http://stackoverflow.com/a/8196824
//Emulate 8-bit multiplication using 16-bit multiplication, http://stackoverflow.com/a/8196824
__m128i _mm_mullo_epi8(__m128i a, __m128i b) {
__m128i zero = _mm_setzero_si128();
__m128i Alo = _mm_cvtepu8_epi16(a); //pmovzxbw SSE4 -- packed mov zero extend byte to word 8-bit to 16bits
__m128i Ahi = _mm_unpackhi_epi8(a, zero); // punpckhbw -- interleave upper 8 signed/unsigned 8-bit int in a with upper 8-bit signed/unsigned in b
__m128i Blo = _mm_cvtepu8_epi16(b); //pmovzxbw
__m128i Bhi = _mm_unpackhi_epi8(b, zero); // punpckhbw
__m128i Clo = _mm_mullo_epi16(Alo, Blo); // pmullw
__m128i Chi = _mm_mullo_epi16(Ahi, Bhi); // pmullw
@lydonchandra
lydonchandra / perl exp to replace grep
Created October 5, 2014 06:33
grep -P does not work on OS X, so use perl instead
perl -nle'print if m{^\S+\s[0-9]*[1-8]\s}' 20news-bydate-train-stanford-classifier.txt > 20news-bydate-devtrain-stanford-classifier.txt
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading;
using System.Threading.Tasks;
namespace ConsoleApplication5
{
class Program