Skip to content

Instantly share code, notes, and snippets.

@BeMg
BeMg / main.py
Last active April 11, 2021 05:47
Show the raw bytes of a file
def format_byte_report(data):
    """Return one report line for every byte in *data*.

    Each line shows the byte's hexadecimal value followed by the ``bytes``
    object itself, in the order the bytes appear in *data*.  An empty input
    yields an empty list.

    Args:
        data: The raw ``bytes`` to describe.

    Returns:
        A list of strings, one per byte.
    """
    # Slice (rather than index) so each item stays a length-1 ``bytes``
    # object, which is what ``.hex()`` and the report format expect.
    return [
        "origin {} -> utf-8 ? {}".format(data[i:i + 1].hex(), data[i:i + 1])
        for i in range(len(data))
    ]


if __name__ == "__main__":
    # NOTE(review): ``rst`` and ``cnt`` are not read anywhere in the visible
    # part of this script; they are kept in case later code relies on them.
    rst = ""
    filename = "SImulatedDataAnalysis.py"
    # Read the file once in binary mode instead of issuing one read per byte.
    with open(filename, "rb") as f:
        lines = format_byte_report(f.read())
    cnt = len(lines)
    # Every byte is reported, starting with the first one, and nothing is
    # printed for the empty read that marks end-of-file.
    for line in lines:
        print(line)
@BeMg
BeMg / after_grouping.ll
Created August 17, 2020 05:06
Shows how pocl handles a local memory buffer
; ModuleID = 'parallel.bc'
source_filename = "parallel_bc"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n64-S128"
target triple = "riscv64-unknown-linux"
; Function Attrs: alwaysinline nofree norecurse nounwind
define void @_pocl_kernel_transpose(i32 %0, i32 %1, float* nocapture readonly %2, float* nocapture %3, [8 x [8 x float]]* nocapture %4, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* nocapture readonly %5, i64 %6, i64 %7, i64 %8) local_unnamed_addr #0 !kernel_arg_addr_space !7 !kernel_arg_access_qual !8 !kernel_arg_type !9 !kernel_arg_base_type !9 !kernel_arg_type_qual !10 !kernel_arg_name !11 !pocl_generated !12 {
%10 = getelementptr inbounds { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i64 0, i32 2, i64 0
%11 = load i64, i64* %10, align 8
%12 = getelementptr { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }, { [3 x i64], [3 x i64], [3 x i64], i8*, i32*, i32, i32 }* %5, i64 0, i32 2, i64 1
@BeMg
BeMg / sincos.ll
Last active May 21, 2020 06:18
temp
; Function Attrs: nounwind readnone uwtable
define <4 x float> @_Z7_cl_sinDv4_f(<4 x float> %x) local_unnamed_addr #2 {
entry:
%r0 = alloca <4 x float>, align 16
%r1 = alloca <4 x float>, align 16
%astype = bitcast <4 x float> %x to <4 x i32>
%and = and <4 x i32> %astype, <i32 2147483647, i32 2147483647, i32 2147483647, i32 2147483647>
%astype1 = bitcast <4 x i32> %and to <4 x float>
%0 = bitcast <4 x float>* %r0 to i8*
call void @llvm.lifetime.start.p0i8(i64 16, i8* nonnull %0) #10
@BeMg
BeMg / condition_case_1.ll
Last active May 29, 2020 08:16
transpose example
; ModuleID = 'transpose_barrier_condition.bc'
source_filename = "transpose_barrier_condition.bc"
target datalayout = "e-m:e-p:32:32-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024"
target triple = "x86_64-unknown-unknown-unknown"
@transpose.buffer = internal unnamed_addr addrspace(3) global [8 x [8 x float]] undef, align 4
@kernel_config_l2 = external addrspace(2) constant [32 x i32]
; Function Attrs: noduplicate
declare protected void @barrier(i32) #0
@BeMg
BeMg / conv1d.cl
Last active May 29, 2020 03:11
aaa
// Conv1D: each work-item adds an N-term product sum to one element of `output`.
//
// For idx = get_global_id(0), the kernel adds array[idx + i] * filter[i] to
// output[idx] for every i in [0, N). output[idx] is only accumulated into
// (+=) and is never initialised inside this kernel.
// NOTE(review): reads array[idx] .. array[idx + N - 1]; presumably the host
// sizes `array` to cover that range for every work-item -- confirm at the caller.
__kernel void Conv1D(__global int *array, __global int *filter, int N, __global int *output) {
int idx = get_global_id(0);
for (int i=0; i<N; i++) {
output[idx] += array[idx+i] * filter[i];
// Work-group barrier with a global-memory fence, executed on every iteration.
barrier(CLK_GLOBAL_MEM_FENCE);
// Per its own message, this printf is present only for its side effect.
printf("Juse for side-effect.\n");
}
}
@BeMg
BeMg / conv2d.cl
Last active May 14, 2020 02:15
TI-opencl clocl example
__kernel void Conv2D( __global int * image_in, //image input
__global int * filter_in, //filter input
int K, //filter kernel size
__global int * image_out) //feature map output
{
int W; //work group global size
int Wn; //padded image width
int x; //global id x
int y; //global id y
int ki, kj; //filter coordinate,(kj, ki)
@BeMg
BeMg / build.txt
Last active May 5, 2020 09:40
ti-opencl
PATH=$PATH:/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/nextgate/linux-devkit/sysroots/armv7at2hf-neon-linux-gnueabi/bin:/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/GCC5/install/bin \
USE_EXTERNAL_FW_REPO=1 \
BUILD_EVE_FIRMWARE=0 \
XDC_DIR=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/xdctools_3_50_07_20_core \
TI_OCL_CGT_INSTALL=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/ti-cgt-c6000_8.2.3 \
TI_OCL_M4_CGT_INSTALL=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/ti-cgt-arm_16.9.2.LTS \
LINUX_DEVKIT_ROOT=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/nextgate/linux-devkit/sysroots/armv7at2hf-neon-linux-gnueabi \
X86_LLVM_DIR=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/ti-llvm/install.x86 \
ARM_LLVM_DIR=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/ti-llvm/install.arm \
DESTDIR=/fast-playground3/piyou/OpenCL/TI-OpenCL/ti/ocl-build \
@BeMg
BeMg / test1.c
Created April 14, 2020 09:20
CBA TEST CASE
#include <stdio.h>
//unsigned long TEMP;
/* Base constant from which every other address macro below is derived. */
#define TEMP 0xF0000000
/* BASE sits 0x80000 above TEMP. */
#define BASE (TEMP + 0x80000)
/* SRC1..SRC4: four consecutive offsets from BASE, 4 apart (0xF0, 0xF4, 0xF8, 0xFC). */
#define SRC1 (BASE + 0x000000F0)
#define SRC2 (BASE + 0x000000F4)
#define SRC3 (BASE + 0x000000F8)
#define SRC4 (BASE + 0x000000Fc)
@BeMg
BeMg / stdout.log
Created April 9, 2020 05:09
tvm stride load store example
// attr [PaddedInput] storage_scope = "global"
allocate PaddedInput[float32 * 6553600]
// attr [DepthwiseConv2d] storage_scope = "global"
allocate DepthwiseConv2d[float32 * 2359296]
produce PaddedInput {
for (i1, 0, 256) {
for (i2, 0, 160) {
for (i3, 0, 160) {
PaddedInput[((((i1*160) + i2)*160) + i3)] = tvm_if_then_else(((((32 <= i2) && (i2 < 128)) && (32 <= i3)) && (i3 < 128)), Input[(((((i1*96) + i2)*96) + i3) + -3104)], 0.000000f)
}
@BeMg
BeMg / case1.c
Last active April 1, 2020 05:47
RSRF test case
/* Pair of an int `s` and an unsigned long `t`. */
struct S { int s; unsigned long t; };
/*
 * Return x->s + y.
 *
 * x->s is an int and y an unsigned long, so the usual arithmetic conversions
 * convert x->s to unsigned long before the addition; the sum is then
 * converted to the unsigned long long return type.
 *
 * The noinline and noclone attributes ask the compiler to keep this as a
 * real out-of-line function rather than inlining or cloning it.
 */
__attribute__ ((noinline, noclone)) unsigned long long
bar (struct S *x, unsigned long y)
{
return x->s + y;
}
__attribute__ ((noinline, noclone)) unsigned long long