Last active
November 9, 2021 08:56
-
-
Save nicolasvasilache/0fe30c83cbfe5b4776ec9f0ee465611a to your computer and use it in GitHub Desktop.
LLVM AVX2 transpose 8x8
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
; Further compiled with: | |
; clang -x ir -emit-llvm -S -mcpu=haswell -O3 - -o - | llc -O3 -mcpu=haswell - -o - | |
; ModuleID = 'LLVMDialectModule' | |
source_filename = "LLVMDialectModule" | |
; External allocator symbols. They are declared here but never called by the
; function visible in this listing.
declare i8* @malloc(i64) | |
declare void @free(i8*) | |
; transpose_2d_on_tensors: transposes an 8x8 block of floats.
;
; Reads eight <8 x float> vectors from %1 at element offsets 0, 8, ..., 56,
; runs them through a three-stage shufflevector network, and writes eight
; <8 x float> vectors to %8 at the same element offsets. Stored vector k
; holds element k of each of the eight loaded vectors, in load order.
;
; Only %1 (source pointer) and %8 (destination pointer) are used in the body;
; %0, %2-%7 and %9-%13 are never referenced.
; NOTE(review): the 7+7 argument layout looks like two expanded MLIR memref
; descriptors (allocated ptr, aligned ptr, offset, sizes, strides) - confirm
; against the producer. Because those arguments are ignored, the hard-coded
; offsets presume 64 contiguous floats with a row stride of 8 and zero offset.
;
; All loads and stores are marked `align 4`, so only float alignment is
; assumed for the vector accesses.
; The `#0` attribute group is referenced but its definition is not visible in
; this listing; presumably it carries the `noinline` noted below.
; Function Attrs: noinline | |
define void @transpose_2d_on_tensors(float* %0, float* %1, i64 %2, i64 %3, i64 %4, i64 %5, i64 %6, float* %7, float* %8, i64 %9, i64 %10, i64 %11, i64 %12, i64 %13) #0 { | |
; --- Load: eight input rows r0..r7 (%17, %20, %23, %26, %29, %32, %35, %38),
; each 8 floats, taken from %1 + 8*i.
%15 = getelementptr float, float* %1, i64 0 | |
%16 = bitcast float* %15 to <8 x float>* | |
%17 = load <8 x float>, <8 x float>* %16, align 4 | |
%18 = getelementptr float, float* %1, i64 8 | |
%19 = bitcast float* %18 to <8 x float>* | |
%20 = load <8 x float>, <8 x float>* %19, align 4 | |
%21 = getelementptr float, float* %1, i64 16 | |
%22 = bitcast float* %21 to <8 x float>* | |
%23 = load <8 x float>, <8 x float>* %22, align 4 | |
%24 = getelementptr float, float* %1, i64 24 | |
%25 = bitcast float* %24 to <8 x float>* | |
%26 = load <8 x float>, <8 x float>* %25, align 4 | |
%27 = getelementptr float, float* %1, i64 32 | |
%28 = bitcast float* %27 to <8 x float>* | |
%29 = load <8 x float>, <8 x float>* %28, align 4 | |
%30 = getelementptr float, float* %1, i64 40 | |
%31 = bitcast float* %30 to <8 x float>* | |
%32 = load <8 x float>, <8 x float>* %31, align 4 | |
%33 = getelementptr float, float* %1, i64 48 | |
%34 = bitcast float* %33 to <8 x float>* | |
%35 = load <8 x float>, <8 x float>* %34, align 4 | |
%36 = getelementptr float, float* %1, i64 56 | |
%37 = bitcast float* %36 to <8 x float>* | |
%38 = load <8 x float>, <8 x float>* %37, align 4 | |
; --- Stage 1: interleave adjacent row pairs (r0,r1), (r2,r3), (r4,r5), (r6,r7).
; Mask <0,8,1,9,4,12,5,13> yields [a0,b0,a1,b1,a4,b4,a5,b5];
; mask <2,10,3,11,6,14,7,15> yields [a2,b2,a3,b3,a6,b6,a7,b7].
; These are the per-128-bit-half "unpack low" / "unpack high" patterns.
%39 = shufflevector <8 x float> %17, <8 x float> %20, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> | |
%40 = shufflevector <8 x float> %17, <8 x float> %20, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> | |
%41 = shufflevector <8 x float> %23, <8 x float> %26, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> | |
%42 = shufflevector <8 x float> %23, <8 x float> %26, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> | |
%43 = shufflevector <8 x float> %29, <8 x float> %32, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> | |
%44 = shufflevector <8 x float> %29, <8 x float> %32, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> | |
%45 = shufflevector <8 x float> %35, <8 x float> %38, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13> | |
%46 = shufflevector <8 x float> %35, <8 x float> %38, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15> | |
; --- Stage 2: merge 2-element pairs across two stage-1 vectors, done as four
; groups of three shuffles. In each group the first shuffle builds a helper
; [a2,a3,b0,b1,a6,a7,b4,b5]; the next two blend that helper with one of the
; stage-1 inputs (2-element granularity, no lane crossing).
; Result: %48/%49/%51/%52 hold input columns 0/1/2/3 of rows r0..r3 in lanes
; 0-3 and columns 4/5/6/7 of rows r0..r3 in lanes 4-7; %54/%55/%57/%58 hold
; the same for rows r4..r7.
%47 = shufflevector <8 x float> %39, <8 x float> %41, <8 x i32> <i32 2, i32 3, i32 8, i32 9, i32 6, i32 7, i32 12, i32 13> | |
%48 = shufflevector <8 x float> %39, <8 x float> %47, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15> | |
%49 = shufflevector <8 x float> %41, <8 x float> %47, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7> | |
%50 = shufflevector <8 x float> %40, <8 x float> %42, <8 x i32> <i32 2, i32 3, i32 8, i32 9, i32 6, i32 7, i32 12, i32 13> | |
%51 = shufflevector <8 x float> %40, <8 x float> %50, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15> | |
%52 = shufflevector <8 x float> %42, <8 x float> %50, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7> | |
%53 = shufflevector <8 x float> %43, <8 x float> %45, <8 x i32> <i32 2, i32 3, i32 8, i32 9, i32 6, i32 7, i32 12, i32 13> | |
%54 = shufflevector <8 x float> %43, <8 x float> %53, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15> | |
%55 = shufflevector <8 x float> %45, <8 x float> %53, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7> | |
%56 = shufflevector <8 x float> %44, <8 x float> %46, <8 x i32> <i32 2, i32 3, i32 8, i32 9, i32 6, i32 7, i32 12, i32 13> | |
%57 = shufflevector <8 x float> %44, <8 x float> %56, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 4, i32 5, i32 14, i32 15> | |
%58 = shufflevector <8 x float> %46, <8 x float> %56, <8 x i32> <i32 8, i32 9, i32 2, i32 3, i32 12, i32 13, i32 6, i32 7> | |
; --- Stage 3: exchange 4-element halves between the r0..r3 and r4..r7 results.
; Mask <0,1,2,3,8,9,10,11> concatenates the two low halves, giving full
; columns 0..3 (%59..%62); mask <4,5,6,7,12,13,14,15> concatenates the two
; high halves, giving full columns 4..7 (%63..%66).
%59 = shufflevector <8 x float> %48, <8 x float> %54, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> | |
%60 = shufflevector <8 x float> %49, <8 x float> %55, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> | |
%61 = shufflevector <8 x float> %51, <8 x float> %57, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> | |
%62 = shufflevector <8 x float> %52, <8 x float> %58, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11> | |
%63 = shufflevector <8 x float> %48, <8 x float> %54, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> | |
%64 = shufflevector <8 x float> %49, <8 x float> %55, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> | |
%65 = shufflevector <8 x float> %51, <8 x float> %57, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> | |
%66 = shufflevector <8 x float> %52, <8 x float> %58, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> | |
; --- Store: write columns 0..7 (%59..%66) as output rows to %8 + 8*k.
%67 = getelementptr float, float* %8, i64 0 | |
%68 = bitcast float* %67 to <8 x float>* | |
store <8 x float> %59, <8 x float>* %68, align 4 | |
%69 = getelementptr float, float* %8, i64 8 | |
%70 = bitcast float* %69 to <8 x float>* | |
store <8 x float> %60, <8 x float>* %70, align 4 | |
%71 = getelementptr float, float* %8, i64 16 | |
%72 = bitcast float* %71 to <8 x float>* | |
store <8 x float> %61, <8 x float>* %72, align 4 | |
%73 = getelementptr float, float* %8, i64 24 | |
%74 = bitcast float* %73 to <8 x float>* | |
store <8 x float> %62, <8 x float>* %74, align 4 | |
%75 = getelementptr float, float* %8, i64 32 | |
%76 = bitcast float* %75 to <8 x float>* | |
store <8 x float> %63, <8 x float>* %76, align 4 | |
%77 = getelementptr float, float* %8, i64 40 | |
%78 = bitcast float* %77 to <8 x float>* | |
store <8 x float> %64, <8 x float>* %78, align 4 | |
%79 = getelementptr float, float* %8, i64 48 | |
%80 = bitcast float* %79 to <8 x float>* | |
store <8 x float> %65, <8 x float>* %80, align 4 | |
%81 = getelementptr float, float* %8, i64 56 | |
%82 = bitcast float* %81 to <8 x float>* | |
store <8 x float> %66, <8 x float>* %82, align 4 | |
ret void | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment