This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Builds the rsatest executable from main.cpp and perf.cpp with icpc.

# Compile flags applied to every translation unit.
CPPFLAGS = -O3 -xHost -no-multibyte-chars
# Header search paths.
INCLUDE = -I/opt/local/include/ -I/opt/intel/ipp/include
# Library search paths and libraries used at link time.
LIBS = -L/opt/local/lib/ -lssl -lcrypto -L/opt/intel/ipp/lib -ipp=crypto

# CPPFLAGS carries the optimisation flags, so it is passed to each compile
# step; CFLAGS is kept so values supplied on the command line or through the
# environment still apply.
# Objects are listed before $(LIBS) so that linkers which resolve symbols
# left-to-right can find the library symbols the objects reference.
all: perf.h
	icpc $(INCLUDE) $(CPPFLAGS) $(CFLAGS) -c main.cpp -o main.o
	icpc $(INCLUDE) $(CPPFLAGS) $(CFLAGS) -c perf.cpp -o perf.o
	icpc perf.o main.o $(LIBS) -o rsatest
clean: |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Allocates and initialises an IPP big-number context sized for `len`
 * Ipp32u words.
 *
 * len   - length forwarded to ippsBigNumGetSize, ippsBigNumInit and
 *         ippsSet_BN.
 * pData - optional source words; when non-NULL they are loaded into the new
 *         big number as a positive value. When NULL the context is only
 *         initialised.
 *
 * Returns the new context, or NULL when the size query, the allocation, the
 * initialisation or the data load fails. The buffer is obtained from
 * ippMalloc, so it must be released with ippFree.
 */
IppsBigNumState* createBigNumState(int len, const Ipp32u* pData) {
    int size = 0;
    if (ippsBigNumGetSize(len, &size) != ippStsNoErr) {
        return NULL;
    }

    IppsBigNumState* pBN = (IppsBigNumState*) ippMalloc(size);
    if (pBN == NULL) {
        return NULL;
    }

    IppStatus status = ippsBigNumInit(len, pBN);
    if (status == ippStsNoErr && pData != NULL) {
        status = ippsSet_BN(IppsBigNumPOS, len, pData, pBN);
    }

    if (status != ippStsNoErr) {
        /* Do not hand back a half-initialised context, and do not leak it. */
        ippFree(pBN);
        return NULL;
    }
    return pBN;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Shared ggplot2 styling helpers and palettes.

# Install ggplot2 only when it is not already available, so repeated runs do
# not re-download the package and the script still works without network
# access once the package is present.
if (!requireNamespace("ggplot2", quietly = TRUE)) {
  install.packages("ggplot2")
}
library(ggplot2)

# Theme fragment that places the legend below the plot.
getLegend <- function() { return(theme(legend.position="bottom")) }

# Wraps `tmp` as the plot title.
getTitle <- function(tmp) { return(ggtitle(tmp)) }

# Text element for axis tick labels: centred, size 8, black.
getAxisTextElement <- function() { return(element_text(hjust=0.5, size=8, colour="black")) }

# Text element for axis titles: centred, size 10.
getAxisTitleElement <- function() { return(element_text(hjust=0.5, size=10)) }

# Text element for the plot title: left-aligned, size 10.
getTitleElement <- function () { return(element_text(hjust=0, size=10)) }

# Seven colours: black followed by six entries of the same dark grey.
colorPalette <- c("#000000", "#585858", "#585858", "#585858", "#585858", "#585858", "#585858")

# Five point-shape codes.
shapePalette <- c(16, 23, 24, 25, 22)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Code being analyzed | |
// | |
// 1. package ch.ethz.acl.ngen.saxpy; | |
// 2. | |
// 3. public class JSaxpy { | |
// 4. public void apply(int[] a, int[] b, int s, int n){ | |
// 5. for (int i = 0; i < n; i += 1) { | |
// 6. a[i] += b[i] * s; | |
// 7. } | |
// 8. } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// 1. package ch.ethz.acl.ngen.saxpy; | |
// 2. | |
// 3. public class JSaxpy { | |
// 4. public void apply(float[] a, float[] b, float s, int n){ | |
// 5. for (int i = 0; i < n; i += 1) { | |
// 6. a[i] += b[i] * s; | |
// 7. } | |
// 8. } | |
// 9. } |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Java HotSpot(TM) 64-Bit Server VM warning: printing of assembly code is enabled; turning on DebugNonSafepoints to gain additional output | |
CompilerOracle: print *JVector8.dot | |
Compiled method (c1) 807 864 % 3 ch.ethz.acl.ngen.precison.JVector8::dot @ 35 (134 bytes) | |
total in heap [0x000000010f6dd010,0x000000010f6ddbb0] = 2976 | |
relocation [0x000000010f6dd138,0x000000010f6dd1b0] = 120 | |
main code [0x000000010f6dd1c0,0x000000010f6dd640] = 1152 | |
stub code [0x000000010f6dd640,0x000000010f6dd6d0] = 144 | |
oops [0x000000010f6dd6d0,0x000000010f6dd6d8] = 8 | |
metadata [0x000000010f6dd6d8,0x000000010f6dd6e0] = 8 | |
scopes data [0x000000010f6dd6e0,0x000000010f6dd830] = 336 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
#include <cstdint> | |
#include <iostream> | |
#include <iomanip> | |
#include <cassert> | |
// | |
// Transpose 8x8 registers | |
// |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
// | |
// Performs division on 16-bit elements, using floating point division | |
// | |
static inline __m128i sse_mm_div_epi16_division(const __m128i &a_epi16, const __m128i &b_epi16) { | |
const __m128i lo_mask = _mm_set1_epi32(0xFFFF); | |
// | |
// Convert to two 32-bit integers |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <immintrin.h> | |
static inline __m256i avx_mm256_div_epi16_division(const __m256i &a_epi16, const __m256i &b_epi16) { | |
// | |
// Convert to two 32-bit integers | |
// | |
const __m256i a_hi_epi32 = _mm256_srai_epi32(a_epi16, 16); | |
const __m256i a_lo_epi32_shift = _mm256_slli_epi32(a_epi16, 16); | |
const __m256i a_lo_epi32 = _mm256_srai_epi32(a_lo_epi32_shift, 16); | |
const __m256i b_hi_epi32 = _mm256_srai_epi32(b_epi16, 16); |