Wenzel Jakob wjakob

## avx512_transpose.cpp
// g++ -mfma -mf16c -mavx512f -mavx512vnni -mavx512vl

#include <immintrin.h>

#include <stdio.h>

static void print(const __m512& _x)
{
    __attribute__((aligned(64)))
    float a[16];

## latency.markdown

      
        
          
            
              
              2 files
            
          
          
            
              
              0 forks
            
          
          
            
              
              0 comments
            
          
          
            
              
              0 stars
            
          
        
        
          
              
          
          
            
                wjakob
                / latency.markdown
            
            
              Created
              September 5, 2017 19:15
                — forked from hellerbarde/latency.markdown
            
              
                Latency numbers every programmer should know
              
          
        
      
        
  
      
    Latency numbers every programmer should know

L1 cache reference ......................... 0.5 ns
Branch mispredict ............................ 5 ns
L2 cache reference ........................... 7 ns
Mutex lock/unlock ........................... 25 ns
Main memory reference ...................... 100 ns
Compress 1K bytes with Zippy ............. 3,000 ns  =   3 µs
Send 2K bytes over 1 Gbps network ....... 20,000 ns  =  20 µs
SSD random read ........................ 150,000 ns  = 150 µs

Read 1 MB sequentially from memory ..... 250,000 ns  = 250 µs
	// g++ -mfma -mf16c -mavx512f -mavx512vnni -mavx512vl

	#include <immintrin.h>

	#include <stdio.h>

	static void print(const __m512& _x)
	{
	__attribute__((aligned(64)))
	float a[16];