Last active
August 7, 2019 02:12
-
-
Save proywm/36561da2a99f4ada8eafe8bdbdf3bbb7 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
all:
	gcc -g -O0 *.c -o record_sample
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
$ timeout 1 ./record_sample
model 94 cpu_family 6
proessor 30
Using event MEM_LOAD_RETIRED:L1_MISS
precise_ip 3 pe.config1 0
sys_perf_event_open(0x7ffcd7b2fab0,0,-1,-1,0
type: 4
size: 112
config: 5308d1 (5441745)
sample_period: 10000
sample_type: 4105
read_format: 12
disabled: 1
inherit: 0
pinned: 0
exclusive: 0
exclude_user: 0
exclude_kernel: 1
exclude_hv: 0
exclude_idle: 0
mmap: 0
comm: 0
freq: 0
inherit_stat: 0
enable_on_exec: 0
task: 0
watermark: 0
Returned -1 22 Invalid argument
Error opening leader 5308d1
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#define _GNU_SOURCE 1 | |
#include <stdio.h> | |
#include <stdlib.h> | |
#include <stdint.h> | |
#include <stdbool.h> | |
#include <string.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <errno.h> | |
#include <signal.h> | |
#include <sys/mman.h> | |
#include <sys/ioctl.h> | |
#include <asm/unistd.h> | |
#include <sys/prctl.h> | |
#include <linux/perf_event.h> | |
#include <asm/perf_regs.h> | |
#include <linux/hw_breakpoint.h> | |
/*
 * CPU vendor identifiers.  detect_processor_cpuinfo() stores one of these
 * in processor_vendor.
 */
#define VENDOR_UNKNOWN	(-1)
#define VENDOR_INTEL	1
#define VENDOR_AMD	2
#define VENDOR_IBM	3
#define VENDOR_ARM	4
/*
 * Processor model identifiers.  detect_processor_cpuinfo() stores one of
 * these in processor_type.  The numeric values are kept exactly as they
 * were; note that they are not contiguous.
 */
#define PROCESSOR_UNKNOWN		(-1)

/* x86: Intel and AMD */
#define PROCESSOR_PENTIUM_PRO		1
#define PROCESSOR_PENTIUM_II		2
#define PROCESSOR_PENTIUM_III		3
#define PROCESSOR_PENTIUM_4		4
#define PROCESSOR_PENTIUM_M		5
#define PROCESSOR_COREDUO		6
#define PROCESSOR_CORE2			7
#define PROCESSOR_NEHALEM		8
#define PROCESSOR_NEHALEM_EX		9
#define PROCESSOR_WESTMERE		10
#define PROCESSOR_WESTMERE_EX		11
#define PROCESSOR_SANDYBRIDGE		12
#define PROCESSOR_ATOM			13
#define PROCESSOR_K7			14
#define PROCESSOR_K8			15
#define PROCESSOR_AMD_FAM10H		16
#define PROCESSOR_AMD_FAM11H		17
#define PROCESSOR_AMD_FAM14H		18
#define PROCESSOR_AMD_FAM15H		19
#define PROCESSOR_IVYBRIDGE		20
#define PROCESSOR_KNIGHTSCORNER		21
#define PROCESSOR_SANDYBRIDGE_EP	22
#define PROCESSOR_AMD_FAM16H		23
#define PROCESSOR_IVYBRIDGE_EP		24
#define PROCESSOR_HASWELL		25
#define PROCESSOR_ATOM_CEDARVIEW	26
#define PROCESSOR_ATOM_SILVERMONT	27
#define PROCESSOR_BROADWELL		28
#define PROCESSOR_HASWELL_EP		29
#define PROCESSOR_SKYLAKE		30

/* IBM POWER */
#define PROCESSOR_POWER3		103
#define PROCESSOR_POWER4		104
#define PROCESSOR_POWER5		105
#define PROCESSOR_POWER6		106
#define PROCESSOR_POWER7		107

/* ARM */
#define PROCESSOR_CORTEX_A8		200
#define PROCESSOR_CORTEX_A9		201
#define PROCESSOR_CORTEX_A5		202
#define PROCESSOR_CORTEX_A15		203
#define PROCESSOR_ARM1176		204
#define PROCESSOR_CORTEX_A7		205
#define PROCESSOR_CORTEX_A57		206
#define PROCESSOR_CORTEX_A53		207
/*
 * Command codes.  NOTE(review): their consumer is not visible in this part
 * of the file -- presumably requests sent to a companion driver; confirm.
 */
#define START_SAMPLING				1
#define STOP_SAMPLING_AND_WRITE_TO_FILE		2
#define DO_SAMPLE				3
#define GET_MESSAGE_LEN				4
#define GET_DATA				5

#define MAX_TEST_EVENTS		16

/*
 * x86 load fence that also acts as a compiler barrier.  perf_mmap_read()
 * issues it right after reading data_head from the perf control page.
 */
#define rmb()	__asm__ __volatile__("lfence":::"memory")

#define MMAP_DATA_SIZE		8
#define SAMPLE_FREQUENCY	10000
#define DBG_FREQUENCY		5

/* The buffer length (crude but fine) */
#define BUFFER_LENGTH		256

/* Number of entries in reg_names[]; print_regs() labels only these. */
#define NUM_REGS		PERF_REG_X86_64_MAX
/*
 * Printable register names.  print_regs() indexes this table with the bit
 * position of each register set in a sample's register mask.
 */
static char reg_names[NUM_REGS][8] = {
	"RAX",	"RBX",	"RCX",	"RDX",		/*  0 ..  3 */
	"RSI",	"RDI",	"RBP",	"RSP",		/*  4 ..  7 */
	"RIP",	"RFLAGS", "CS",	"SS",		/*  8 .. 11 */
	"DS",	"ES",	"FS",	"GS",		/* 12 .. 15 */
	"R8",	"R9",	"R10",	"R11",		/* 16 .. 19 */
	"R12",	"R13",	"R14",	"R15",		/* 20 .. 23 */
};

/* Running totals.  NOTE(review): updated outside the visible code. */
static int count_total = 0, bp_count_total = 0;

/* Ring-buffer mappings and read positions -- presumably one pair for the
 * sampling event and one for the breakpoint event; confirm against users. */
static char *our_mmap, *bp_mmap;
static long long prev_head, prev_head_bp;

static int quiet;

/* Sample layout settings, with separate copies carrying a _BP suffix. */
static long long global_sample_type, global_sample_regs_user;
static long long global_sample_type_BP, global_sample_regs_user_BP;

/* dbgAddr receives the address of every PERF_SAMPLE_ADDR field decoded by
 * perf_mmap_read(). */
long long dbgAddr, dbgSrc;
bool hasPrevious = false;
int dbgFdPtr;

/* File descriptors. */
static int fdLKM;
int fdPreciseSample, fdBP;

/* perf_mmap_read() sets this to 1 for kernel-mode records and 0 otherwise
 * (only while it is printing, i.e. when not quiet). */
int setBP = 0;

/* -2 means "not probed yet"; see detect_processor() / detect_vendor(). */
static int processor_type = -2, processor_vendor = -2;

struct perf_event_attr peBP;
/* Capacity of every array in struct recordValues. */
#define NUM_VALUES 4

/*
 * Three parallel arrays of NUM_VALUES entries each: a one-character name,
 * an address and a value per slot.  `wp` is the single global instance.
 */
struct recordValues {
	char		names[NUM_VALUES];
	unsigned long	addresses[NUM_VALUES];
	unsigned long	values[NUM_VALUES];
} wp;
#include <execinfo.h> | |
/*
 * Print the current call stack (at most 16 frames) to stdout, framed by
 * "[stack trace]>>>" and "<<<[stack trace]" marker lines.
 *
 * backtrace_symbols() allocates its result with malloc() and returns NULL
 * when that fails; in that case the raw return addresses are printed
 * instead of dereferencing a NULL array.
 */
void stack_trace(void)
{
	enum { MAX_FRAMES = 16 };
	void *frames[MAX_FRAMES];
	char **symbols;
	int depth, i;

	depth = backtrace(frames, MAX_FRAMES);
	symbols = backtrace_symbols(frames, depth);

	printf("[stack trace]>>>\n");
	for (i = 0; i < depth; i++) {
		if (symbols != NULL)
			printf("%s\n", symbols[i]);
		else
			printf("%p\n", frames[i]);
	}
	printf("<<<[stack trace]\n");

	/* Only the array itself is freed; the strings live inside it. */
	free(symbols);
}
#include <inttypes.h> | |
/* Debug output goes straight to stdout. */
#define SUBDBG printf

/*
 * Thin wrapper around the perf_event_open system call that dumps the
 * request and the outcome through SUBDBG.
 *
 * hw_event  attribute block describing the event; selected fields are
 *           printed before the call
 * pid, cpu, group_fd, flags
 *           passed to the kernel unchanged
 *
 * Returns whatever the system call returned: a file descriptor on success,
 * -1 on failure.  On failure errno still holds the kernel's error code when
 * this function returns -- it is saved before the result is printed and
 * restored afterwards, so the debug output cannot clobber it.
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
		int cpu, int group_fd, unsigned long flags)
{
	long ret;
	int saved_errno;

	SUBDBG("sys_perf_event_open(%p,%d,%d,%d,%lx\n",
	       (void *)hw_event, (int)pid, cpu, group_fd, flags);
	SUBDBG(" type: %d\n", (int)hw_event->type);
	SUBDBG(" size: %d\n", (int)hw_event->size);
	SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",
	       (uint64_t)hw_event->config, (uint64_t)hw_event->config);
	SUBDBG(" sample_period: %"PRIu64"\n", (uint64_t)hw_event->sample_period);
	SUBDBG(" sample_type: %"PRIu64"\n", (uint64_t)hw_event->sample_type);
	SUBDBG(" read_format: %"PRIu64"\n", (uint64_t)hw_event->read_format);
	SUBDBG(" disabled: %d\n", (int)hw_event->disabled);
	SUBDBG(" inherit: %d\n", (int)hw_event->inherit);
	SUBDBG(" pinned: %d\n", (int)hw_event->pinned);
	SUBDBG(" exclusive: %d\n", (int)hw_event->exclusive);
	SUBDBG(" exclude_user: %d\n", (int)hw_event->exclude_user);
	SUBDBG(" exclude_kernel: %d\n", (int)hw_event->exclude_kernel);
	SUBDBG(" exclude_hv: %d\n", (int)hw_event->exclude_hv);
	SUBDBG(" exclude_idle: %d\n", (int)hw_event->exclude_idle);
	SUBDBG(" mmap: %d\n", (int)hw_event->mmap);
	SUBDBG(" comm: %d\n", (int)hw_event->comm);
	SUBDBG(" freq: %d\n", (int)hw_event->freq);
	SUBDBG(" inherit_stat: %d\n", (int)hw_event->inherit_stat);
	SUBDBG(" enable_on_exec: %d\n", (int)hw_event->enable_on_exec);
	SUBDBG(" task: %d\n", (int)hw_event->task);
	SUBDBG(" watermark: %d\n", (int)hw_event->watermark);

	ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
		      group_fd, flags);
	saved_errno = errno;

	SUBDBG("Returned %d %d %s\n", (int)ret,
	       ret < 0 ? saved_errno : 0,
	       ret < 0 ? strerror(saved_errno) : " ");

	errno = saved_errno;
	return ret;
}
/* Return the calling thread's id, obtained with a raw gettid system call. */
pid_t gettid(void)
{
	long tid = syscall(__NR_gettid);

	return (pid_t)tid;
}
static int detect_processor_cpuinfo(void) { | |
FILE *fff; | |
int cpu_family=0,model=0; | |
char string[BUFSIZ]; | |
fff=fopen("/proc/cpuinfo","r"); | |
if (fff==NULL) { | |
fprintf(stderr,"ERROR! Can't open /proc/cpuinfo\n"); | |
return PROCESSOR_UNKNOWN; | |
} | |
while(1) { | |
if (fgets(string,BUFSIZ,fff)==NULL) break; | |
/* Power6 */ | |
if (strstr(string,"POWER6")) { | |
processor_vendor=VENDOR_IBM; | |
processor_type=PROCESSOR_POWER6; | |
return 0; | |
} | |
/* ARM */ | |
if (strstr(string,"CPU part")) { | |
processor_vendor=VENDOR_ARM; | |
if (strstr(string,"0xc05")) { | |
processor_type=PROCESSOR_CORTEX_A5; | |
return 0; | |
} | |
if (strstr(string,"0xc09")) { | |
processor_type=PROCESSOR_CORTEX_A9; | |
return 0; | |
} | |
if (strstr(string,"0xc08")) { | |
processor_type=PROCESSOR_CORTEX_A8; | |
return 0; | |
} | |
if (strstr(string,"0xc07")) { | |
processor_type=PROCESSOR_CORTEX_A7; | |
return 0; | |
} | |
if (strstr(string,"0xc0f")) { | |
processor_type=PROCESSOR_CORTEX_A15; | |
return 0; | |
} | |
if (strstr(string,"0xb76")) { | |
processor_type=PROCESSOR_ARM1176; | |
return 0; | |
} | |
// Cortex R4 - 0xc14 | |
// Cortex R5 - 0xc15 | |
// ARM1136 - 0xb36 | |
// ARM1156 - 0xb56 | |
// ARM1176 - 0xb76 | |
// ARM11 MPCore - 0xb02 | |
} | |
/* vendor */ | |
if (strstr(string,"vendor_id")) { | |
if (strstr(string,"GenuineIntel")) { | |
processor_vendor=VENDOR_INTEL; | |
} | |
if (strstr(string,"AuthenticAMD")) { | |
processor_vendor=VENDOR_AMD; | |
} | |
} | |
/* family */ | |
if (strstr(string,"cpu family")) { | |
sscanf(string,"%*s %*s %*s %d",&cpu_family); | |
} | |
/* model */ | |
if ((strstr(string,"model")) && (!strstr(string,"model name")) ) { | |
sscanf(string,"%*s %*s %d",&model); | |
} | |
} | |
fclose(fff); | |
if (processor_vendor==VENDOR_AMD) { | |
switch(cpu_family) { | |
case 0x6: | |
processor_type=PROCESSOR_K7; | |
break; | |
case 0xf: | |
processor_type=PROCESSOR_K8; | |
break; | |
case 0x10: | |
processor_type=PROCESSOR_AMD_FAM10H; | |
break; | |
case 0x11: | |
processor_type=PROCESSOR_AMD_FAM11H; | |
break; | |
case 0x14: | |
processor_type=PROCESSOR_AMD_FAM14H; | |
break; | |
case 0x15: | |
processor_type=PROCESSOR_AMD_FAM15H; | |
break; | |
case 0x16: | |
processor_type=PROCESSOR_AMD_FAM16H; | |
break; | |
default: | |
processor_type=PROCESSOR_UNKNOWN; | |
break; | |
} | |
return 0; | |
} | |
if (processor_vendor==VENDOR_INTEL) { | |
printf("model %d cpu_family %d\n", model, cpu_family); | |
if (cpu_family==6) { | |
switch(model) { | |
case 1: | |
processor_type=PROCESSOR_PENTIUM_PRO; | |
break; | |
case 3: | |
case 5: | |
case 6: | |
processor_type=PROCESSOR_PENTIUM_II; | |
break; | |
case 7: | |
case 8: | |
case 10: | |
case 11: | |
processor_type=PROCESSOR_PENTIUM_III; | |
break; | |
case 9: | |
case 13: | |
processor_type=PROCESSOR_PENTIUM_M; | |
break; | |
case 14: | |
processor_type=PROCESSOR_COREDUO; | |
break; | |
case 15: | |
case 22: | |
case 23: | |
case 29: | |
processor_type=PROCESSOR_CORE2; | |
break; | |
case 28: | |
case 38: | |
case 39: | |
case 53: | |
processor_type=PROCESSOR_ATOM; | |
break; | |
case 54: | |
processor_type=PROCESSOR_ATOM_CEDARVIEW; | |
break; | |
case 55: | |
case 77: | |
processor_type=PROCESSOR_ATOM_SILVERMONT; | |
break; | |
case 26: | |
case 30: | |
case 31: | |
processor_type=PROCESSOR_NEHALEM; | |
break; | |
case 46: | |
processor_type=PROCESSOR_NEHALEM_EX; | |
break; | |
case 37: | |
case 44: | |
processor_type=PROCESSOR_WESTMERE; | |
break; | |
case 47: | |
processor_type=PROCESSOR_WESTMERE_EX; | |
break; | |
case 42: | |
processor_type=PROCESSOR_SANDYBRIDGE; | |
break; | |
case 45: | |
processor_type=PROCESSOR_SANDYBRIDGE_EP; | |
break; | |
case 58: | |
processor_type=PROCESSOR_IVYBRIDGE; | |
break; | |
case 62: | |
processor_type=PROCESSOR_IVYBRIDGE_EP; | |
break; | |
case 60: | |
case 69: | |
case 70: | |
processor_type=PROCESSOR_HASWELL; | |
break; | |
case 63: | |
processor_type=PROCESSOR_HASWELL_EP; | |
break; | |
case 61: | |
case 71: | |
case 79: | |
processor_type=PROCESSOR_BROADWELL; | |
break; | |
case 94: | |
case 85: | |
processor_type=PROCESSOR_SKYLAKE; | |
break; | |
default: | |
processor_type=PROCESSOR_UNKNOWN; | |
} | |
return 0; | |
} | |
if (cpu_family==11) { | |
processor_type=PROCESSOR_KNIGHTSCORNER; | |
return 0; | |
} | |
if (cpu_family==15) { | |
processor_type=PROCESSOR_PENTIUM_4; | |
return 0; | |
} | |
} | |
processor_type=PROCESSOR_UNKNOWN; | |
return 0; | |
} | |
int detect_processor(void) { | |
if (processor_type==-2) { | |
detect_processor_cpuinfo(); | |
} | |
return processor_type; | |
} | |
int detect_vendor(void) { | |
if (processor_vendor==-2) { | |
detect_processor_cpuinfo(); | |
} | |
return processor_vendor; | |
} | |
/* Test example starts */
#define MATRIX_SIZE 512

/* Operands (a, b) and product (c) of the test workload. */
static double a[MATRIX_SIZE][MATRIX_SIZE];
static double b[MATRIX_SIZE][MATRIX_SIZE];
static double c[MATRIX_SIZE][MATRIX_SIZE];

/*
 * Fill a and b with fixed values, compute c = a * b with the plain
 * triple loop (columns of c in the outer loop), then add up every element
 * of c.  The sum is printed unless `quiet` is non-zero.
 */
static void naive_matrix_multiply(int quiet)
{
	int row, col, inner;
	double acc;

	/* a[r][c] = r * c,  b[r][c] = r / (c + 5) */
	for (row = 0; row < MATRIX_SIZE; row++) {
		for (col = 0; col < MATRIX_SIZE; col++) {
			a[row][col] = (double)row * (double)col;
			b[row][col] = (double)row / (double)(col + 5);
		}
	}

	/* One column of c at a time */
	col = 0;
	while (col < MATRIX_SIZE) {
		for (row = 0; row < MATRIX_SIZE; row++) {
			acc = 0;
			for (inner = 0; inner < MATRIX_SIZE; inner++) {
				acc += a[row][inner] * b[inner][col];
			}
			c[row][col] = acc;
		}
		col++;
	}

	/* Checksum over the whole product, row by row */
	acc = 0.0;
	for (row = 0; row < MATRIX_SIZE; row++) {
		for (col = 0; col < MATRIX_SIZE; col++) {
			acc += c[row][col];
		}
	}

	if (quiet)
		return;

	printf("Matrix multiply sum: s=%lf\n", acc);
}
/*Test example ends*/
/*
 * Values for the raw_type argument of perf_mmap_read(): how the payload of
 * a PERF_SAMPLE_RAW field is printed.
 */
#define RAW_NONE	0	/* print the bytes as decimal numbers */
#define RAW_IBS_FETCH	1	/* decode with dump_raw_ibs_fetch() */
#define RAW_IBS_OP	2	/* decode with dump_raw_ibs_op() */
/*
 * Expected values that decoded samples are checked against.
 *
 * pid     compared with the pid of each PERF_SAMPLE_TID field
 * events  compared with the event count of a PERF_FORMAT_GROUP read block
 * branch_low
 *         compared with branch-stack "from" addresses
 * tid, branch_high
 *         NOTE(review): not referenced in the visible code -- presumably
 *         the expected thread id and the upper branch-address bound.
 */
struct validate_values {
	int pid, tid;
	int events;
	unsigned long branch_low, branch_high;
};
/* Urgh who designed this interface */ | |
static int handle_struct_read_format(unsigned char *sample, | |
int read_format, | |
struct validate_values *validation, | |
int quiet) { | |
int offset=0,i; | |
if (read_format & PERF_FORMAT_GROUP) { | |
long long nr,time_enabled,time_running; | |
memcpy(&nr,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("\t\tNumber: %lld ",nr); | |
offset+=8; | |
if (validation) { | |
if (validation->events!=nr) { | |
fprintf(stderr,"Error! Wrong number " | |
"of events %d != %lld\n", | |
validation->events,nr); | |
} | |
} | |
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | |
memcpy(&time_enabled,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("enabled: %lld ",time_enabled); | |
offset+=8; | |
} | |
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | |
memcpy(&time_running,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("running: %lld ",time_running); | |
offset+=8; | |
} | |
if (!quiet) printf("\n"); | |
for(i=0;i<nr;i++) { | |
long long value, id; | |
memcpy(&value,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("\t\t\tValue: %lld ",value); | |
offset+=8; | |
if (read_format & PERF_FORMAT_ID) { | |
memcpy(&id,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("id: %lld ",id); | |
offset+=8; | |
} | |
if (!quiet) printf("\n"); | |
} | |
} | |
else { | |
long long value,time_enabled,time_running,id; | |
memcpy(&value,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("\t\tValue: %lld ",value); | |
offset+=8; | |
if (read_format & PERF_FORMAT_TOTAL_TIME_ENABLED) { | |
memcpy(&time_enabled,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("enabled: %lld ",time_enabled); | |
offset+=8; | |
} | |
if (read_format & PERF_FORMAT_TOTAL_TIME_RUNNING) { | |
memcpy(&time_running,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("running: %lld ",time_running); | |
offset+=8; | |
} | |
if (read_format & PERF_FORMAT_ID) { | |
memcpy(&id,&sample[offset],sizeof(long long)); | |
if (!quiet) printf("id: %lld ",id); | |
offset+=8; | |
} | |
if (!quiet) printf("\n"); | |
} | |
return offset; | |
} | |
static int print_regs(int quiet,long long abi,long long reg_mask, | |
unsigned char *data) { | |
int return_offset=0; | |
int num_regs=NUM_REGS; | |
int i; | |
unsigned long long reg_value; | |
if (!quiet) printf("\t\tReg mask %llx\n",reg_mask); | |
for(i=0;i<64;i++) { | |
if (reg_mask&1ULL<<i) { | |
if (!quiet) { | |
memcpy(®_value,&data[return_offset],8); | |
if (i<num_regs) { | |
printf("\t\t%s : ",reg_names[i]); | |
} | |
else { | |
printf("\t\t??? : "); | |
} | |
printf("%llx\n",reg_value); | |
} | |
return_offset+=8; | |
} | |
} | |
return return_offset; | |
} | |
/*
 * Load the index-th 64-bit value that follows the 4-byte header of an IBS
 * fetch raw sample.  memcpy is used because the values start at data+4 and
 * are therefore not 8-byte aligned relative to `data`.
 */
static unsigned long long ibs_fetch_load_msr(const unsigned char *data,
		int index)
{
	unsigned long long value;

	memcpy(&value, data + 4 + 8 * index, sizeof value);
	return value;
}

/* Value (0 or 1) of bit `bit` of `value`. */
static int ibs_fetch_bit(unsigned long long value, int bit)
{
	return (value >> bit) & 1ULL ? 1 : 0;
}

/*
 * Print a decoded IBS fetch raw sample to stdout.
 *
 * data  a 4-byte header followed by 64-bit values, which are printed as
 *       IBS_FETCH_CONTROL, IBS_FETCH_LINEAR_ADDRESS,
 *       IBS_FETCH_PHYSICAL_ADDRESS and, when size > 24, IBS_BRTARGET
 * size  length of the raw sample as reported in the record
 *
 * The first three values are read regardless of `size`, so the caller must
 * supply at least 28 readable bytes.  Always returns 0.
 */
static int dump_raw_ibs_fetch(unsigned char *data, int size) {
	unsigned int header;
	unsigned long long ctl, linear, physical;

	memcpy(&header, data, sizeof header);
	ctl = ibs_fetch_load_msr(data, 0);
	linear = ibs_fetch_load_msr(data, 1);
	physical = ibs_fetch_load_msr(data, 2);

	printf("\t\tHeader: %x\n", header);
	printf("\t\tMSR IBS_FETCH_CONTROL %llx\n", ctl);
	printf("\t\t\tIBS_RAND_EN: %d\n", ibs_fetch_bit(ctl, 57));
	printf("\t\t\tL2 iTLB Miss: %d\n", ibs_fetch_bit(ctl, 56));
	printf("\t\t\tL1 iTLB Miss: %d\n", ibs_fetch_bit(ctl, 55));
	printf("\t\t\tL1TLB page size: ");
	switch ((ctl >> 53) & 0x3) {
	case 0: printf("4kB\n"); break;
	case 1: printf("2MB\n"); break;
	case 2: printf("1GB\n"); break;
	default: printf("Reserved\n"); break;
	}
	printf("\t\t\tFetch Physical Address Valid: %d\n", ibs_fetch_bit(ctl, 52));
	printf("\t\t\ticache miss: %d\n", ibs_fetch_bit(ctl, 51));
	printf("\t\t\tInstruction Fetch Complete: %d\n", ibs_fetch_bit(ctl, 50));
	printf("\t\t\tInstruction Fetch Valid: %d\n", ibs_fetch_bit(ctl, 49));
	printf("\t\t\tInstruction Fetch Enabled: %d\n", ibs_fetch_bit(ctl, 48));
	printf("\t\t\tInstruction Fetch Latency: %lld\n",
		(long long)((ctl >> 32) & 0xffff));
	/* The two count fields are printed shifted left by four bits */
	printf("\t\t\tInstruction Fetch Count: %lld\n",
		(long long)(((ctl >> 16) & 0xffff) << 4));
	printf("\t\t\tInstruction Fetch Max Count: %lld\n",
		(long long)((ctl & 0xffff) << 4));
	printf("\t\tMSR IBS_FETCH_LINEAR_ADDRESS %llx\n", linear);
	printf("\t\tMSR IBS_FETCH_PHYSICAL_ADDRESS %llx\n", physical);
	if (size > 24) {
		printf("\t\tMSR IBS_BRTARGET %llx\n",
			ibs_fetch_load_msr(data, 3));
	}
	return 0;
}
/*
 * Load the index-th 64-bit value that follows the 4-byte header of an IBS
 * op raw sample.  memcpy is used because the values start at data+4 and
 * are therefore not 8-byte aligned relative to `data`.
 */
static unsigned long long ibs_op_load_msr(const unsigned char *data, int index)
{
	unsigned long long value;

	memcpy(&value, data + 4 + 8 * index, sizeof value);
	return value;
}

/* Value (0 or 1) of bit `bit` of `value`. */
static int ibs_op_bit(unsigned long long value, int bit)
{
	return (value >> bit) & 1ULL ? 1 : 0;
}

/*
 * Print a decoded IBS op raw sample to stdout.
 *
 * data  a 4-byte header followed by 64-bit values, printed in order as
 *       IBS_EXECUTION_CONTROL, IBS_OP_LOGICAL_ADDRESS, IBS_OP_DATA,
 *       IBS_OP_DATA2, IBS_OP_DATA3, then optionally
 *       IBS_DC_LINEAR_ADDRESS (when IBS_OP_DATA3 bit 17 is set),
 *       IBS_DC_PHYSICAL_ADDRESS (bit 18) and IBS_OP_DATA4 (size > 64)
 * size  length of the raw sample as reported in the record
 *
 * The first five values are read regardless of `size`, so the caller must
 * supply at least 44 readable bytes.  Always returns 0.
 */
static int dump_raw_ibs_op(unsigned char *data, int size) {
	/* Single-bit fields of IBS_OP_DATA, in print order */
	static const struct { int bit; const char *label; } data_flags[] = {
		{ 38, "RIP Invalid" },
		{ 37, "Branch Retired" },
		{ 36, "Branch Mispredicted" },
		{ 35, "Branch Taken" },
		{ 34, "Return uop" },
		{ 33, "Mispredicted Return uop" },
	};
	/* Single-bit fields of IBS_OP_DATA3, in print order */
	static const struct { int bit; const char *label; } data3_flags[] = {
		{ 19, "L2TLB data hit in 1GB page" },
		{ 18, "Data cache physical addr valid" },
		{ 17, "Data cache linear addr valid" },
		{ 16, "MAB hit" },
		{ 15, "Data cache locked operation" },
		{ 14, "Uncachable memory operation" },
		{ 13, "Write-combining memory operation" },
		{ 12, "Data forwarding store to load canceled" },
		{ 11, "Data forwarding store to load operation" },
		{  9, "Bank conflict on load operation" },
		{  8, "Misaligned access" },
		{  7, "Data cache miss" },
		{  6, "Data cache L2TLB hit in 2M" },
		{  5, "Data cache L2TLB hit in 1G" },
		{  4, "Data cache L1TLB hit in 2M" },
		{  3, "Data cache L2TLB miss" },
		{  2, "Data cache L1TLB miss" },
		{  1, "Operation is a store" },
		{  0, "Operation is a load" },
	};
	unsigned int header;
	unsigned long long ctl, logical, op_data, op_data2, op_data3;
	size_t i;

	memcpy(&header, data, sizeof header);
	ctl = ibs_op_load_msr(data, 0);
	logical = ibs_op_load_msr(data, 1);
	op_data = ibs_op_load_msr(data, 2);
	op_data2 = ibs_op_load_msr(data, 3);
	op_data3 = ibs_op_load_msr(data, 4);

	printf("\t\tHeader: %x\n", header);

	printf("\t\tMSR IBS_EXECUTION_CONTROL %llx\n", ctl);
	printf("\t\t\tIbsOpCurCnt: %lld\n",
		(long long)((ctl >> 32) & 0x3ffffff));
	printf("\t\t\tIBS OpCntCtl: %d\n", ibs_op_bit(ctl, 19));
	printf("\t\t\tIBS OpVal: %d\n", ibs_op_bit(ctl, 18));
	printf("\t\t\tIBS OpEn: %d\n", ibs_op_bit(ctl, 17));
	printf("\t\t\tIbsOpMaxCnt: %lld\n",
		(long long)(((ctl & 0xffff) << 4) | (ctl & 0x3f00000)));

	printf("\t\tMSR IBS_OP_LOGICAL_ADDRESS %llx\n", logical);

	printf("\t\tMSR IBS_OP_DATA %llx\n", op_data);
	for (i = 0; i < sizeof data_flags / sizeof data_flags[0]; i++) {
		printf("\t\t\t%s: %d\n", data_flags[i].label,
			ibs_op_bit(op_data, data_flags[i].bit));
	}
	printf("\t\t\tTag to Retire Cycles: %lld\n",
		(long long)((op_data >> 16) & 0xffff));
	printf("\t\t\tCompletion to Retire Cycles: %lld\n",
		(long long)(op_data & 0xffff));

	printf("\t\tMSR IBS_OP_DATA2 (Northbridge) %llx\n", op_data2);
	printf("\t\t\tCache Hit State: %c\n",
		ibs_op_bit(op_data2, 5) ? 'O' : 'M');
	printf("\t\t\tRequest destination node: %s\n",
		ibs_op_bit(op_data2, 4) ? "Same" : "Different");
	printf("\t\t\tNorthbridge data source: ");
	switch (op_data2 & 0x7) {
	case 0: printf("No valid status\n"); break;
	case 1: printf("L3\n"); break;
	case 2: printf("Cache from another compute unit\n"); break;
	case 3: printf("DRAM\n"); break;
	case 4: printf("Reserved remote cache\n"); break;
	case 5: printf("Reserved\n"); break;
	case 6: printf("Reserved\n"); break;
	case 7: printf("Other: MMIO/config/PCI/APIC\n"); break;
	}

	printf("\t\tMSR IBS_OP_DATA3 (cache) %llx\n", op_data3);
	printf("\t\t\tData Cache Miss Latency: %lld\n",
		(long long)((op_data3 >> 32) & 0xffff));
	for (i = 0; i < sizeof data3_flags / sizeof data3_flags[0]; i++) {
		printf("\t\t\t%s: %d\n", data3_flags[i].label,
			ibs_op_bit(op_data3, data3_flags[i].bit));
	}

	/* The address values are printed only when flagged valid above */
	if (ibs_op_bit(op_data3, 17)) {
		printf("\t\tMSR IBS_DC_LINEAR_ADDRESS %llx\n",
			ibs_op_load_msr(data, 5));
	}
	if (ibs_op_bit(op_data3, 18)) {
		printf("\t\tMSR IBS_DC_PHYSICAL_ADDRESS %llx\n",
			ibs_op_load_msr(data, 6));
	}
	if (size > 64) {
		printf("\t\tMSR IBS_OP_DATA4 %llx\n",
			ibs_op_load_msr(data, 7));
	}
	return 0;
}
long long perf_mmap_read( void *our_mmap, int mmap_size, | |
long long prev_head, | |
int sample_type, int read_format, long long reg_mask, | |
struct validate_values *validate, | |
int quiet, int *events_read, | |
int raw_type ) { | |
struct perf_event_mmap_page *control_page = our_mmap; | |
long long head,offset; | |
int i,size; | |
long long bytesize,prev_head_wrap; | |
unsigned char *data; | |
void *data_mmap=our_mmap+getpagesize(); | |
if (mmap_size==0) return 0; | |
if (control_page==NULL) { | |
fprintf(stderr,"ERROR mmap page NULL\n"); | |
return -1; | |
} | |
head=control_page->data_head; | |
rmb(); /* Must always follow read of data_head */ | |
size=head-prev_head; | |
//printf("Head: %lld Prev_head=%lld\n",head,prev_head); | |
//printf("%d new bytes\n",size); | |
bytesize=mmap_size*getpagesize(); | |
if (size>bytesize) { | |
printf("error! we overflowed the mmap buffer %d>%lld bytes\n", | |
size,bytesize); | |
} | |
data=malloc(bytesize); | |
if (data==NULL) { | |
return -1; | |
} | |
prev_head_wrap=prev_head%bytesize; | |
// printf("Copying %d bytes from %d to %d\n", | |
// bytesize-prev_head_wrap,prev_head_wrap,0); | |
memcpy(data,(unsigned char*)data_mmap + prev_head_wrap, | |
bytesize-prev_head_wrap); | |
//printf("Copying %d bytes from %d to %d\n", | |
// prev_head_wrap,0,bytesize-prev_head_wrap); | |
memcpy(data+(bytesize-prev_head_wrap),(unsigned char *)data_mmap, | |
prev_head_wrap); | |
struct perf_event_header *event; | |
offset=0; | |
if (events_read) *events_read=0; | |
while(offset<size) { | |
//printf("Offset %d Size %d\n",offset,size); | |
event = ( struct perf_event_header * ) & data[offset]; | |
/********************/ | |
/* Print event Type */ | |
/********************/ | |
if (!quiet) { | |
switch(event->type) { | |
case PERF_RECORD_MMAP: | |
printf("PERF_RECORD_MMAP"); | |
break; | |
case PERF_RECORD_LOST: | |
printf("PERF_RECORD_LOST"); | |
break; | |
case PERF_RECORD_COMM: | |
printf("PERF_RECORD_COMM"); | |
break; | |
case PERF_RECORD_EXIT: | |
printf("PERF_RECORD_EXIT"); | |
break; | |
case PERF_RECORD_THROTTLE: | |
printf("PERF_RECORD_THROTTLE"); | |
break; | |
case PERF_RECORD_UNTHROTTLE: | |
printf("PERF_RECORD_UNTHROTTLE"); | |
break; | |
case PERF_RECORD_FORK: | |
printf("PERF_RECORD_FORK"); | |
break; | |
case PERF_RECORD_READ: | |
printf("PERF_RECORD_READ"); | |
break; | |
case PERF_RECORD_SAMPLE: | |
printf("PERF_RECORD_SAMPLE [%x]",sample_type); | |
break; | |
case PERF_RECORD_MMAP2: | |
printf("PERF_RECORD_MMAP2"); | |
break; | |
case PERF_RECORD_AUX: | |
printf("PERF_RECORD_AUX"); | |
break; | |
case PERF_RECORD_ITRACE_START: | |
printf("PERF_RECORD_ITRACE_START"); | |
break; | |
case PERF_RECORD_LOST_SAMPLES: | |
printf("PERF_RECORD_LOST_SAMPLES"); | |
break; | |
case PERF_RECORD_SWITCH: | |
printf("PERF_RECORD_SWITCH"); | |
break; | |
case PERF_RECORD_SWITCH_CPU_WIDE: | |
printf("PERF_RECORD_SWITCH_CPU_WIDE"); | |
break; | |
default: printf("UNKNOWN %d",event->type); | |
break; | |
} | |
printf(", MISC=%d (",event->misc); | |
setBP = 0; | |
switch(event->misc & PERF_RECORD_MISC_CPUMODE_MASK) { | |
case PERF_RECORD_MISC_CPUMODE_UNKNOWN: | |
printf("PERF_RECORD_MISC_CPUMODE_UNKNOWN"); break; | |
case PERF_RECORD_MISC_KERNEL: | |
{ | |
setBP = 1; | |
printf("PERF_RECORD_MISC_KERNEL"); break; | |
} | |
case PERF_RECORD_MISC_USER: | |
{ | |
setBP = 0; | |
printf("PERF_RECORD_MISC_USER"); break; | |
} | |
case PERF_RECORD_MISC_HYPERVISOR: | |
printf("PERF_RECORD_MISC_HYPERVISOR"); break; | |
case PERF_RECORD_MISC_GUEST_KERNEL: | |
printf("PERF_RECORD_MISC_GUEST_KERNEL"); break; | |
case PERF_RECORD_MISC_GUEST_USER: | |
printf("PERF_RECORD_MISC_GUEST_USER"); break; | |
default: | |
printf("Unknown %d!\n",event->misc); break; | |
} | |
/* All three have the same value */ | |
if (event->misc & PERF_RECORD_MISC_MMAP_DATA) { | |
if (event->type==PERF_RECORD_MMAP) { | |
printf(",PERF_RECORD_MISC_MMAP_DATA "); | |
} | |
else if (event->type==PERF_RECORD_COMM) { | |
printf(",PERF_RECORD_MISC_COMM_EXEC "); | |
} | |
else if ((event->type==PERF_RECORD_SWITCH) || | |
(event->type==PERF_RECORD_SWITCH_CPU_WIDE)) { | |
printf(",PERF_RECORD_MISC_SWITCH_OUT "); | |
} | |
else { | |
printf("UNKNOWN ALIAS!!! "); | |
} | |
} | |
if (event->misc & PERF_RECORD_MISC_EXACT_IP) { | |
printf(",PERF_RECORD_MISC_EXACT_IP "); | |
} | |
if (event->misc & PERF_RECORD_MISC_EXT_RESERVED) { | |
printf(",PERF_RECORD_MISC_EXT_RESERVED "); | |
} | |
printf("), Size=%d\n",event->size); | |
} | |
offset+=8; /* skip header */ | |
/***********************/ | |
/* Print event Details */ | |
/***********************/ | |
switch(event->type) { | |
/* Lost */ | |
case PERF_RECORD_LOST: { | |
long long id,lost; | |
memcpy(&id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tID: %lld\n",id); | |
offset+=8; | |
memcpy(&lost,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tLOST: %lld\n",lost); | |
offset+=8; | |
} | |
break; | |
/* COMM */ | |
case PERF_RECORD_COMM: { | |
int pid,tid,string_size; | |
char *string; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
/* FIXME: sample_id handling? */ | |
/* two ints plus the 64-bit header */ | |
string_size=event->size-16; | |
string=calloc(string_size,sizeof(char)); | |
memcpy(string,&data[offset],string_size); | |
if (!quiet) printf("\tcomm: %s\n",string); | |
offset+=string_size; | |
if (string) free(string); | |
} | |
break; | |
/* Fork */ | |
case PERF_RECORD_FORK: { | |
int pid,ppid,tid,ptid; | |
long long fork_time; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&ppid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPPID: %d\n",ppid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
memcpy(&ptid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPTID: %d\n",ptid); | |
offset+=4; | |
memcpy(&fork_time,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tTime: %lld\n",fork_time); | |
offset+=8; | |
} | |
break; | |
/* mmap */ | |
case PERF_RECORD_MMAP: { | |
int pid,tid,string_size; | |
long long address,len,pgoff; | |
char *filename; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
memcpy(&address,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tAddress: %llx\n",address); | |
offset+=8; | |
memcpy(&len,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tLength: %llx\n",len); | |
offset+=8; | |
memcpy(&pgoff,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPage Offset: %llx\n",pgoff); | |
offset+=8; | |
string_size=event->size-40; | |
filename=calloc(string_size,sizeof(char)); | |
memcpy(filename,&data[offset],string_size); | |
if (!quiet) printf("\tFilename: %s\n",filename); | |
offset+=string_size; | |
if (filename) free(filename); | |
} | |
break; | |
/* mmap2 */ | |
case PERF_RECORD_MMAP2: { | |
int pid,tid,string_size; | |
long long address,len,pgoff; | |
int major,minor; | |
long long ino,ino_generation; | |
int prot,flags; | |
char *filename; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
memcpy(&address,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tAddress: %llx\n",address); | |
offset+=8; | |
memcpy(&len,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tLength: %llx\n",len); | |
offset+=8; | |
memcpy(&pgoff,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPage Offset: %llx\n",pgoff); | |
offset+=8; | |
memcpy(&major,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tMajor: %d\n",major); | |
offset+=4; | |
memcpy(&minor,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tMinor: %d\n",minor); | |
offset+=4; | |
memcpy(&ino,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tIno: %llx\n",ino); | |
offset+=8; | |
memcpy(&ino_generation,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tIno generation: %llx\n",ino_generation); | |
offset+=8; | |
memcpy(&prot,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tProt: %d\n",prot); | |
offset+=4; | |
memcpy(&flags,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tFlags: %d\n",flags); | |
offset+=4; | |
string_size=event->size-72; | |
filename=calloc(string_size,sizeof(char)); | |
memcpy(filename,&data[offset],string_size); | |
if (!quiet) printf("\tFilename: %s\n",filename); | |
offset+=string_size; | |
if (filename) free(filename); | |
} | |
break; | |
/* Exit */ | |
case PERF_RECORD_EXIT: { | |
int pid,ppid,tid,ptid; | |
long long fork_time; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&ppid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPPID: %d\n",ppid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
memcpy(&ptid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPTID: %d\n",ptid); | |
offset+=4; | |
memcpy(&fork_time,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tTime: %lld\n",fork_time); | |
offset+=8; | |
} | |
break; | |
/* Throttle/Unthrottle */ | |
case PERF_RECORD_THROTTLE: | |
case PERF_RECORD_UNTHROTTLE: { | |
long long throttle_time,id,stream_id; | |
memcpy(&throttle_time,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tTime: %lld\n",throttle_time); | |
offset+=8; | |
memcpy(&id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tID: %lld\n",id); | |
offset+=8; | |
memcpy(&stream_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tStream ID: %lld\n",stream_id); | |
offset+=8; | |
} | |
break; | |
/* Sample */ | |
case PERF_RECORD_SAMPLE: | |
if (sample_type & PERF_SAMPLE_IP) { | |
long long ip; | |
memcpy(&ip,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_IP, IP: %llx\n",ip); | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_TID) { | |
int pid, tid; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
memcpy(&tid,&data[offset+4],sizeof(int)); | |
if (validate) { | |
if (validate->pid!=pid) { | |
fprintf(stderr,"Error, pid %d != %d\n", | |
validate->pid,pid); | |
} | |
} | |
if (!quiet) { | |
printf("\tPERF_SAMPLE_TID, pid: %d tid %d\n",pid,tid); | |
} | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_TIME) { | |
long long time; | |
memcpy(&time,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_TIME, time: %lld\n",time); | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_ADDR) { | |
long long addr; | |
memcpy(&addr,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_ADDR, addr: %llx\n",addr); | |
dbgAddr = addr; | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_ID) { | |
long long sample_id; | |
memcpy(&sample_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_ID, sample_id: %lld\n",sample_id); | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_STREAM_ID) { | |
long long sample_stream_id; | |
memcpy(&sample_stream_id,&data[offset],sizeof(long long)); | |
if (!quiet) { | |
printf("\tPERF_SAMPLE_STREAM_ID, sample_stream_id: %lld\n",sample_stream_id); | |
} | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_CPU) { | |
int cpu, res; | |
memcpy(&cpu,&data[offset],sizeof(int)); | |
memcpy(&res,&data[offset+4],sizeof(int)); | |
if (!quiet) printf("\tPERF_SAMPLE_CPU, cpu: %d res %d\n",cpu,res); | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_PERIOD) { | |
long long period; | |
memcpy(&period,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_PERIOD, period: %lld\n",period); | |
offset+=8; | |
} | |
if (sample_type & PERF_SAMPLE_READ) { | |
int length; | |
if (!quiet) printf("\tPERF_SAMPLE_READ, read_format\n"); | |
length=handle_struct_read_format(&data[offset], | |
read_format, | |
validate,quiet); | |
if (length>=0) offset+=length; | |
} | |
if (sample_type & PERF_SAMPLE_CALLCHAIN) { | |
long long nr,ip; | |
memcpy(&nr,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_CALLCHAIN, callchain length: %lld\n",nr); | |
offset+=8; | |
for(i=0;i<nr;i++) { | |
memcpy(&ip,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\t\t ip[%d]: %llx\n",i,ip); | |
offset+=8; | |
} | |
} | |
if (sample_type & PERF_SAMPLE_RAW) { | |
int size; | |
memcpy(&size,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPERF_SAMPLE_RAW, Raw length: %d\n",size); | |
offset+=4; | |
if (!quiet) { | |
if (raw_type==RAW_IBS_FETCH) { | |
dump_raw_ibs_fetch(&data[offset],size); | |
} | |
else if (raw_type==RAW_IBS_OP) { | |
dump_raw_ibs_op(&data[offset],size); | |
} | |
else { | |
printf("\t\t"); | |
for(i=0;i<size;i++) { | |
printf("%d ",data[offset+i]); | |
} | |
printf("\n"); | |
} | |
} | |
offset+=size; | |
} | |
if (sample_type & PERF_SAMPLE_BRANCH_STACK) { | |
long long bnr; | |
memcpy(&bnr,&data[offset],sizeof(long long)); | |
if (!quiet) { | |
printf("\tPERF_SAMPLE_BRANCH_STACK, branch_stack entries: %lld\n",bnr); | |
} | |
offset+=8; | |
for(i=0;i<bnr;i++) { | |
long long from,to,flags; | |
/* From value */ | |
memcpy(&from,&data[offset],sizeof(long long)); | |
offset+=8; | |
/* Could be more complete here */ | |
if (validate) { | |
if (from < validate->branch_low) { | |
fprintf(stderr,"Error! branch out of bounds!\n"); | |
} | |
} | |
/* To Value */ | |
memcpy(&to,&data[offset],sizeof(long long)); | |
offset+=8; | |
if (!quiet) { | |
printf("\t\t lbr[%d]: %llx %llx ", | |
i,from,to); | |
} | |
/* Flags */ | |
memcpy(&flags,&data[offset],sizeof(long long)); | |
offset+=8; | |
if (!quiet) { | |
if (flags==0) printf("0"); | |
if (flags&1) { | |
printf("MISPREDICTED "); | |
flags&=~2; | |
} | |
if (flags&2) { | |
printf("PREDICTED "); | |
flags&=~2; | |
} | |
if (flags&4) { | |
printf("IN_TRANSACTION "); | |
flags&=~4; | |
} | |
if (flags&8) { | |
printf("TRANSACTION_ABORT "); | |
flags&=~8; | |
} | |
printf("\n"); | |
} | |
} | |
} | |
if (sample_type & PERF_SAMPLE_REGS_USER) { | |
long long abi; | |
memcpy(&abi,&data[offset],sizeof(long long)); | |
if (!quiet) { | |
printf("\tPERF_SAMPLE_REGS_USER, ABI: "); | |
if (abi==PERF_SAMPLE_REGS_ABI_NONE) printf ("PERF_SAMPLE_REGS_ABI_NONE"); | |
if (abi==PERF_SAMPLE_REGS_ABI_32) printf("PERF_SAMPLE_REGS_ABI_32"); | |
if (abi==PERF_SAMPLE_REGS_ABI_64) printf("PERF_SAMPLE_REGS_ABI_64"); | |
printf("\n"); | |
} | |
offset+=8; | |
offset+=print_regs(quiet,abi,reg_mask, | |
&data[offset]); | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_REGS_INTR) { | |
long long abi; | |
memcpy(&abi,&data[offset],sizeof(long long)); | |
if (!quiet) { | |
printf("\tPERF_SAMPLE_REGS_INTR, ABI: "); | |
if (abi==PERF_SAMPLE_REGS_ABI_NONE) printf ("PERF_SAMPLE_REGS_ABI_NONE"); | |
if (abi==PERF_SAMPLE_REGS_ABI_32) printf("PERF_SAMPLE_REGS_ABI_32"); | |
if (abi==PERF_SAMPLE_REGS_ABI_64) printf("PERF_SAMPLE_REGS_ABI_64"); | |
printf("\n"); | |
} | |
offset+=8; | |
offset+=print_regs(quiet,abi,reg_mask, | |
&data[offset]); | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_STACK_USER) { | |
long long size,dyn_size; | |
int *stack_data; | |
int k; | |
memcpy(&size,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_STACK_USER, Requested size: %lld\n",size); | |
offset+=8; | |
stack_data=malloc(size); | |
memcpy(stack_data,&data[offset],size); | |
offset+=size; | |
memcpy(&dyn_size,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\t\tDynamic (used) size: %lld\n",dyn_size); | |
offset+=8; | |
if (!quiet) printf("\t\t"); | |
for(k=0;k<dyn_size;k+=4) { | |
if (!quiet) printf("0x%x ",stack_data[k]); | |
} | |
free(stack_data); | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_WEIGHT) { | |
long long weight; | |
memcpy(&weight,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_WEIGHT, Weight: %lld ",weight); | |
offset+=8; | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_DATA_SRC) { | |
long long src; | |
memcpy(&src,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_DATA_SRC, Raw: %llx\n",src); | |
offset+=8; | |
dbgSrc = src; | |
if (!quiet) { | |
if (src!=0) printf("\t\t"); | |
if (src & (PERF_MEM_OP_NA<<PERF_MEM_OP_SHIFT)) | |
printf("Op Not available "); | |
if (src & (PERF_MEM_OP_LOAD<<PERF_MEM_OP_SHIFT)) | |
printf("Load "); | |
if (src & (PERF_MEM_OP_STORE<<PERF_MEM_OP_SHIFT)) | |
printf("Store "); | |
if (src & (PERF_MEM_OP_PFETCH<<PERF_MEM_OP_SHIFT)) | |
printf("Prefetch "); | |
if (src & (PERF_MEM_OP_EXEC<<PERF_MEM_OP_SHIFT)) | |
printf("Executable code "); | |
if (src & (PERF_MEM_LVL_NA<<PERF_MEM_LVL_SHIFT)) | |
printf("Level Not available "); | |
if (src & (PERF_MEM_LVL_HIT<<PERF_MEM_LVL_SHIFT)) | |
printf("Hit "); | |
if (src & (PERF_MEM_LVL_MISS<<PERF_MEM_LVL_SHIFT)) | |
printf("Miss "); | |
if (src & (PERF_MEM_LVL_L1<<PERF_MEM_LVL_SHIFT)) | |
printf("L1 cache "); | |
if (src & (PERF_MEM_LVL_LFB<<PERF_MEM_LVL_SHIFT)) | |
printf("Line fill buffer "); | |
if (src & (PERF_MEM_LVL_L2<<PERF_MEM_LVL_SHIFT)) | |
printf("L2 cache "); | |
if (src & (PERF_MEM_LVL_L3<<PERF_MEM_LVL_SHIFT)) | |
printf("L3 cache "); | |
if (src & (PERF_MEM_LVL_LOC_RAM<<PERF_MEM_LVL_SHIFT)) | |
printf("Local DRAM "); | |
if (src & (PERF_MEM_LVL_REM_RAM1<<PERF_MEM_LVL_SHIFT)) | |
printf("Remote DRAM 1 hop "); | |
if (src & (PERF_MEM_LVL_REM_RAM2<<PERF_MEM_LVL_SHIFT)) | |
printf("Remote DRAM 2 hops "); | |
if (src & (PERF_MEM_LVL_REM_CCE1<<PERF_MEM_LVL_SHIFT)) | |
printf("Remote cache 1 hop "); | |
if (src & (PERF_MEM_LVL_REM_CCE2<<PERF_MEM_LVL_SHIFT)) | |
printf("Remote cache 2 hops "); | |
if (src & (PERF_MEM_LVL_IO<<PERF_MEM_LVL_SHIFT)) | |
printf("I/O memory "); | |
if (src & (PERF_MEM_LVL_UNC<<PERF_MEM_LVL_SHIFT)) | |
printf("Uncached memory "); | |
if (src & (PERF_MEM_SNOOP_NA<<PERF_MEM_SNOOP_SHIFT)) | |
printf("Not available "); | |
if (src & (PERF_MEM_SNOOP_NONE<<PERF_MEM_SNOOP_SHIFT)) | |
printf("No snoop "); | |
if (src & (PERF_MEM_SNOOP_HIT<<PERF_MEM_SNOOP_SHIFT)) | |
printf("Snoop hit "); | |
if (src & (PERF_MEM_SNOOP_MISS<<PERF_MEM_SNOOP_SHIFT)) | |
printf("Snoop miss "); | |
if (src & (PERF_MEM_SNOOP_HITM<<PERF_MEM_SNOOP_SHIFT)) | |
printf("Snoop hit modified "); | |
if (src & (PERF_MEM_LOCK_NA<<PERF_MEM_LOCK_SHIFT)) | |
printf("Not available "); | |
if (src & (PERF_MEM_LOCK_LOCKED<<PERF_MEM_LOCK_SHIFT)) | |
printf("Locked transaction "); | |
if (src & (PERF_MEM_TLB_NA<<PERF_MEM_TLB_SHIFT)) | |
printf("Not available "); | |
if (src & (PERF_MEM_TLB_HIT<<PERF_MEM_TLB_SHIFT)) | |
printf("Hit "); | |
if (src & (PERF_MEM_TLB_MISS<<PERF_MEM_TLB_SHIFT)) | |
printf("Miss "); | |
if (src & (PERF_MEM_TLB_L1<<PERF_MEM_TLB_SHIFT)) | |
printf("Level 1 TLB "); | |
if (src & (PERF_MEM_TLB_L2<<PERF_MEM_TLB_SHIFT)) | |
printf("Level 2 TLB "); | |
if (src & (PERF_MEM_TLB_WK<<PERF_MEM_TLB_SHIFT)) | |
printf("Hardware walker "); | |
if (src & ((long long)PERF_MEM_TLB_OS<<PERF_MEM_TLB_SHIFT)) | |
printf("OS fault handler "); | |
} | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_IDENTIFIER) { | |
long long abi; | |
memcpy(&abi,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_IDENTIFIER, Raw length: %lld\n",abi); | |
offset+=8; | |
if (!quiet) printf("\n"); | |
} | |
if (sample_type & PERF_SAMPLE_TRANSACTION) { | |
long long abi; | |
memcpy(&abi,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tPERF_SAMPLE_TRANSACTION, Raw length: %lld\n",abi); | |
offset+=8; | |
if (!quiet) printf("\n"); | |
} | |
break; | |
/* AUX */ | |
case PERF_RECORD_AUX: { | |
long long aux_offset,aux_size,flags; | |
long long sample_id; | |
memcpy(&aux_offset,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tAUX_OFFSET: %lld\n",aux_offset); | |
offset+=8; | |
memcpy(&aux_size,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tAUX_SIZE: %lld\n",aux_size); | |
offset+=8; | |
memcpy(&flags,&data[offset],sizeof(long long)); | |
if (!quiet) { | |
printf("\tFLAGS: %llx ",flags); | |
if (flags & PERF_AUX_FLAG_TRUNCATED) { | |
printf("FLAG_TRUNCATED "); | |
} | |
if (flags & PERF_AUX_FLAG_OVERWRITE) { | |
printf("FLAG_OVERWRITE "); | |
} | |
printf("\n"); | |
} | |
offset+=8; | |
memcpy(&sample_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id); | |
offset+=8; | |
} | |
break; | |
/* itrace start */ | |
case PERF_RECORD_ITRACE_START: { | |
int pid,tid; | |
memcpy(&pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPID: %d\n",pid); | |
offset+=4; | |
memcpy(&tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tTID: %d\n",tid); | |
offset+=4; | |
} | |
break; | |
/* lost samples PEBS */ | |
case PERF_RECORD_LOST_SAMPLES: { | |
long long lost,sample_id; | |
memcpy(&lost,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tLOST: %lld\n",lost); | |
offset+=8; | |
memcpy(&sample_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id); | |
offset+=8; | |
} | |
break; | |
/* context switch */ | |
case PERF_RECORD_SWITCH: { | |
long long sample_id; | |
memcpy(&sample_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id); | |
offset+=8; | |
} | |
break; | |
/* context switch cpu-wide*/ | |
case PERF_RECORD_SWITCH_CPU_WIDE: { | |
int prev_pid,prev_tid; | |
long long sample_id; | |
memcpy(&prev_pid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPREV_PID: %d\n",prev_pid); | |
offset+=4; | |
memcpy(&prev_tid,&data[offset],sizeof(int)); | |
if (!quiet) printf("\tPREV_TID: %d\n",prev_tid); | |
offset+=4; | |
memcpy(&sample_id,&data[offset],sizeof(long long)); | |
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id); | |
offset+=8; | |
} | |
break; | |
default: | |
if (!quiet) printf("\tUnknown type %d\n",event->type); | |
} | |
if (events_read) (*events_read)++; | |
} | |
control_page->data_tail=head; | |
free(data); | |
return head; | |
} | |
#include <pthread.h>

/* Two file-local flags; both start out as zero. */
static int bpSet = 0;
static int once = 0;

/* Mutex taken by the SIGIO handler and by main() around its perf ioctl calls. */
pthread_mutex_t lock;
static void our_handler(int signum, siginfo_t *info, void *uc) { | |
int ret; | |
char stringToSend[BUFFER_LENGTH]; | |
int fd = info->si_fd; | |
int si_errno = info->si_errno; | |
int si_code= info->si_code; | |
pthread_mutex_lock(&lock); | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, 0); | |
{ | |
printf("PEBS+++++++++>\n"); | |
ret=ioctl(fdBP, PERF_EVENT_IOC_DISABLE, 0); | |
count_total++; | |
prev_head=perf_mmap_read(our_mmap,MMAP_DATA_SIZE,prev_head, | |
global_sample_type,0,global_sample_regs_user, | |
NULL,quiet,NULL,RAW_NONE); | |
} | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_REFRESH, 1); | |
pthread_mutex_unlock(&lock); | |
(void) ret; | |
} | |
int get_latency_load_event(unsigned long long *config, | |
unsigned long long *config1, | |
int *precise_ip, | |
char *name) { | |
int processor,processor_notfound=0; | |
processor=detect_processor(); | |
printf("proessor %d \n", processor); | |
switch(processor) { | |
case PROCESSOR_NEHALEM: | |
case PROCESSOR_NEHALEM_EX: | |
*config=0x100b; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
/* env LIBPFM_ENCODE_INACTIVE=1 \ | |
./check_events wsm::MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD */ | |
case PROCESSOR_WESTMERE: | |
case PROCESSOR_WESTMERE_EX: | |
*config=0x100b; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
case PROCESSOR_SANDYBRIDGE: | |
case PROCESSOR_SANDYBRIDGE_EP: | |
*config=0x1cd; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
case PROCESSOR_IVYBRIDGE: | |
case PROCESSOR_IVYBRIDGE_EP: | |
*config=0x1cd; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
case PROCESSOR_HASWELL: | |
case PROCESSOR_HASWELL_EP: | |
*config=0x1cd; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
case PROCESSOR_BROADWELL: | |
*config=0x1cd; | |
*config1=0x3; | |
*precise_ip=2; | |
strcpy(name,"MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD"); | |
break; | |
case PROCESSOR_SKYLAKE: | |
*config=0x5308d1; | |
*config1=0x0; | |
*precise_ip=3; | |
strcpy(name, "MEM_LOAD_RETIRED:L1_MISS"); | |
break; | |
default: | |
*config=0x0; | |
*config1=0x0; | |
*precise_ip=0; | |
strcpy(name,"UNKNOWN"); | |
processor_notfound=-1; | |
} | |
return processor_notfound; | |
} | |
/*
 * Two-line shell snippet: a "#/bin/bash" line followed by a quiet,
 * five-packet ping of google.com.
 * NOTE(review): the first line has no '!', so it is a plain comment rather
 * than a shebang -- confirm whether that is intended.
 */
#define SHELLSCRIPT \
	"#/bin/bash \n" \
	"ping -q -c5 google.com \n"
/*
 * Number of pages passed to mmap()/munmap() for the sample buffer:
 * MMAP_DATA_SIZE pages plus one more (presumably the perf control page --
 * confirm against the perf_event_open mmap layout).
 */
int mmap_pages = 1 + MMAP_DATA_SIZE;
void enablePEBS() | |
{ | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_RESET, 0); | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_ENABLE, 0); | |
} | |
void enableBP() | |
{ | |
ioctl(fdBP, PERF_EVENT_IOC_RESET, 0); | |
ioctl(fdBP, PERF_EVENT_IOC_ENABLE, 0); | |
} | |
void disablePEBS() | |
{ | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, 0); | |
} | |
void disableBP() | |
{ | |
ioctl(fdBP, PERF_EVENT_IOC_DISABLE, 0); | |
} | |
long long readPEBSCount() | |
{ | |
long long count; | |
read(fdPreciseSample, &count, sizeof(long long)); | |
return count; | |
} | |
/*
 * Event ids and the counter values paired with them.  main() compares each
 * entry of a group read against id1/id2 and stores the matching value in
 * val1/val2; id2 is filled in by initPEBS() via PERF_EVENT_IOC_ID.
 */
uint64_t id1;
uint64_t id2;
uint64_t val1;
uint64_t val2;

/* Buffer layout of a group read: an entry count, then that many value/id pairs. */
struct read_format {
	uint64_t nr;			/* number of entries in values[] */
	struct {
		uint64_t value;		/* counter value */
		uint64_t id;		/* id of the event the value belongs to */
	} values[];
};
void initPEBS() | |
{ | |
struct perf_event_attr pe; | |
int result,precise_ip; | |
char event_name[BUFSIZ]; | |
int iter=0; | |
memset(&pe, 0, sizeof(struct perf_event_attr)); | |
result=get_latency_load_event(&pe.config,&pe.config1, | |
&precise_ip,event_name); | |
if (result<0) { | |
if (!quiet) fprintf(stderr,"No load latency event available, trying instructions (probably will return 0)\n"); | |
pe.type=PERF_TYPE_HARDWARE; | |
pe.config=PERF_COUNT_HW_INSTRUCTIONS; | |
} | |
else { | |
pe.type=PERF_TYPE_RAW; | |
if (!quiet) printf("Using event %s\n",event_name); | |
} | |
printf("precise_ip %d pe.config1 %d\n",precise_ip,pe.config1); | |
pe.precise_ip=precise_ip; | |
pe.size = sizeof(struct perf_event_attr); | |
pe.sample_period=SAMPLE_FREQUENCY; | |
pe.sample_type= PERF_SAMPLE_IP | PERF_SAMPLE_ADDR | PERF_SAMPLE_REGS_USER; | |
global_sample_type = pe.sample_type; | |
pe.disabled = 1; | |
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID; | |
pe.wakeup_events=1; | |
pe.exclude_user = 0; | |
pe.exclude_kernel = 1; | |
fdPreciseSample = perf_event_open(&pe, 0, -1, -1, 0); | |
if (fdPreciseSample == -1) { | |
fprintf(stderr, "Error opening leader %llx\n", pe.config); | |
exit(EXIT_FAILURE); | |
} | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_ID, &id2); | |
#if 1 | |
our_mmap=mmap(NULL, mmap_pages*getpagesize(), | |
PROT_READ|PROT_WRITE, MAP_SHARED, fdPreciseSample, 0); | |
/* Sigaction */ | |
fcntl(fdPreciseSample, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC); | |
fcntl(fdPreciseSample, F_SETSIG, SIGIO); | |
fcntl(fdPreciseSample, F_SETOWN,getpid()); | |
/* Sigaction */ | |
#endif | |
} | |
//struct perf_event_attr peBP; | |
#define MAX_WP_SLOTS (5) | |
int main(int argc, char **argv) { | |
long long count; | |
if (pthread_mutex_init(&lock, NULL) != 0) | |
{ | |
printf("\n mutex init failed\n"); | |
return 1; | |
} | |
/* Sigaction */ | |
struct sigaction sa; | |
memset(&sa, 0, sizeof(struct sigaction)); | |
sa.sa_sigaction = our_handler; | |
sa.sa_flags = SA_SIGINFO; | |
if (sigaction( SIGIO, &sa, NULL) < 0) { | |
fprintf(stderr,"Error setting up signal handler\n"); | |
exit(1); | |
} | |
/* Sigaction */ | |
/* Kernel Module init ends */ | |
initPEBS(); | |
pthread_mutex_lock(&lock); | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP); | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP); | |
pthread_mutex_unlock(&lock); | |
int coun = 10; | |
while(coun-->0) | |
naive_matrix_multiply(1); | |
pthread_mutex_lock(&lock); | |
ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); | |
pthread_mutex_unlock(&lock); | |
#if 1 | |
char buf[4096]; | |
struct read_format* rf = (struct read_format*) buf; | |
int i,j; | |
read(fdBP, buf, sizeof(buf)); | |
for (i = 0; i < rf->nr; i++) { | |
if (rf->values[i].id == id1) { | |
val1 = rf->values[i].value; | |
} else if (rf->values[i].id == id2) { | |
val2 = rf->values[i].value; | |
} | |
} | |
printf("cpu cycles: %"PRIu64"\n", val1); | |
printf("page faults: %"PRIu64"\n", val2); | |
#endif | |
printf("Used %lld instructions. Our handler counted %d and bp_count_total %d\n", count,count_total,bp_count_total); | |
munmap(our_mmap,mmap_pages*getpagesize()); | |
close(fdPreciseSample); | |
pthread_mutex_destroy(&lock); | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment