Skip to content

Instantly share code, notes, and snippets.

@proywm
Last active August 7, 2019 02:12
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save proywm/36561da2a99f4ada8eafe8bdbdf3bbb7 to your computer and use it in GitHub Desktop.
Save proywm/36561da2a99f4ada8eafe8bdbdf3bbb7 to your computer and use it in GitHub Desktop.
all:
gcc -g -O0 *.c -o record_sample
$ timeout 1 ./record_sample
model 94 cpu_family 6
proessor 30
Using event MEM_LOAD_RETIRED:L1_MISS
precise_ip 3 pe.config1 0
sys_perf_event_open(0x7ffcd7b2fab0,0,-1,-1,0
type: 4
size: 112
config: 5308d1 (5441745)
sample_period: 10000
sample_type: 4105
read_format: 12
disabled: 1
inherit: 0
pinned: 0
exclusive: 0
exclude_user: 0
exclude_kernel: 1
exclude_hv: 0
exclude_idle: 0
mmap: 0
comm: 0
freq: 0
inherit_stat: 0
enable_on_exec: 0
task: 0
watermark: 0
Returned -1 22 Invalid argument
Error opening leader 5308d1
#define _GNU_SOURCE 1
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>
#include <errno.h>
#include <signal.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
#include <asm/unistd.h>
#include <sys/prctl.h>
#include <linux/perf_event.h>
#include <asm/perf_regs.h>
#include <linux/hw_breakpoint.h>
/* CPU vendor identifiers; detect_processor_cpuinfo() stores one of these
 * in processor_vendor. */
#define VENDOR_UNKNOWN -1
#define VENDOR_INTEL 1
#define VENDOR_AMD 2
#define VENDOR_IBM 3
#define VENDOR_ARM 4
/* Processor identifiers; detect_processor_cpuinfo() stores one of these
 * in processor_type.  The x86 parts use 1..30, POWER uses 103..107 and
 * ARM uses 200 and up. */
#define PROCESSOR_UNKNOWN -1
#define PROCESSOR_PENTIUM_PRO 1
#define PROCESSOR_PENTIUM_II 2
#define PROCESSOR_PENTIUM_III 3
#define PROCESSOR_PENTIUM_4 4
#define PROCESSOR_PENTIUM_M 5
#define PROCESSOR_COREDUO 6
#define PROCESSOR_CORE2 7
#define PROCESSOR_NEHALEM 8
#define PROCESSOR_NEHALEM_EX 9
#define PROCESSOR_WESTMERE 10
#define PROCESSOR_WESTMERE_EX 11
#define PROCESSOR_SANDYBRIDGE 12
#define PROCESSOR_ATOM 13
#define PROCESSOR_K7 14
#define PROCESSOR_K8 15
#define PROCESSOR_AMD_FAM10H 16
#define PROCESSOR_AMD_FAM11H 17
#define PROCESSOR_AMD_FAM14H 18
#define PROCESSOR_AMD_FAM15H 19
#define PROCESSOR_IVYBRIDGE 20
#define PROCESSOR_KNIGHTSCORNER 21
#define PROCESSOR_SANDYBRIDGE_EP 22
#define PROCESSOR_AMD_FAM16H 23
#define PROCESSOR_IVYBRIDGE_EP 24
#define PROCESSOR_HASWELL 25
#define PROCESSOR_ATOM_CEDARVIEW 26
#define PROCESSOR_ATOM_SILVERMONT 27
#define PROCESSOR_BROADWELL 28
#define PROCESSOR_HASWELL_EP 29
#define PROCESSOR_SKYLAKE 30
#define PROCESSOR_POWER3 103
#define PROCESSOR_POWER4 104
#define PROCESSOR_POWER5 105
#define PROCESSOR_POWER6 106
#define PROCESSOR_POWER7 107
#define PROCESSOR_CORTEX_A8 200
#define PROCESSOR_CORTEX_A9 201
#define PROCESSOR_CORTEX_A5 202
#define PROCESSOR_CORTEX_A15 203
#define PROCESSOR_ARM1176 204
#define PROCESSOR_CORTEX_A7 205
#define PROCESSOR_CORTEX_A57 206
#define PROCESSOR_CORTEX_A53 207
/* NOTE(review): the five values below look like command/request codes;
 * none of them is used in the part of the file reviewed here -- confirm
 * their consumer before changing them. */
#define START_SAMPLING 1
#define STOP_SAMPLING_AND_WRITE_TO_FILE 2
#define DO_SAMPLE 3
#define GET_MESSAGE_LEN 4
#define GET_DATA 5
/* NOTE(review): not referenced in the part of the file reviewed here. */
#define MAX_TEST_EVENTS 16
/* Read memory barrier (x86 "lfence" with a compiler memory clobber).
 * perf_mmap_read() issues it right after loading data_head from the
 * perf control page. */
#define rmb() asm volatile("lfence":::"memory")
/* NOTE(review): presumably the number of data pages in the perf mmap
 * buffer -- TODO confirm against the mmap() call site. */
#define MMAP_DATA_SIZE 8
/* NOTE(review): presumably the sampling period/frequency programmed into
 * the perf event -- TODO confirm against the event setup code. */
#define SAMPLE_FREQUENCY 10000
/* NOTE(review): not referenced in the part of the file reviewed here. */
#define DBG_FREQUENCY 5
#define BUFFER_LENGTH 256 ///< The buffer length (crude but fine)
/* Number of rows in reg_names[]; print_regs() prints "???" for any
 * register index at or above this value. */
#define NUM_REGS PERF_REG_X86_64_MAX
/* Printable register names.  print_regs() indexes this table with the bit
 * position of each set bit in the sampled register mask. */
static char reg_names[NUM_REGS][8]=
{"RAX","RBX","RCX","RDX","RSI","RDI","RBP","RSP",
"RIP","RFLAGS","CS","SS","DS","ES","FS","GS",
"R8","R9","R10","R11","R12","R13","R14","R15"};
/*
 * File-scope state.
 *
 * NOTE(review): most of these variables are not referenced in the part of
 * the file reviewed here; only the ones with a comment below have a use
 * that is visible.  Be aware that perf_mmap_read() has parameters named
 * our_mmap, prev_head and quiet, which shadow the globals of the same name
 * inside that function.
 */
static int count_total=0;
static int bp_count_total=0;
static char *our_mmap;
static char *bp_mmap;
static long long prev_head;
static long long prev_head_bp;
static int quiet;
static long long global_sample_type;
static long long global_sample_regs_user;
static long long global_sample_type_BP;
static long long global_sample_regs_user_BP;
/* Value of the last PERF_SAMPLE_ADDR field decoded by perf_mmap_read(). */
long long dbgAddr;
/* Value of the last PERF_SAMPLE_DATA_SRC field decoded by perf_mmap_read(). */
long long dbgSrc;
bool hasPrevious = false;
int dbgFdPtr;
static int fdLKM;
int fdPreciseSample;
int fdBP;
/* Updated by perf_mmap_read() while it prints a record header (i.e. only
 * when not quiet): 1 if the record's CPU mode is kernel, 0 otherwise. */
int setBP = 0;
/* Cached detection results; -2 means "not probed yet" (see
 * detect_processor() and detect_vendor()). */
static int processor_type=-2;
static int processor_vendor=-2;
struct perf_event_attr peBP;
/* Capacity of each array in struct recordValues. */
#define NUM_VALUES 4
/*
 * Fixed-size record of up to NUM_VALUES (name, address, value) triples,
 * stored as three parallel arrays.
 * NOTE(review): neither this struct nor the instance `wp` is used in the
 * part of the file reviewed here -- confirm the intended meaning of each
 * field with its consumer.
 */
struct recordValues {
char names[NUM_VALUES];
unsigned long addresses[NUM_VALUES];
unsigned long values[NUM_VALUES];
};
struct recordValues wp;
#include <execinfo.h>
/*
 * Print the current call stack (at most 16 frames) to stdout, framed by
 * "[stack trace]>>>" and "<<<[stack trace]" marker lines.
 *
 * backtrace_symbols() allocates its result with malloc() and returns NULL
 * on failure; in that case the raw return addresses are printed instead
 * of dereferencing a NULL symbol table.
 */
void stack_trace(){
    void *frames[16];
    char **symbols;
    int depth;
    int i;

    depth = backtrace(frames, 16);
    symbols = backtrace_symbols(frames, depth);

    printf("[stack trace]>>>\n");
    for (i = 0; i < depth; i++) {
        if (symbols != NULL) {
            printf("%s\n", symbols[i]);
        } else {
            /* Symbolisation failed: fall back to the bare address. */
            printf("%p\n", frames[i]);
        }
    }
    printf("<<<[stack trace]\n");

    /* Only the array itself is owned by us; free(NULL) is a no-op. */
    free(symbols);
}
#include <inttypes.h>
#define SUBDBG printf
/*
 * Wrapper around the perf_event_open(2) system call that logs every
 * attribute it passes (through SUBDBG) before the call and the result
 * after it.
 *
 * hw_event  event attributes; must not be NULL (it is dereferenced for
 *           logging before the syscall is made)
 * pid, cpu, group_fd, flags
 *           forwarded unchanged to the system call
 *
 * Returns the new file descriptor, or -1 on failure.  errno is saved
 * right after the syscall and restored before returning, so the result
 * logging cannot disturb the error code the caller inspects.
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    long ret;
    int saved_errno;

    SUBDBG("sys_perf_event_open(%p,%d,%d,%d,%lx\n",
           (void *)hw_event,pid,cpu,group_fd,flags);
    SUBDBG(" type: %d\n",hw_event->type);
    SUBDBG(" size: %d\n",hw_event->size);
    /* The kernel's __u64 is not necessarily the type PRIx64 expects. */
    SUBDBG(" config: %"PRIx64" (%"PRIu64")\n",
           (uint64_t)hw_event->config,
           (uint64_t)hw_event->config);
    SUBDBG(" sample_period: %"PRIu64"\n",(uint64_t)hw_event->sample_period);
    SUBDBG(" sample_type: %"PRIu64"\n",(uint64_t)hw_event->sample_type);
    SUBDBG(" read_format: %"PRIu64"\n",(uint64_t)hw_event->read_format);
    SUBDBG(" disabled: %d\n",hw_event->disabled);
    SUBDBG(" inherit: %d\n",hw_event->inherit);
    SUBDBG(" pinned: %d\n",hw_event->pinned);
    SUBDBG(" exclusive: %d\n",hw_event->exclusive);
    SUBDBG(" exclude_user: %d\n",hw_event->exclude_user);
    SUBDBG(" exclude_kernel: %d\n",hw_event->exclude_kernel);
    SUBDBG(" exclude_hv: %d\n",hw_event->exclude_hv);
    SUBDBG(" exclude_idle: %d\n",hw_event->exclude_idle);
    SUBDBG(" mmap: %d\n",hw_event->mmap);
    SUBDBG(" comm: %d\n",hw_event->comm);
    SUBDBG(" freq: %d\n",hw_event->freq);
    SUBDBG(" inherit_stat: %d\n",hw_event->inherit_stat);
    SUBDBG(" enable_on_exec: %d\n",hw_event->enable_on_exec);
    SUBDBG(" task: %d\n",hw_event->task);
    SUBDBG(" watermark: %d\n",hw_event->watermark);

    ret = syscall(__NR_perf_event_open, hw_event, pid, cpu,
                  group_fd, flags);
    saved_errno = errno;

    SUBDBG("Returned %ld %d %s\n",ret,
           ret<0?saved_errno:0,
           ret<0?strerror(saved_errno):" ");

    /* The logging above may have modified errno; hand back the original. */
    errno = saved_errno;
    return ret;
}
/* Return the kernel thread id of the calling thread via the raw
 * gettid system call. */
pid_t gettid() {
    const long thread_id = syscall(__NR_gettid);

    return thread_id;
}
static int detect_processor_cpuinfo(void) {
FILE *fff;
int cpu_family=0,model=0;
char string[BUFSIZ];
fff=fopen("/proc/cpuinfo","r");
if (fff==NULL) {
fprintf(stderr,"ERROR! Can't open /proc/cpuinfo\n");
return PROCESSOR_UNKNOWN;
}
while(1) {
if (fgets(string,BUFSIZ,fff)==NULL) break;
/* Power6 */
if (strstr(string,"POWER6")) {
processor_vendor=VENDOR_IBM;
processor_type=PROCESSOR_POWER6;
return 0;
}
/* ARM */
if (strstr(string,"CPU part")) {
processor_vendor=VENDOR_ARM;
if (strstr(string,"0xc05")) {
processor_type=PROCESSOR_CORTEX_A5;
return 0;
}
if (strstr(string,"0xc09")) {
processor_type=PROCESSOR_CORTEX_A9;
return 0;
}
if (strstr(string,"0xc08")) {
processor_type=PROCESSOR_CORTEX_A8;
return 0;
}
if (strstr(string,"0xc07")) {
processor_type=PROCESSOR_CORTEX_A7;
return 0;
}
if (strstr(string,"0xc0f")) {
processor_type=PROCESSOR_CORTEX_A15;
return 0;
}
if (strstr(string,"0xb76")) {
processor_type=PROCESSOR_ARM1176;
return 0;
}
// Cortex R4 - 0xc14
// Cortex R5 - 0xc15
// ARM1136 - 0xb36
// ARM1156 - 0xb56
// ARM1176 - 0xb76
// ARM11 MPCore - 0xb02
}
/* vendor */
if (strstr(string,"vendor_id")) {
if (strstr(string,"GenuineIntel")) {
processor_vendor=VENDOR_INTEL;
}
if (strstr(string,"AuthenticAMD")) {
processor_vendor=VENDOR_AMD;
}
}
/* family */
if (strstr(string,"cpu family")) {
sscanf(string,"%*s %*s %*s %d",&cpu_family);
}
/* model */
if ((strstr(string,"model")) && (!strstr(string,"model name")) ) {
sscanf(string,"%*s %*s %d",&model);
}
}
fclose(fff);
if (processor_vendor==VENDOR_AMD) {
switch(cpu_family) {
case 0x6:
processor_type=PROCESSOR_K7;
break;
case 0xf:
processor_type=PROCESSOR_K8;
break;
case 0x10:
processor_type=PROCESSOR_AMD_FAM10H;
break;
case 0x11:
processor_type=PROCESSOR_AMD_FAM11H;
break;
case 0x14:
processor_type=PROCESSOR_AMD_FAM14H;
break;
case 0x15:
processor_type=PROCESSOR_AMD_FAM15H;
break;
case 0x16:
processor_type=PROCESSOR_AMD_FAM16H;
break;
default:
processor_type=PROCESSOR_UNKNOWN;
break;
}
return 0;
}
if (processor_vendor==VENDOR_INTEL) {
printf("model %d cpu_family %d\n", model, cpu_family);
if (cpu_family==6) {
switch(model) {
case 1:
processor_type=PROCESSOR_PENTIUM_PRO;
break;
case 3:
case 5:
case 6:
processor_type=PROCESSOR_PENTIUM_II;
break;
case 7:
case 8:
case 10:
case 11:
processor_type=PROCESSOR_PENTIUM_III;
break;
case 9:
case 13:
processor_type=PROCESSOR_PENTIUM_M;
break;
case 14:
processor_type=PROCESSOR_COREDUO;
break;
case 15:
case 22:
case 23:
case 29:
processor_type=PROCESSOR_CORE2;
break;
case 28:
case 38:
case 39:
case 53:
processor_type=PROCESSOR_ATOM;
break;
case 54:
processor_type=PROCESSOR_ATOM_CEDARVIEW;
break;
case 55:
case 77:
processor_type=PROCESSOR_ATOM_SILVERMONT;
break;
case 26:
case 30:
case 31:
processor_type=PROCESSOR_NEHALEM;
break;
case 46:
processor_type=PROCESSOR_NEHALEM_EX;
break;
case 37:
case 44:
processor_type=PROCESSOR_WESTMERE;
break;
case 47:
processor_type=PROCESSOR_WESTMERE_EX;
break;
case 42:
processor_type=PROCESSOR_SANDYBRIDGE;
break;
case 45:
processor_type=PROCESSOR_SANDYBRIDGE_EP;
break;
case 58:
processor_type=PROCESSOR_IVYBRIDGE;
break;
case 62:
processor_type=PROCESSOR_IVYBRIDGE_EP;
break;
case 60:
case 69:
case 70:
processor_type=PROCESSOR_HASWELL;
break;
case 63:
processor_type=PROCESSOR_HASWELL_EP;
break;
case 61:
case 71:
case 79:
processor_type=PROCESSOR_BROADWELL;
break;
case 94:
case 85:
processor_type=PROCESSOR_SKYLAKE;
break;
default:
processor_type=PROCESSOR_UNKNOWN;
}
return 0;
}
if (cpu_family==11) {
processor_type=PROCESSOR_KNIGHTSCORNER;
return 0;
}
if (cpu_family==15) {
processor_type=PROCESSOR_PENTIUM_4;
return 0;
}
}
processor_type=PROCESSOR_UNKNOWN;
return 0;
}
/*
 * Return the cached PROCESSOR_* identifier, probing /proc/cpuinfo first
 * if no probe has stored a result yet (processor_type is still -2).
 */
int detect_processor(void) {
    const int not_probed = -2;

    if (processor_type != not_probed) {
        return processor_type;
    }
    detect_processor_cpuinfo();
    return processor_type;
}
/*
 * Return the cached VENDOR_* identifier, probing /proc/cpuinfo first
 * if no probe has stored a result yet (processor_vendor is still -2).
 */
int detect_vendor(void) {
    const int not_probed = -2;

    if (processor_vendor != not_probed) {
        return processor_vendor;
    }
    detect_processor_cpuinfo();
    return processor_vendor;
}
/* Test example starts */
/* Dimension of the square matrices used by naive_matrix_multiply(). */
#define MATRIX_SIZE 512
/* Workload operands: naive_matrix_multiply() fills a and b, then stores
 * the product a*b in c. */
static double a[MATRIX_SIZE][MATRIX_SIZE];
static double b[MATRIX_SIZE][MATRIX_SIZE];
static double c[MATRIX_SIZE][MATRIX_SIZE];
/*
 * Test workload: fill a and b, compute c = a * b with the textbook triple
 * loop, then sum every element of c.  The sum is printed unless quiet is
 * non-zero.
 *
 * The multiply walks columns in the outer loop and reads b down a column
 * in the inner loop; that traversal order is kept exactly as it was.
 */
static void naive_matrix_multiply(int quiet) {
    double acc;

    /* Operand initialisation. */
    for (int row = 0; row < MATRIX_SIZE; row++) {
        for (int col = 0; col < MATRIX_SIZE; col++) {
            a[row][col] = (double)row * (double)col;
            b[row][col] = (double)row / (double)(col + 5);
        }
    }

    /* c = a * b, column by column. */
    for (int col = 0; col < MATRIX_SIZE; col++) {
        for (int row = 0; row < MATRIX_SIZE; row++) {
            acc = 0;
            for (int inner = 0; inner < MATRIX_SIZE; inner++) {
                acc += a[row][inner] * b[inner][col];
            }
            c[row][col] = acc;
        }
    }

    /* Checksum over the result, in row-major order. */
    acc = 0.0;
    for (int row = 0; row < MATRIX_SIZE; row++) {
        for (int col = 0; col < MATRIX_SIZE; col++) {
            acc += c[row][col];
        }
    }

    if (quiet) {
        return;
    }
    printf("Matrix multiply sum: s=%lf\n",acc);
}
/*Test example ends*/
/* Values for the raw_type argument of perf_mmap_read().  They select how a
 * PERF_SAMPLE_RAW payload is printed: RAW_IBS_FETCH goes through
 * dump_raw_ibs_fetch(), RAW_IBS_OP through dump_raw_ibs_op(), and any other
 * value prints the payload as a plain list of byte values. */
#define RAW_NONE 0
#define RAW_IBS_FETCH 1
#define RAW_IBS_OP 2
struct validate_values {
int pid;
int tid;
int events;
unsigned long branch_low;
unsigned long branch_high;
};
/* Urgh who designed this interface */
/*
 * Decode the counter-read block embedded in a sample (PERF_SAMPLE_READ).
 *
 * sample       start of the block
 * read_format  PERF_FORMAT_* bits describing which fields are present
 * validation   optional; when non-NULL and the block is a group read, the
 *              event count is compared with validation->events and a
 *              mismatch is reported on stderr
 * quiet        non-zero suppresses the per-field printout
 *
 * Every field is 8 bytes wide.  Returns the number of bytes consumed.
 */
static int handle_struct_read_format(unsigned char *sample,
                                     int read_format,
                                     struct validate_values *validation,
                                     int quiet) {
    const int with_enabled = read_format & PERF_FORMAT_TOTAL_TIME_ENABLED;
    const int with_running = read_format & PERF_FORMAT_TOTAL_TIME_RUNNING;
    const int with_id = read_format & PERF_FORMAT_ID;
    long long field;
    long long count;
    int pos = 0;
    int idx;

    if (!(read_format & PERF_FORMAT_GROUP)) {
        /* Single event: value, [enabled], [running], [id] */
        memcpy(&field, sample + pos, sizeof(long long));
        if (!quiet) printf("\t\tValue: %lld ", field);
        pos += 8;

        if (with_enabled) {
            memcpy(&field, sample + pos, sizeof(long long));
            if (!quiet) printf("enabled: %lld ", field);
            pos += 8;
        }
        if (with_running) {
            memcpy(&field, sample + pos, sizeof(long long));
            if (!quiet) printf("running: %lld ", field);
            pos += 8;
        }
        if (with_id) {
            memcpy(&field, sample + pos, sizeof(long long));
            if (!quiet) printf("id: %lld ", field);
            pos += 8;
        }
        if (!quiet) printf("\n");
        return pos;
    }

    /* Group: nr, [enabled], [running], then nr x { value, [id] } */
    memcpy(&count, sample + pos, sizeof(long long));
    if (!quiet) printf("\t\tNumber: %lld ", count);
    pos += 8;

    if (validation && validation->events != count) {
        fprintf(stderr,"Error! Wrong number "
                "of events %d != %lld\n",
                validation->events, count);
    }

    if (with_enabled) {
        memcpy(&field, sample + pos, sizeof(long long));
        if (!quiet) printf("enabled: %lld ", field);
        pos += 8;
    }
    if (with_running) {
        memcpy(&field, sample + pos, sizeof(long long));
        if (!quiet) printf("running: %lld ", field);
        pos += 8;
    }
    if (!quiet) printf("\n");

    for (idx = 0; idx < count; idx++) {
        memcpy(&field, sample + pos, sizeof(long long));
        if (!quiet) printf("\t\t\tValue: %lld ", field);
        pos += 8;

        if (with_id) {
            memcpy(&field, sample + pos, sizeof(long long));
            if (!quiet) printf("id: %lld ", field);
            pos += 8;
        }
        if (!quiet) printf("\n");
    }

    return pos;
}
/*
 * Walk the register dump that follows a PERF_SAMPLE_REGS_* ABI word.
 *
 * quiet     non-zero suppresses all output (the size is still computed)
 * abi       accepted for symmetry with the sample layout; not used here
 * reg_mask  one bit per sampled register; bit n corresponds to reg_names[n]
 * data      packed 8-byte register values, one per set bit, lowest bit first
 *
 * Returns the number of bytes the dump occupies (8 per set mask bit).
 */
static int print_regs(int quiet,long long abi,long long reg_mask,
                      unsigned char *data) {
    int consumed = 0;
    int bit;

    if (!quiet) printf("\t\tReg mask %llx\n",reg_mask);

    for (bit = 0; bit < 64; bit++) {
        unsigned long long value;

        if (!(reg_mask & (1ULL << bit))) {
            continue;
        }

        if (!quiet) {
            memcpy(&value, data + consumed, 8);
            if (bit < NUM_REGS) {
                printf("\t\t%s : ",reg_names[bit]);
            } else {
                /* Mask bit beyond the names we know about. */
                printf("\t\t??? : ");
            }
            printf("%llx\n",value);
        }
        consumed += 8;
    }

    return consumed;
}
/*
 * Pretty-print a raw AMD IBS fetch sample.
 *
 * data  raw payload: a 4-byte header followed by 8-byte MSR values
 *       (fetch control, linear address, physical address and, when
 *       size > 24, the branch target)
 * size  raw payload length as reported in the sample
 *
 * Always returns 0.
 *
 * The MSR values start 4 bytes into the payload, so they are not 8-byte
 * aligned; they are copied out with memcpy() instead of being read
 * through a cast pointer, which was a misaligned, aliasing-violating
 * access.
 */
static int dump_raw_ibs_fetch(unsigned char *data, int size) {
    unsigned int header;
    unsigned long long msrs[4] = {0};

    memcpy(&header, data, sizeof header);
    memcpy(msrs, data + 4, 3 * sizeof msrs[0]);
    if (size>24) {
        memcpy(&msrs[3], data + 4 + 3 * sizeof msrs[0], sizeof msrs[0]);
    }

    printf("\t\tHeader: %x\n",header);
    printf("\t\tMSR IBS_FETCH_CONTROL %llx\n",msrs[0]);
    printf("\t\t\tIBS_RAND_EN: %d\n",!!(msrs[0]&1ULL<<57));
    printf("\t\t\tL2 iTLB Miss: %d\n",!!(msrs[0]&1ULL<<56));
    printf("\t\t\tL1 iTLB Miss: %d\n",!!(msrs[0]&1ULL<<55));
    printf("\t\t\tL1TLB page size: ");
    switch( (msrs[0]>>53)&0x3) {
    case 0: printf("4kB\n"); break;
    case 1: printf("2MB\n"); break;
    case 2: printf("1GB\n"); break;
    default: printf("Reserved\n"); break;
    }
    printf("\t\t\tFetch Physical Address Valid: %d\n",!!(msrs[0]&1ULL<<52));
    printf("\t\t\ticache miss: %d\n",!!(msrs[0]&1ULL<<51));
    printf("\t\t\tInstruction Fetch Complete: %d\n",!!(msrs[0]&1ULL<<50));
    printf("\t\t\tInstruction Fetch Valid: %d\n",!!(msrs[0]&1ULL<<49));
    printf("\t\t\tInstruction Fetch Enabled: %d\n",!!(msrs[0]&1ULL<<48));
    printf("\t\t\tInstruction Fetch Latency: %llu\n",((msrs[0]>>32)&0xffff));
    printf("\t\t\tInstruction Fetch Count: %llu\n",((msrs[0]>>16)&0xffff)<<4);
    printf("\t\t\tInstruction Fetch Max Count: %llu\n",(msrs[0]&0xffff)<<4);
    printf("\t\tMSR IBS_FETCH_LINEAR_ADDRESS %llx\n",msrs[1]);
    printf("\t\tMSR IBS_FETCH_PHYSICAL_ADDRESS %llx\n",msrs[2]);
    if (size>24) {
        printf("\t\tMSR IBS_BRTARGET %llx\n",msrs[3]);
    }
    return 0;
}
/*
 * Pretty-print a raw AMD IBS op (execution) sample.
 *
 * data  raw payload: a 4-byte header followed by 8-byte MSR values.
 *       Five values (execution control, logical address, op data,
 *       op data 2, op data 3) are always read.  The data-cache linear and
 *       physical addresses are read only when op data 3 flags them as
 *       valid, and op data 4 only when size > 64.
 * size  raw payload length as reported in the sample
 *
 * Always returns 0.
 *
 * The MSR values start 4 bytes into the payload, so they are not 8-byte
 * aligned; they are copied out with memcpy() instead of being read
 * through a cast pointer, which was a misaligned, aliasing-violating
 * access.
 */
static int dump_raw_ibs_op(unsigned char *data, int size) {
    /* Single-bit flags of IBS_OP_DATA, in print order. */
    static const struct { int bit; const char *label; } op_data_flags[] = {
        { 38, "RIP Invalid" },
        { 37, "Branch Retired" },
        { 36, "Branch Mispredicted" },
        { 35, "Branch Taken" },
        { 34, "Return uop" },
        { 33, "Mispredicted Return uop" },
    };
    /* Single-bit flags of IBS_OP_DATA3, in print order. */
    static const struct { int bit; const char *label; } op_data3_flags[] = {
        { 19, "L2TLB data hit in 1GB page" },
        { 18, "Data cache physical addr valid" },
        { 17, "Data cache linear addr valid" },
        { 16, "MAB hit" },
        { 15, "Data cache locked operation" },
        { 14, "Uncachable memory operation" },
        { 13, "Write-combining memory operation" },
        { 12, "Data forwarding store to load canceled" },
        { 11, "Data forwarding store to load operation" },
        {  9, "Bank conflict on load operation" },
        {  8, "Misaligned access" },
        {  7, "Data cache miss" },
        {  6, "Data cache L2TLB hit in 2M" },
        {  5, "Data cache L2TLB hit in 1G" },
        {  4, "Data cache L1TLB hit in 2M" },
        {  3, "Data cache L2TLB miss" },
        {  2, "Data cache L1TLB miss" },
        {  1, "Operation is a store" },
        {  0, "Operation is a load" },
    };
    const unsigned char *msr_base = data + 4;
    unsigned int header;
    unsigned long long msrs[8] = {0};
    size_t k;

    memcpy(&header, data, sizeof header);
    memcpy(msrs, msr_base, 5 * sizeof msrs[0]);

    printf("\t\tHeader: %x\n",header);
    printf("\t\tMSR IBS_EXECUTION_CONTROL %llx\n",msrs[0]);
    printf("\t\t\tIbsOpCurCnt: %llu\n",((msrs[0]>>32)&0x3ffffff));
    printf("\t\t\tIBS OpCntCtl: %d\n",!!(msrs[0]&1ULL<<19));
    printf("\t\t\tIBS OpVal: %d\n",!!(msrs[0]&1ULL<<18));
    printf("\t\t\tIBS OpEn: %d\n",!!(msrs[0]&1ULL<<17));
    printf("\t\t\tIbsOpMaxCnt: %llu\n",((msrs[0]&0xffff)<<4) |
           (msrs[0]&0x3f00000));
    printf("\t\tMSR IBS_OP_LOGICAL_ADDRESS %llx\n",msrs[1]);

    printf("\t\tMSR IBS_OP_DATA %llx\n",msrs[2]);
    for (k = 0; k < sizeof op_data_flags / sizeof op_data_flags[0]; k++) {
        printf("\t\t\t%s: %d\n", op_data_flags[k].label,
               !!(msrs[2] & (1ULL << op_data_flags[k].bit)));
    }
    printf("\t\t\tTag to Retire Cycles: %llu\n",(msrs[2]>>16)&0xffff);
    printf("\t\t\tCompletion to Retire Cycles: %llu\n",msrs[2]&0xffff);

    printf("\t\tMSR IBS_OP_DATA2 (Northbridge) %llx\n",msrs[3]);
    printf("\t\t\tCache Hit State: %c\n",(msrs[3]&1ULL<<5)?'O':'M');
    printf("\t\t\tRequest destination node: %s\n",
           (msrs[3]&1ULL<<4)?"Same":"Different");
    printf("\t\t\tNorthbridge data source: ");
    switch(msrs[3]&0x7) {
    case 0: printf("No valid status\n"); break;
    case 1: printf("L3\n"); break;
    case 2: printf("Cache from another compute unit\n"); break;
    case 3: printf("DRAM\n"); break;
    case 4: printf("Reserved remote cache\n"); break;
    case 5: printf("Reserved\n"); break;
    case 6: printf("Reserved\n"); break;
    case 7: printf("Other: MMIO/config/PCI/APIC\n"); break;
    }

    printf("\t\tMSR IBS_OP_DATA3 (cache) %llx\n",msrs[4]);
    printf("\t\t\tData Cache Miss Latency: %llu\n",
           (msrs[4]>>32)&0xffff);
    for (k = 0; k < sizeof op_data3_flags / sizeof op_data3_flags[0]; k++) {
        printf("\t\t\t%s: %d\n", op_data3_flags[k].label,
               !!(msrs[4] & (1ULL << op_data3_flags[k].bit)));
    }

    /* The optional registers are only read when they are going to be
     * printed, exactly as before. */
    if (msrs[4]&1ULL<<17) {
        memcpy(&msrs[5], msr_base + 5 * sizeof msrs[0], sizeof msrs[0]);
        printf("\t\tMSR IBS_DC_LINEAR_ADDRESS %llx\n",msrs[5]);
    }
    if (msrs[4]&1ULL<<18) {
        memcpy(&msrs[6], msr_base + 6 * sizeof msrs[0], sizeof msrs[0]);
        printf("\t\tMSR IBS_DC_PHYSICAL_ADDRESS %llx\n",msrs[6]);
    }
    if (size>64) {
        memcpy(&msrs[7], msr_base + 7 * sizeof msrs[0], sizeof msrs[0]);
        printf("\t\tMSR IBS_OP_DATA4 %llx\n",msrs[7]);
    }
    return 0;
}
long long perf_mmap_read( void *our_mmap, int mmap_size,
long long prev_head,
int sample_type, int read_format, long long reg_mask,
struct validate_values *validate,
int quiet, int *events_read,
int raw_type ) {
struct perf_event_mmap_page *control_page = our_mmap;
long long head,offset;
int i,size;
long long bytesize,prev_head_wrap;
unsigned char *data;
void *data_mmap=our_mmap+getpagesize();
if (mmap_size==0) return 0;
if (control_page==NULL) {
fprintf(stderr,"ERROR mmap page NULL\n");
return -1;
}
head=control_page->data_head;
rmb(); /* Must always follow read of data_head */
size=head-prev_head;
//printf("Head: %lld Prev_head=%lld\n",head,prev_head);
//printf("%d new bytes\n",size);
bytesize=mmap_size*getpagesize();
if (size>bytesize) {
printf("error! we overflowed the mmap buffer %d>%lld bytes\n",
size,bytesize);
}
data=malloc(bytesize);
if (data==NULL) {
return -1;
}
prev_head_wrap=prev_head%bytesize;
// printf("Copying %d bytes from %d to %d\n",
// bytesize-prev_head_wrap,prev_head_wrap,0);
memcpy(data,(unsigned char*)data_mmap + prev_head_wrap,
bytesize-prev_head_wrap);
//printf("Copying %d bytes from %d to %d\n",
// prev_head_wrap,0,bytesize-prev_head_wrap);
memcpy(data+(bytesize-prev_head_wrap),(unsigned char *)data_mmap,
prev_head_wrap);
struct perf_event_header *event;
offset=0;
if (events_read) *events_read=0;
while(offset<size) {
//printf("Offset %d Size %d\n",offset,size);
event = ( struct perf_event_header * ) & data[offset];
/********************/
/* Print event Type */
/********************/
if (!quiet) {
switch(event->type) {
case PERF_RECORD_MMAP:
printf("PERF_RECORD_MMAP");
break;
case PERF_RECORD_LOST:
printf("PERF_RECORD_LOST");
break;
case PERF_RECORD_COMM:
printf("PERF_RECORD_COMM");
break;
case PERF_RECORD_EXIT:
printf("PERF_RECORD_EXIT");
break;
case PERF_RECORD_THROTTLE:
printf("PERF_RECORD_THROTTLE");
break;
case PERF_RECORD_UNTHROTTLE:
printf("PERF_RECORD_UNTHROTTLE");
break;
case PERF_RECORD_FORK:
printf("PERF_RECORD_FORK");
break;
case PERF_RECORD_READ:
printf("PERF_RECORD_READ");
break;
case PERF_RECORD_SAMPLE:
printf("PERF_RECORD_SAMPLE [%x]",sample_type);
break;
case PERF_RECORD_MMAP2:
printf("PERF_RECORD_MMAP2");
break;
case PERF_RECORD_AUX:
printf("PERF_RECORD_AUX");
break;
case PERF_RECORD_ITRACE_START:
printf("PERF_RECORD_ITRACE_START");
break;
case PERF_RECORD_LOST_SAMPLES:
printf("PERF_RECORD_LOST_SAMPLES");
break;
case PERF_RECORD_SWITCH:
printf("PERF_RECORD_SWITCH");
break;
case PERF_RECORD_SWITCH_CPU_WIDE:
printf("PERF_RECORD_SWITCH_CPU_WIDE");
break;
default: printf("UNKNOWN %d",event->type);
break;
}
printf(", MISC=%d (",event->misc);
setBP = 0;
switch(event->misc & PERF_RECORD_MISC_CPUMODE_MASK) {
case PERF_RECORD_MISC_CPUMODE_UNKNOWN:
printf("PERF_RECORD_MISC_CPUMODE_UNKNOWN"); break;
case PERF_RECORD_MISC_KERNEL:
{
setBP = 1;
printf("PERF_RECORD_MISC_KERNEL"); break;
}
case PERF_RECORD_MISC_USER:
{
setBP = 0;
printf("PERF_RECORD_MISC_USER"); break;
}
case PERF_RECORD_MISC_HYPERVISOR:
printf("PERF_RECORD_MISC_HYPERVISOR"); break;
case PERF_RECORD_MISC_GUEST_KERNEL:
printf("PERF_RECORD_MISC_GUEST_KERNEL"); break;
case PERF_RECORD_MISC_GUEST_USER:
printf("PERF_RECORD_MISC_GUEST_USER"); break;
default:
printf("Unknown %d!\n",event->misc); break;
}
/* All three have the same value */
if (event->misc & PERF_RECORD_MISC_MMAP_DATA) {
if (event->type==PERF_RECORD_MMAP) {
printf(",PERF_RECORD_MISC_MMAP_DATA ");
}
else if (event->type==PERF_RECORD_COMM) {
printf(",PERF_RECORD_MISC_COMM_EXEC ");
}
else if ((event->type==PERF_RECORD_SWITCH) ||
(event->type==PERF_RECORD_SWITCH_CPU_WIDE)) {
printf(",PERF_RECORD_MISC_SWITCH_OUT ");
}
else {
printf("UNKNOWN ALIAS!!! ");
}
}
if (event->misc & PERF_RECORD_MISC_EXACT_IP) {
printf(",PERF_RECORD_MISC_EXACT_IP ");
}
if (event->misc & PERF_RECORD_MISC_EXT_RESERVED) {
printf(",PERF_RECORD_MISC_EXT_RESERVED ");
}
printf("), Size=%d\n",event->size);
}
offset+=8; /* skip header */
/***********************/
/* Print event Details */
/***********************/
switch(event->type) {
/* Lost */
case PERF_RECORD_LOST: {
long long id,lost;
memcpy(&id,&data[offset],sizeof(long long));
if (!quiet) printf("\tID: %lld\n",id);
offset+=8;
memcpy(&lost,&data[offset],sizeof(long long));
if (!quiet) printf("\tLOST: %lld\n",lost);
offset+=8;
}
break;
/* COMM */
case PERF_RECORD_COMM: {
int pid,tid,string_size;
char *string;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
/* FIXME: sample_id handling? */
/* two ints plus the 64-bit header */
string_size=event->size-16;
string=calloc(string_size,sizeof(char));
memcpy(string,&data[offset],string_size);
if (!quiet) printf("\tcomm: %s\n",string);
offset+=string_size;
if (string) free(string);
}
break;
/* Fork */
case PERF_RECORD_FORK: {
int pid,ppid,tid,ptid;
long long fork_time;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&ppid,&data[offset],sizeof(int));
if (!quiet) printf("\tPPID: %d\n",ppid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
memcpy(&ptid,&data[offset],sizeof(int));
if (!quiet) printf("\tPTID: %d\n",ptid);
offset+=4;
memcpy(&fork_time,&data[offset],sizeof(long long));
if (!quiet) printf("\tTime: %lld\n",fork_time);
offset+=8;
}
break;
/* mmap */
case PERF_RECORD_MMAP: {
int pid,tid,string_size;
long long address,len,pgoff;
char *filename;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
memcpy(&address,&data[offset],sizeof(long long));
if (!quiet) printf("\tAddress: %llx\n",address);
offset+=8;
memcpy(&len,&data[offset],sizeof(long long));
if (!quiet) printf("\tLength: %llx\n",len);
offset+=8;
memcpy(&pgoff,&data[offset],sizeof(long long));
if (!quiet) printf("\tPage Offset: %llx\n",pgoff);
offset+=8;
string_size=event->size-40;
filename=calloc(string_size,sizeof(char));
memcpy(filename,&data[offset],string_size);
if (!quiet) printf("\tFilename: %s\n",filename);
offset+=string_size;
if (filename) free(filename);
}
break;
/* mmap2 */
case PERF_RECORD_MMAP2: {
int pid,tid,string_size;
long long address,len,pgoff;
int major,minor;
long long ino,ino_generation;
int prot,flags;
char *filename;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
memcpy(&address,&data[offset],sizeof(long long));
if (!quiet) printf("\tAddress: %llx\n",address);
offset+=8;
memcpy(&len,&data[offset],sizeof(long long));
if (!quiet) printf("\tLength: %llx\n",len);
offset+=8;
memcpy(&pgoff,&data[offset],sizeof(long long));
if (!quiet) printf("\tPage Offset: %llx\n",pgoff);
offset+=8;
memcpy(&major,&data[offset],sizeof(int));
if (!quiet) printf("\tMajor: %d\n",major);
offset+=4;
memcpy(&minor,&data[offset],sizeof(int));
if (!quiet) printf("\tMinor: %d\n",minor);
offset+=4;
memcpy(&ino,&data[offset],sizeof(long long));
if (!quiet) printf("\tIno: %llx\n",ino);
offset+=8;
memcpy(&ino_generation,&data[offset],sizeof(long long));
if (!quiet) printf("\tIno generation: %llx\n",ino_generation);
offset+=8;
memcpy(&prot,&data[offset],sizeof(int));
if (!quiet) printf("\tProt: %d\n",prot);
offset+=4;
memcpy(&flags,&data[offset],sizeof(int));
if (!quiet) printf("\tFlags: %d\n",flags);
offset+=4;
string_size=event->size-72;
filename=calloc(string_size,sizeof(char));
memcpy(filename,&data[offset],string_size);
if (!quiet) printf("\tFilename: %s\n",filename);
offset+=string_size;
if (filename) free(filename);
}
break;
/* Exit */
case PERF_RECORD_EXIT: {
int pid,ppid,tid,ptid;
long long fork_time;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&ppid,&data[offset],sizeof(int));
if (!quiet) printf("\tPPID: %d\n",ppid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
memcpy(&ptid,&data[offset],sizeof(int));
if (!quiet) printf("\tPTID: %d\n",ptid);
offset+=4;
memcpy(&fork_time,&data[offset],sizeof(long long));
if (!quiet) printf("\tTime: %lld\n",fork_time);
offset+=8;
}
break;
/* Throttle/Unthrottle */
case PERF_RECORD_THROTTLE:
case PERF_RECORD_UNTHROTTLE: {
long long throttle_time,id,stream_id;
memcpy(&throttle_time,&data[offset],sizeof(long long));
if (!quiet) printf("\tTime: %lld\n",throttle_time);
offset+=8;
memcpy(&id,&data[offset],sizeof(long long));
if (!quiet) printf("\tID: %lld\n",id);
offset+=8;
memcpy(&stream_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tStream ID: %lld\n",stream_id);
offset+=8;
}
break;
/* Sample */
case PERF_RECORD_SAMPLE:
if (sample_type & PERF_SAMPLE_IP) {
long long ip;
memcpy(&ip,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_IP, IP: %llx\n",ip);
offset+=8;
}
if (sample_type & PERF_SAMPLE_TID) {
int pid, tid;
memcpy(&pid,&data[offset],sizeof(int));
memcpy(&tid,&data[offset+4],sizeof(int));
if (validate) {
if (validate->pid!=pid) {
fprintf(stderr,"Error, pid %d != %d\n",
validate->pid,pid);
}
}
if (!quiet) {
printf("\tPERF_SAMPLE_TID, pid: %d tid %d\n",pid,tid);
}
offset+=8;
}
if (sample_type & PERF_SAMPLE_TIME) {
long long time;
memcpy(&time,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_TIME, time: %lld\n",time);
offset+=8;
}
if (sample_type & PERF_SAMPLE_ADDR) {
long long addr;
memcpy(&addr,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_ADDR, addr: %llx\n",addr);
dbgAddr = addr;
offset+=8;
}
if (sample_type & PERF_SAMPLE_ID) {
long long sample_id;
memcpy(&sample_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_ID, sample_id: %lld\n",sample_id);
offset+=8;
}
if (sample_type & PERF_SAMPLE_STREAM_ID) {
long long sample_stream_id;
memcpy(&sample_stream_id,&data[offset],sizeof(long long));
if (!quiet) {
printf("\tPERF_SAMPLE_STREAM_ID, sample_stream_id: %lld\n",sample_stream_id);
}
offset+=8;
}
if (sample_type & PERF_SAMPLE_CPU) {
int cpu, res;
memcpy(&cpu,&data[offset],sizeof(int));
memcpy(&res,&data[offset+4],sizeof(int));
if (!quiet) printf("\tPERF_SAMPLE_CPU, cpu: %d res %d\n",cpu,res);
offset+=8;
}
if (sample_type & PERF_SAMPLE_PERIOD) {
long long period;
memcpy(&period,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_PERIOD, period: %lld\n",period);
offset+=8;
}
if (sample_type & PERF_SAMPLE_READ) {
int length;
if (!quiet) printf("\tPERF_SAMPLE_READ, read_format\n");
length=handle_struct_read_format(&data[offset],
read_format,
validate,quiet);
if (length>=0) offset+=length;
}
if (sample_type & PERF_SAMPLE_CALLCHAIN) {
long long nr,ip;
memcpy(&nr,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_CALLCHAIN, callchain length: %lld\n",nr);
offset+=8;
for(i=0;i<nr;i++) {
memcpy(&ip,&data[offset],sizeof(long long));
if (!quiet) printf("\t\t ip[%d]: %llx\n",i,ip);
offset+=8;
}
}
if (sample_type & PERF_SAMPLE_RAW) {
int size;
memcpy(&size,&data[offset],sizeof(int));
if (!quiet) printf("\tPERF_SAMPLE_RAW, Raw length: %d\n",size);
offset+=4;
if (!quiet) {
if (raw_type==RAW_IBS_FETCH) {
dump_raw_ibs_fetch(&data[offset],size);
}
else if (raw_type==RAW_IBS_OP) {
dump_raw_ibs_op(&data[offset],size);
}
else {
printf("\t\t");
for(i=0;i<size;i++) {
printf("%d ",data[offset+i]);
}
printf("\n");
}
}
offset+=size;
}
if (sample_type & PERF_SAMPLE_BRANCH_STACK) {
long long bnr;
memcpy(&bnr,&data[offset],sizeof(long long));
if (!quiet) {
printf("\tPERF_SAMPLE_BRANCH_STACK, branch_stack entries: %lld\n",bnr);
}
offset+=8;
for(i=0;i<bnr;i++) {
long long from,to,flags;
/* From value */
memcpy(&from,&data[offset],sizeof(long long));
offset+=8;
/* Could be more complete here */
if (validate) {
if (from < validate->branch_low) {
fprintf(stderr,"Error! branch out of bounds!\n");
}
}
/* To Value */
memcpy(&to,&data[offset],sizeof(long long));
offset+=8;
if (!quiet) {
printf("\t\t lbr[%d]: %llx %llx ",
i,from,to);
}
/* Flags */
memcpy(&flags,&data[offset],sizeof(long long));
offset+=8;
if (!quiet) {
if (flags==0) printf("0");
if (flags&1) {
printf("MISPREDICTED ");
flags&=~2;
}
if (flags&2) {
printf("PREDICTED ");
flags&=~2;
}
if (flags&4) {
printf("IN_TRANSACTION ");
flags&=~4;
}
if (flags&8) {
printf("TRANSACTION_ABORT ");
flags&=~8;
}
printf("\n");
}
}
}
if (sample_type & PERF_SAMPLE_REGS_USER) {
long long abi;
memcpy(&abi,&data[offset],sizeof(long long));
if (!quiet) {
printf("\tPERF_SAMPLE_REGS_USER, ABI: ");
if (abi==PERF_SAMPLE_REGS_ABI_NONE) printf ("PERF_SAMPLE_REGS_ABI_NONE");
if (abi==PERF_SAMPLE_REGS_ABI_32) printf("PERF_SAMPLE_REGS_ABI_32");
if (abi==PERF_SAMPLE_REGS_ABI_64) printf("PERF_SAMPLE_REGS_ABI_64");
printf("\n");
}
offset+=8;
offset+=print_regs(quiet,abi,reg_mask,
&data[offset]);
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_REGS_INTR) {
long long abi;
memcpy(&abi,&data[offset],sizeof(long long));
if (!quiet) {
printf("\tPERF_SAMPLE_REGS_INTR, ABI: ");
if (abi==PERF_SAMPLE_REGS_ABI_NONE) printf ("PERF_SAMPLE_REGS_ABI_NONE");
if (abi==PERF_SAMPLE_REGS_ABI_32) printf("PERF_SAMPLE_REGS_ABI_32");
if (abi==PERF_SAMPLE_REGS_ABI_64) printf("PERF_SAMPLE_REGS_ABI_64");
printf("\n");
}
offset+=8;
offset+=print_regs(quiet,abi,reg_mask,
&data[offset]);
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_STACK_USER) {
long long size,dyn_size;
int *stack_data;
int k;
memcpy(&size,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_STACK_USER, Requested size: %lld\n",size);
offset+=8;
stack_data=malloc(size);
memcpy(stack_data,&data[offset],size);
offset+=size;
memcpy(&dyn_size,&data[offset],sizeof(long long));
if (!quiet) printf("\t\tDynamic (used) size: %lld\n",dyn_size);
offset+=8;
if (!quiet) printf("\t\t");
for(k=0;k<dyn_size;k+=4) {
if (!quiet) printf("0x%x ",stack_data[k]);
}
free(stack_data);
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_WEIGHT) {
long long weight;
memcpy(&weight,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_WEIGHT, Weight: %lld ",weight);
offset+=8;
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_DATA_SRC) {
long long src;
memcpy(&src,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_DATA_SRC, Raw: %llx\n",src);
offset+=8;
dbgSrc = src;
if (!quiet) {
if (src!=0) printf("\t\t");
if (src & (PERF_MEM_OP_NA<<PERF_MEM_OP_SHIFT))
printf("Op Not available ");
if (src & (PERF_MEM_OP_LOAD<<PERF_MEM_OP_SHIFT))
printf("Load ");
if (src & (PERF_MEM_OP_STORE<<PERF_MEM_OP_SHIFT))
printf("Store ");
if (src & (PERF_MEM_OP_PFETCH<<PERF_MEM_OP_SHIFT))
printf("Prefetch ");
if (src & (PERF_MEM_OP_EXEC<<PERF_MEM_OP_SHIFT))
printf("Executable code ");
if (src & (PERF_MEM_LVL_NA<<PERF_MEM_LVL_SHIFT))
printf("Level Not available ");
if (src & (PERF_MEM_LVL_HIT<<PERF_MEM_LVL_SHIFT))
printf("Hit ");
if (src & (PERF_MEM_LVL_MISS<<PERF_MEM_LVL_SHIFT))
printf("Miss ");
if (src & (PERF_MEM_LVL_L1<<PERF_MEM_LVL_SHIFT))
printf("L1 cache ");
if (src & (PERF_MEM_LVL_LFB<<PERF_MEM_LVL_SHIFT))
printf("Line fill buffer ");
if (src & (PERF_MEM_LVL_L2<<PERF_MEM_LVL_SHIFT))
printf("L2 cache ");
if (src & (PERF_MEM_LVL_L3<<PERF_MEM_LVL_SHIFT))
printf("L3 cache ");
if (src & (PERF_MEM_LVL_LOC_RAM<<PERF_MEM_LVL_SHIFT))
printf("Local DRAM ");
if (src & (PERF_MEM_LVL_REM_RAM1<<PERF_MEM_LVL_SHIFT))
printf("Remote DRAM 1 hop ");
if (src & (PERF_MEM_LVL_REM_RAM2<<PERF_MEM_LVL_SHIFT))
printf("Remote DRAM 2 hops ");
if (src & (PERF_MEM_LVL_REM_CCE1<<PERF_MEM_LVL_SHIFT))
printf("Remote cache 1 hop ");
if (src & (PERF_MEM_LVL_REM_CCE2<<PERF_MEM_LVL_SHIFT))
printf("Remote cache 2 hops ");
if (src & (PERF_MEM_LVL_IO<<PERF_MEM_LVL_SHIFT))
printf("I/O memory ");
if (src & (PERF_MEM_LVL_UNC<<PERF_MEM_LVL_SHIFT))
printf("Uncached memory ");
if (src & (PERF_MEM_SNOOP_NA<<PERF_MEM_SNOOP_SHIFT))
printf("Not available ");
if (src & (PERF_MEM_SNOOP_NONE<<PERF_MEM_SNOOP_SHIFT))
printf("No snoop ");
if (src & (PERF_MEM_SNOOP_HIT<<PERF_MEM_SNOOP_SHIFT))
printf("Snoop hit ");
if (src & (PERF_MEM_SNOOP_MISS<<PERF_MEM_SNOOP_SHIFT))
printf("Snoop miss ");
if (src & (PERF_MEM_SNOOP_HITM<<PERF_MEM_SNOOP_SHIFT))
printf("Snoop hit modified ");
if (src & (PERF_MEM_LOCK_NA<<PERF_MEM_LOCK_SHIFT))
printf("Not available ");
if (src & (PERF_MEM_LOCK_LOCKED<<PERF_MEM_LOCK_SHIFT))
printf("Locked transaction ");
if (src & (PERF_MEM_TLB_NA<<PERF_MEM_TLB_SHIFT))
printf("Not available ");
if (src & (PERF_MEM_TLB_HIT<<PERF_MEM_TLB_SHIFT))
printf("Hit ");
if (src & (PERF_MEM_TLB_MISS<<PERF_MEM_TLB_SHIFT))
printf("Miss ");
if (src & (PERF_MEM_TLB_L1<<PERF_MEM_TLB_SHIFT))
printf("Level 1 TLB ");
if (src & (PERF_MEM_TLB_L2<<PERF_MEM_TLB_SHIFT))
printf("Level 2 TLB ");
if (src & (PERF_MEM_TLB_WK<<PERF_MEM_TLB_SHIFT))
printf("Hardware walker ");
if (src & ((long long)PERF_MEM_TLB_OS<<PERF_MEM_TLB_SHIFT))
printf("OS fault handler ");
}
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_IDENTIFIER) {
long long abi;
memcpy(&abi,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_IDENTIFIER, Raw length: %lld\n",abi);
offset+=8;
if (!quiet) printf("\n");
}
if (sample_type & PERF_SAMPLE_TRANSACTION) {
long long abi;
memcpy(&abi,&data[offset],sizeof(long long));
if (!quiet) printf("\tPERF_SAMPLE_TRANSACTION, Raw length: %lld\n",abi);
offset+=8;
if (!quiet) printf("\n");
}
break;
/* AUX */
case PERF_RECORD_AUX: {
long long aux_offset,aux_size,flags;
long long sample_id;
memcpy(&aux_offset,&data[offset],sizeof(long long));
if (!quiet) printf("\tAUX_OFFSET: %lld\n",aux_offset);
offset+=8;
memcpy(&aux_size,&data[offset],sizeof(long long));
if (!quiet) printf("\tAUX_SIZE: %lld\n",aux_size);
offset+=8;
memcpy(&flags,&data[offset],sizeof(long long));
if (!quiet) {
printf("\tFLAGS: %llx ",flags);
if (flags & PERF_AUX_FLAG_TRUNCATED) {
printf("FLAG_TRUNCATED ");
}
if (flags & PERF_AUX_FLAG_OVERWRITE) {
printf("FLAG_OVERWRITE ");
}
printf("\n");
}
offset+=8;
memcpy(&sample_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id);
offset+=8;
}
break;
/* itrace start */
case PERF_RECORD_ITRACE_START: {
int pid,tid;
memcpy(&pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPID: %d\n",pid);
offset+=4;
memcpy(&tid,&data[offset],sizeof(int));
if (!quiet) printf("\tTID: %d\n",tid);
offset+=4;
}
break;
/* lost samples PEBS */
case PERF_RECORD_LOST_SAMPLES: {
long long lost,sample_id;
memcpy(&lost,&data[offset],sizeof(long long));
if (!quiet) printf("\tLOST: %lld\n",lost);
offset+=8;
memcpy(&sample_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id);
offset+=8;
}
break;
/* context switch */
case PERF_RECORD_SWITCH: {
long long sample_id;
memcpy(&sample_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id);
offset+=8;
}
break;
/* context switch cpu-wide*/
case PERF_RECORD_SWITCH_CPU_WIDE: {
int prev_pid,prev_tid;
long long sample_id;
memcpy(&prev_pid,&data[offset],sizeof(int));
if (!quiet) printf("\tPREV_PID: %d\n",prev_pid);
offset+=4;
memcpy(&prev_tid,&data[offset],sizeof(int));
if (!quiet) printf("\tPREV_TID: %d\n",prev_tid);
offset+=4;
memcpy(&sample_id,&data[offset],sizeof(long long));
if (!quiet) printf("\tSAMPLE_ID: %lld\n",sample_id);
offset+=8;
}
break;
default:
if (!quiet) printf("\tUnknown type %d\n",event->type);
}
if (events_read) (*events_read)++;
}
control_page->data_tail=head;
free(data);
return head;
}
/* Flags that no code below this point reads or writes. */
static int bpSet = 0;
static int once = 0;
#include<pthread.h>
/*
 * Mutex that main() and our_handler() take around their perf ioctl() calls
 * (and, in the handler, around the ring-buffer read). Initialised and
 * destroyed in main().
 */
pthread_mutex_t lock;
/*
 * SIGIO handler installed by main() for notifications from the sampling event.
 *
 * Under `lock` it disables the sampling event (fdPreciseSample) and the fdBP
 * event, increments count_total, hands the mmap'd buffer to perf_mmap_read()
 * (storing the returned position in prev_head) and finally re-arms the
 * sampling event with PERF_EVENT_IOC_REFRESH for one more overflow.
 *
 * signum, info and uc are required by the SA_SIGINFO handler signature and
 * are not used.
 *
 * errno is saved on entry and restored on exit so that the ioctl()/printf()
 * calls made here cannot corrupt the errno value seen by the interrupted code.
 *
 * NOTE(review): printf() and pthread_mutex_lock() are not async-signal-safe.
 * main() also takes `lock`; if SIGIO is delivered to a thread while it holds
 * the mutex, this handler would block on it. Confirm this cannot happen or
 * replace the mutex with signal masking.
 */
static void our_handler(int signum, siginfo_t *info, void *uc) {
    int saved_errno = errno;
    int ret;

    (void) signum;
    (void) info;
    (void) uc;

    pthread_mutex_lock(&lock);
    ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, 0);

    printf("PEBS+++++++++>\n");
    ret = ioctl(fdBP, PERF_EVENT_IOC_DISABLE, 0);
    count_total++;
    prev_head = perf_mmap_read(our_mmap, MMAP_DATA_SIZE, prev_head,
                               global_sample_type, 0, global_sample_regs_user,
                               NULL, quiet, NULL, RAW_NONE);

    ioctl(fdPreciseSample, PERF_EVENT_IOC_REFRESH, 1);
    pthread_mutex_unlock(&lock);
    (void) ret;

    errno = saved_errno;
}
/*
 * Pick the raw sampling event to use on the CPU reported by
 * detect_processor().
 *
 * config      receives the raw event encoding
 * config1     receives the value for the attribute's config1 field
 * precise_ip  receives the precise_ip level to request
 * name        receives the event name; the buffer is supplied by the caller
 *
 * Returns 0 when the processor is one of the handled models. For any other
 * processor it returns -1, zeroes the three numeric outputs and stores
 * "UNKNOWN" in name.
 */
int get_latency_load_event(unsigned long long *config,
                           unsigned long long *config1,
                           int *precise_ip,
                           char *name) {
    unsigned long long raw_config;
    unsigned long long raw_config1;
    int precise_level;
    const char *event_name;
    int status = 0;
    int cpu = detect_processor();

    printf("proessor %d \n", cpu);

    switch (cpu) {
    /* Westmere encoding can be checked with:
       env LIBPFM_ENCODE_INACTIVE=1
       ./check_events wsm::MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD */
    case PROCESSOR_NEHALEM:
    case PROCESSOR_NEHALEM_EX:
    case PROCESSOR_WESTMERE:
    case PROCESSOR_WESTMERE_EX:
        raw_config = 0x100b;
        raw_config1 = 0x3;
        precise_level = 2;
        event_name = "MEM_INST_RETIRED:LATENCY_ABOVE_THRESHOLD";
        break;

    case PROCESSOR_SANDYBRIDGE:
    case PROCESSOR_SANDYBRIDGE_EP:
    case PROCESSOR_IVYBRIDGE:
    case PROCESSOR_IVYBRIDGE_EP:
    case PROCESSOR_HASWELL:
    case PROCESSOR_HASWELL_EP:
    case PROCESSOR_BROADWELL:
        raw_config = 0x1cd;
        raw_config1 = 0x3;
        precise_level = 2;
        event_name = "MEM_TRANS_RETIRED:LATENCY_ABOVE_THRESHOLD";
        break;

    case PROCESSOR_SKYLAKE:
        raw_config = 0x5308d1;
        raw_config1 = 0x0;
        precise_level = 3;
        event_name = "MEM_LOAD_RETIRED:L1_MISS";
        break;

    default:
        raw_config = 0x0;
        raw_config1 = 0x0;
        precise_level = 0;
        event_name = "UNKNOWN";
        status = -1;
        break;
    }

    *config = raw_config;
    *config1 = raw_config1;
    *precise_ip = precise_level;
    strcpy(name, event_name);

    return status;
}
/*
 * Shell snippet that pings google.com five times in quiet mode. Nothing in
 * the visible part of this file references it.
 * NOTE(review): the first line is "#/bin/bash", not "#!/bin/bash" - confirm
 * whether a shebang was intended.
 */
#define SHELLSCRIPT "\
#/bin/bash \n\
ping -q -c5 google.com \n\
"
/*
 * Length, in pages, of the perf mapping that initPEBS() creates and main()
 * unmaps (both multiply it by getpagesize()). The extra page on top of
 * MMAP_DATA_SIZE is presumably the perf metadata page - confirm against
 * perf_event_open(2).
 */
int mmap_pages = 1+MMAP_DATA_SIZE;
/*
 * Reset the sampling event's counter and enable it.
 *
 * Both ioctl() calls are always attempted; a failure of either one is
 * reported on stderr instead of being silently dropped.
 */
void enablePEBS(void)
{
    if (ioctl(fdPreciseSample, PERF_EVENT_IOC_RESET, 0) == -1) {
        perror("enablePEBS: PERF_EVENT_IOC_RESET");
    }
    if (ioctl(fdPreciseSample, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        perror("enablePEBS: PERF_EVENT_IOC_ENABLE");
    }
}
/*
 * Reset the counter of the fdBP event and enable it.
 *
 * Both ioctl() calls are always attempted; a failure of either one is
 * reported on stderr instead of being silently dropped.
 */
void enableBP(void)
{
    if (ioctl(fdBP, PERF_EVENT_IOC_RESET, 0) == -1) {
        perror("enableBP: PERF_EVENT_IOC_RESET");
    }
    if (ioctl(fdBP, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        perror("enableBP: PERF_EVENT_IOC_ENABLE");
    }
}
/*
 * Disable the sampling event. A failing ioctl() is reported on stderr
 * instead of being silently dropped.
 */
void disablePEBS(void)
{
    if (ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, 0) == -1) {
        perror("disablePEBS: PERF_EVENT_IOC_DISABLE");
    }
}
/*
 * Disable the fdBP event. A failing ioctl() is reported on stderr instead
 * of being silently dropped.
 */
void disableBP(void)
{
    if (ioctl(fdBP, PERF_EVENT_IOC_DISABLE, 0) == -1) {
        perror("disableBP: PERF_EVENT_IOC_DISABLE");
    }
}
/*
 * Return the current counter value of the sampling event.
 *
 * initPEBS() opens fdPreciseSample with PERF_FORMAT_GROUP | PERF_FORMAT_ID,
 * so a read() yields a 64-bit entry count followed by {value, id} pairs,
 * the first pair belonging to the event itself. A single 8-byte buffer is too
 * small for that layout: the read fails and the old code returned an
 * uninitialised local. The full record is read here and the first value is
 * returned.
 *
 * Returns the counter value, or -1 if the read fails or yields no entry
 * (a read() error is also reported on stderr).
 */
long long readPEBSCount(void)
{
    uint64_t words[512];
    ssize_t got = read(fdPreciseSample, words, sizeof(words));

    if (got == -1) {
        perror("readPEBSCount: read");
        return -1;
    }
    /* Need at least the entry count plus the first value. */
    if (got < (ssize_t)(2 * sizeof(words[0])) || words[0] == 0) {
        return -1;
    }
    return (long long)words[1];
}
/*
 * Event ids that main() compares against the id field of each entry it read.
 * id2 is filled in by initPEBS() through PERF_EVENT_IOC_ID; id1 is not
 * assigned anywhere in the visible part of this file.
 */
uint64_t id1, id2;
/* Counter values main() copies from the entries whose id equals id1 / id2. */
uint64_t val1, val2;
/*
 * Overlay main() applies to the bytes it read() from a perf event fd:
 * an entry count followed by that many {value, id} pairs.
 */
struct read_format {
uint64_t nr;
struct {
uint64_t value;
uint64_t id;
} values[];
};
void initPEBS()
{
struct perf_event_attr pe;
int result,precise_ip;
char event_name[BUFSIZ];
int iter=0;
memset(&pe, 0, sizeof(struct perf_event_attr));
result=get_latency_load_event(&pe.config,&pe.config1,
&precise_ip,event_name);
if (result<0) {
if (!quiet) fprintf(stderr,"No load latency event available, trying instructions (probably will return 0)\n");
pe.type=PERF_TYPE_HARDWARE;
pe.config=PERF_COUNT_HW_INSTRUCTIONS;
}
else {
pe.type=PERF_TYPE_RAW;
if (!quiet) printf("Using event %s\n",event_name);
}
printf("precise_ip %d pe.config1 %d\n",precise_ip,pe.config1);
pe.precise_ip=precise_ip;
pe.size = sizeof(struct perf_event_attr);
pe.sample_period=SAMPLE_FREQUENCY;
pe.sample_type= PERF_SAMPLE_IP | PERF_SAMPLE_ADDR | PERF_SAMPLE_REGS_USER;
global_sample_type = pe.sample_type;
pe.disabled = 1;
pe.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
pe.wakeup_events=1;
pe.exclude_user = 0;
pe.exclude_kernel = 1;
fdPreciseSample = perf_event_open(&pe, 0, -1, -1, 0);
if (fdPreciseSample == -1) {
fprintf(stderr, "Error opening leader %llx\n", pe.config);
exit(EXIT_FAILURE);
}
ioctl(fdPreciseSample, PERF_EVENT_IOC_ID, &id2);
#if 1
our_mmap=mmap(NULL, mmap_pages*getpagesize(),
PROT_READ|PROT_WRITE, MAP_SHARED, fdPreciseSample, 0);
/* Sigaction */
fcntl(fdPreciseSample, F_SETFL, O_RDWR|O_NONBLOCK|O_ASYNC);
fcntl(fdPreciseSample, F_SETSIG, SIGIO);
fcntl(fdPreciseSample, F_SETOWN,getpid());
/* Sigaction */
#endif
}
//struct perf_event_attr peBP;
/* Not referenced anywhere in the visible remainder of this file. */
#define MAX_WP_SLOTS (5)
int main(int argc, char **argv) {
long long count;
if (pthread_mutex_init(&lock, NULL) != 0)
{
printf("\n mutex init failed\n");
return 1;
}
/* Sigaction */
struct sigaction sa;
memset(&sa, 0, sizeof(struct sigaction));
sa.sa_sigaction = our_handler;
sa.sa_flags = SA_SIGINFO;
if (sigaction( SIGIO, &sa, NULL) < 0) {
fprintf(stderr,"Error setting up signal handler\n");
exit(1);
}
/* Sigaction */
/* Kernel Module init ends */
initPEBS();
pthread_mutex_lock(&lock);
ioctl(fdPreciseSample, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);
ioctl(fdPreciseSample, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);
pthread_mutex_unlock(&lock);
int coun = 10;
while(coun-->0)
naive_matrix_multiply(1);
pthread_mutex_lock(&lock);
ioctl(fdPreciseSample, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP);
pthread_mutex_unlock(&lock);
#if 1
char buf[4096];
struct read_format* rf = (struct read_format*) buf;
int i,j;
read(fdBP, buf, sizeof(buf));
for (i = 0; i < rf->nr; i++) {
if (rf->values[i].id == id1) {
val1 = rf->values[i].value;
} else if (rf->values[i].id == id2) {
val2 = rf->values[i].value;
}
}
printf("cpu cycles: %"PRIu64"\n", val1);
printf("page faults: %"PRIu64"\n", val2);
#endif
printf("Used %lld instructions. Our handler counted %d and bp_count_total %d\n", count,count_total,bp_count_total);
munmap(our_mmap,mmap_pages*getpagesize());
close(fdPreciseSample);
pthread_mutex_destroy(&lock);
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment