Skip to content

Instantly share code, notes, and snippets.

@Twinklebear
Last active August 29, 2015 14:18
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Twinklebear/48b5c4c44400c42a4743 to your computer and use it in GitHub Desktop.
Save Twinklebear/48b5c4c44400c42a4743 to your computer and use it in GitHub Desktop.
Issues hacking bits of AVX into MIPS for a hardware simulator
Intrinsic has incorrect return type!
<8 x float> ()* @llvm.mips.vzerops
fatal error: error in backend: Broken function found, compilation aborted!
clang: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 3.5.0 (tags/RELEASE_350/final)
Target: x86_64-unknown-linux-gnu
Thread model: posix
clang: note: diagnostic msg: PLEASE submit a bug report to http://llvm.org/bugs/ and include the crash backtrace, preprocessed source, and associated run script.
clang: note: diagnostic msg:
********************
PLEASE ATTACH THE FOLLOWING FILES TO THE BUG REPORT:
Preprocessed source(s) and associated run script(s) are located at:
clang: note: diagnostic msg: /tmp/helloworld-88464b.cpp
clang: note: diagnostic msg: /tmp/helloworld-88464b.sh
clang: note: diagnostic msg:
********************
make: *** [objs/helloworld.bc] Error 70
// all trax programs must include trax.hpp
#include "trax.hpp"
// "main" function is called "trax_main", returns void instead of int
void trax_main(){
const v8f32 c = trax_vzerops();
const float *cptr = (float*)&c;
printf("c contains: { ");
for (int i = 0; i < 8; ++i){
printf("%f, ", cptr[i]);
}
printf(" }\n");
}
//===----------------------------------------------------------------------===//
//
// MipsTrax-specific intrinsics.
// This block sits at the end of the regular IntrinsicsMips.td file; it defines the
// TRaX-specific intrinsics.
//===----------------------------------------------------------------------===//
// Signature layout used below: Intrinsic<[result types], [operand types], [properties]>.
// Only invsqrt, max and min are tagged IntrNoMem; every other intrinsic keeps the
// default properties.
def int_mips_atominc : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>;
def int_mips_invsqrt : Intrinsic<[llvm_float_ty], [llvm_float_ty], [IntrNoMem]>;
// Printing intrinsics: one operand each, no result.
def int_mips_printfmstr : Intrinsic<[], [llvm_ptrptr_ty]>;
def int_mips_printi : Intrinsic<[], [llvm_i32_ty]>;
def int_mips_printf : Intrinsic<[], [llvm_float_ty]>;
def int_mips_barrier : Intrinsic<[], [llvm_i32_ty]>;
def int_mips_increset : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>;
// Loads take two i32 operands; stores take the value to store followed by two i32 operands.
def int_mips_loadi : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty]>;
def int_mips_loadf : Intrinsic<[llvm_float_ty], [llvm_i32_ty, llvm_i32_ty]>;
def int_mips_storef : Intrinsic<[], [llvm_float_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_mips_storei : Intrinsic<[], [llvm_i32_ty, llvm_i32_ty, llvm_i32_ty]>;
def int_mips_max : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_mips_min : Intrinsic<[llvm_float_ty], [llvm_float_ty, llvm_float_ty], [IntrNoMem]>;
def int_mips_semacq : Intrinsic<[], [llvm_i32_ty]>;
def int_mips_semrel : Intrinsic<[], [llvm_i32_ty]>;
def int_mips_rand : Intrinsic<[llvm_float_ty], []>;
def int_mips_getid : Intrinsic<[llvm_i32_ty], [llvm_i32_ty]>;
def int_mips_nthreads : Intrinsic<[llvm_i32_ty], []>;
// will_i32ptr_ty is defined at the top of the file: def will_i32ptr_ty : LLVMPointerType<llvm_i32_ty>;
def int_mips_emitfragment : Intrinsic<[], [llvm_i32_ty, llvm_float_ty, will_i32ptr_ty]>;
def int_mips_saveframebuffer : Intrinsic<[], []>;
// Takes no operands and returns an 8 x float vector (the 256-bit experiment).
def int_mips_vzerops : Intrinsic<[llvm_v8f32_ty], []>;
# This is just the region of the Makefile where the code for the simulator is compiled
# clang is preferred.
# Compiler used for TRaX programs: the clang++ found in BINDIR.
TRAXC=${BINDIR}/clang++
#TRAXC=clang++
# NOTE(review): no target/triple flag is passed to clang here; the MIPS target is only
# selected later by llc (-march=mipsel) in the rt.s rule. Confirm this is intended.
TRAXCFLAGS=-DTRAX=1 -I${INCLUDEDIR} -S -O3 -std=c++11
# "all" and "default" are identical: create objs/, then build both outputs.
all: mkdirs ${BINARYOUTPUT} ${TRAXOUTPUT}
default: mkdirs ${BINARYOUTPUT} ${TRAXOUTPUT}
#all: mkdirs ${TRAXOUTPUT}
#default: mkdirs ${TRAXOUTPUT}
# Object files for the CXXSIMFILES sources, mapped from *.cc to objs/*.o.
COBJS := $(addprefix objs/, $(notdir $(CXXSIMFILES:.cc=.o)))
# Build BINARYOUTPUT with CXX from SOURCES plus the COBJS objects.
${BINARYOUTPUT}: ${SOURCES} ${COBJS}
@echo "Building ${BINARYOUTPUT}"
@${CXX} ${SOURCES} ${COBJS} ${CXXLIBS} ${CXXFLAGS} ${LDFLAGS} -o ${BINARYOUTPUT}
# TRaX output: ln.py is run over rt.s and the library assembly files, and its
# standard output is redirected into TRAXOUTPUT.
${TRAXOUTPUT}: rt.s
@echo "Writing ${TRAXOUTPUT}"
@${LINKERDIR}/ln.py rt.s ${LIBDIR}/memset.s ${LIBDIR}/memcpy.s ${LIBDIR}/__extendsfdf2.s > ${TRAXOUTPUT}
# Make sure the objs/ output directory exists.
mkdirs:
@mkdir -p objs;
# Compile each SIMDIR .cc file into objs/ with CXX.
objs/%.o: ${SIMDIR}/%.cc
@echo "Building $<"
@${CXX} ${CXXFLAGS} -o $@ -c $<
# LLVM output for the TRaX runtime source trax_trax.cpp in LIBDIR.
objs/trax_trax.bc: ${LIBDIR}/trax_trax.cpp
@echo "Building $<"
@${TRAXC} -emit-llvm ${TRAXCFLAGS} -o $@ -c $<
# LLVM output for each user source file (suffix given by CSUFFIX).
objs/%.bc: %${CSUFFIX}
@echo "Building $<"
@${TRAXC} -emit-llvm ${TRAXCFLAGS} -o $@ -c $<
# Everything that gets linked into rt.bc: the user sources plus trax_trax.bc.
OBJS := $(addprefix objs/, $(notdir $(SOURCES:${CSUFFIX}=.bc))) objs/trax_trax.bc
#OBJS := $(addprefix objs/, $(notdir $(SOURCES:${CSUFFIX}=.bc)))
# rt.s pipeline: llvm-link merges OBJS into rt.bc, opt rewrites rt.bc in place at -O3
# with a very large inline threshold, then llc emits mipsel assembly.
rt.s: ${OBJS} ${INCLUDEDIR}/trax.hpp
@${BINDIR}/llvm-link ${OBJS} -o rt.bc
@${BINDIR}/opt rt.bc -O3 -inline-threshold 200000000 -o rt.bc
@${BINDIR}/llc rt.bc -o rt.s -march=mipsel -mattr=+mips32,+single-float -relocation-model=static
// Only change is adding RegKind_FGR256 to MipsOperand::RegKind enum
// Will: hacking in 256 bit regs
/// Coarse register-class categories. Each category owns one bit so that
/// several of them can be OR'd into a single mask.
/// The render method settles on the exact register class.
enum RegKind {
  RegKind_GPR     = 1 << 0, /// GPR32 and GPR64 (depending on isGP64bit())
  RegKind_FGR     = 1 << 1, /// FGR32, FGR64, AFGR64 (depending on context and
                            /// isFP64bit())
  RegKind_FCC     = 1 << 2, /// FCC
  RegKind_MSA128  = 1 << 3, /// MSA128[BHWD] (makes no difference which)
  RegKind_MSACtrl = 1 << 4, /// MSA control registers
  RegKind_COP2    = 1 << 5, /// COP2
  RegKind_ACC     = 1 << 6, /// HI32DSP, LO32DSP, and ACC64DSP (depending on
                            /// context).
  RegKind_CCR     = 1 << 7, /// CCR
  RegKind_HWRegs  = 1 << 8, /// HWRegs
  RegKind_COP3    = 1 << 9, /// COP3
  // Will: hacking in 256 bit regs
  RegKind_FGR256  = 1 << 10,

  /// Potentially any (e.g. $1)
  // NOTE(review): RegKind_FGR256 is not part of this mask; confirm that a
  // purely numeric register name should never resolve to a 256-bit register.
  RegKind_Numeric = RegKind_GPR | RegKind_FGR |
                    RegKind_FCC | RegKind_MSA128 |
                    RegKind_MSACtrl | RegKind_COP2 |
                    RegKind_ACC | RegKind_CCR |
                    RegKind_HWRegs | RegKind_COP3
};
// Will: hacking in 256 bit registers
/// Decode a register number into an FGR256 register operand.
///
/// Register numbers 0 through 15 are looked up in the FGR256 register class
/// via getReg() and appended to \p Inst as a register operand; any larger
/// number is rejected. \p Address is not used.
///
/// \returns MCDisassembler::Success when an operand was added,
///          MCDisassembler::Fail otherwise.
static DecodeStatus DecodeFGR256RegisterClass(MCInst &Inst,
                                              unsigned RegNo,
                                              uint64_t Address,
                                              const void *Decoder) {
  const unsigned HighestRegNo = 15;
  if (RegNo <= HighestRegNo) {
    Inst.addOperand(MCOperand::CreateReg(
        getReg(Decoder, Mips::FGR256RegClassID, RegNo)));
    return MCDisassembler::Success;
  }
  return MCDisassembler::Fail;
}
//===----------------------------------------------------------------------===//
// TRaX intrinsic->instructions
// These are stuck at the end of the regular MipsInstrInfo.td file, adding the TraX
// specific intrinsics
//===----------------------------------------------------------------------===//
// Atomic increment: one GPR in, one GPR out; no selection pattern is attached here
// (the pattern list is empty).
def ATOMIC_INC : FR<0x0, 0x2B, (outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt),
"ATOMIC_INC $rd, $rt", [], II_SW >;
// Inverse square root on single-precision registers, built from the ABSS_FT/ABSS_FM classes.
def FPINVSQRT : ABSS_FT<"FPINVSQRT", FGR32Opnd, FGR32Opnd, II_INVSQRT_S>,
ABSS_FM<0x10, 16>, ISA_MIPS2;
// Print instructions. Mind the mnemonics: PRINTFRMT is emitted as "PRINTF", while
// both PRINT (GPR operand) and PRINTF (FGR operand) are emitted as "PRINT".
def PRINTFRMT : SingleArg<0x00, 0x3B, "PRINTF", GPR32Opnd, II_PRINTF>;
def PRINT : SingleArg<0x00, 0x14, "PRINT", GPR32Opnd, II_PRINTF>;
def PRINTF : SingleArg<0x00, 0x15, "PRINT", FGR32Opnd, II_PRINTF>;
// Synchronization instructions; everything inside this block is flagged isBarrier.
let isBarrier=1 in
{
def BARRIER : SingleArg<0x00, 0x12, "BARRIER", GPR32Opnd, NoItinerary>;
def INC_RESET : FR<0x00, 0x1E, (outs GPR32Opnd:$rd), (ins GPR32Opnd:$rt),
"INC_RESET $rd, $rt", [], NoItinerary>;
def SEM_ACQ : SingleArg<0x00, 0x16, "SEM_ACQ", GPR32Opnd, NoItinerary>;
def SEM_REL : SingleArg<0x00, 0x17, "SEM_REL", GPR32Opnd, NoItinerary>;
}
// Integer and float loads share the "LOAD" mnemonic and differ only in the
// destination operand class.
def LOADI : FI<0x11, (outs GPR32Opnd:$rt), (ins GPR32Opnd:$rs, simm16:$imm16),
"LOAD $rt, $rs, $imm16", [], II_TRAX_LOAD>;
def LOADF : FI<0x3B, (outs FGR32Opnd:$rt), (ins GPR32Opnd:$rs, simm16:$imm16),
"LOAD $rt, $rs, $imm16", [], II_TRAX_LOAD>;
// Stores likewise share the "STORE" mnemonic; the assembly string prints $rt before $rs.
def STOREF : FI<0x3C, (outs), (ins FGR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm16),
"STORE $rt, $rs, $imm16", [], II_SW>;
def STOREI : FI<0x3F, (outs), (ins GPR32Opnd:$rs, GPR32Opnd:$rt, simm16:$imm16),
"STORE $rt, $rs, $imm16", [], II_SW>;
// Float max/min reuse the ADDS_FT/ADDS_FM classes and the II_ADD_S itinerary.
def FPMAX : ADDS_FT<"FPMAX", FGR32Opnd, II_ADD_S, 1>,
ADDS_FM<0x0C, 16>;
def FPMIN : ADDS_FT<"FPMIN", FGR32Opnd, II_ADD_S, 1>,
ADDS_FM<0x05, 16>;
def RAND : SingleOut<0x00, 0x0E, "RAND", FGR32Opnd, II_ADD_S>;
// GETID just moves the special register (0-2) in to $rd
def GETID : RI<0x20, "MOV", GPR32Opnd, simm16>;
def NTHREADS : SingleOut<0x00, 0x21, "NTHREADS", GPR32Opnd, NoItinerary>;
def EMIT_FRAGMENT : FI<0x19,(outs), (ins GPR32Opnd:$px, FGR32Opnd:$dpth, GPR32Opnd:$dat),
"EMIT_FRAGMENT $px, $dpth, $dat", [], NoItinerary>;
// NOTE(review): SAVE_FRAMEBUFFER is declared through SingleOut with a GPR32Opnd, the
// same shape as NTHREADS; confirm a result operand is really wanted here.
def SAVE_FRAMEBUFFER : SingleOut<0x00, 0x22, "SAVE_FRAMEBUFFER", GPR32Opnd, NoItinerary>;
// 256-bit experiment: the only instruction here that uses FGR256Opnd.
// NOTE(review): the leading (0x00, 0x3B) arguments are the same pair PRINTFRMT passes
// to SingleArg above. If both classes treat them as opcode/function fields, the two
// instructions share an encoding - confirm against the SingleArg/SingleOut definitions.
def vzerops : SingleOut<0x00, 0x3B, "vzerops", FGR256Opnd, NoItinerary>;
//===----------------------------------------------------------------------===//
// TRaX intrinsic pattern definitions
//===----------------------------------------------------------------------===//
// Each pattern maps one TRaX intrinsic call onto the instruction that implements it,
// forwarding the intrinsic's operands unchanged. Every intrinsic has exactly one
// pattern; the second, identical int_mips_invsqrt -> FPINVSQRT pattern that used to
// sit after the min pattern was redundant and has been dropped.

// Arithmetic on a single operand.
def : Pat<(int_mips_atominc (i32 GPR32Opnd:$in)), (ATOMIC_INC GPR32Opnd:$in)>;
def : Pat<(int_mips_invsqrt (f32 FGR32:$fd)), (FPINVSQRT FGR32:$fd)>;
// Printing.
def : Pat<(int_mips_printfmstr (i32 GPR32Opnd:$target)),(PRINTFRMT GPR32Opnd:$target)>;
def : Pat<(int_mips_printi (i32 GPR32Opnd:$target)),(PRINT GPR32Opnd:$target)>;
def : Pat<(int_mips_printf (f32 FGR32Opnd:$target)),(PRINTF FGR32Opnd:$target)>;
// Synchronization.
def : Pat<(int_mips_barrier (i32 GPR32Opnd:$in)), (BARRIER GPR32Opnd:$in)>;
def : Pat<(int_mips_increset (i32 GPR32Opnd:$in)), (INC_RESET GPR32Opnd:$in)>;
// Loads and stores: the last operand is matched as an immediate.
def : Pat<(int_mips_loadi (i32 GPR32Opnd:$addr), imm:$imm),
(LOADI GPR32Opnd:$addr, imm:$imm)>;
def : Pat<(int_mips_loadf (i32 GPR32Opnd:$addr), imm:$imm),
(LOADF GPR32Opnd:$addr, imm:$imm)>;
def : Pat<(int_mips_storef (f32 FGR32Opnd:$dst), GPR32Opnd:$addr, imm:$imm),
(STOREF FGR32Opnd:$dst, GPR32Opnd:$addr, imm:$imm)>;
def : Pat<(int_mips_storei (i32 GPR32Opnd:$dst), GPR32Opnd:$addr, imm:$imm),
(STOREI GPR32Opnd:$dst, GPR32Opnd:$addr, imm:$imm)>;
// Float max/min.
def : Pat<(int_mips_max (f32 FGR32Opnd:$ft), (f32 FGR32Opnd:$fs)), (FPMAX FGR32Opnd:$ft, FGR32Opnd:$fs)>;
def : Pat<(int_mips_min (f32 FGR32Opnd:$ft), (f32 FGR32Opnd:$fs)), (FPMIN FGR32Opnd:$ft, FGR32Opnd:$fs)>;
// Semaphores.
def : Pat<(int_mips_semacq (i32 GPR32Opnd:$in)), (SEM_ACQ GPR32Opnd:$in)>;
def : Pat<(int_mips_semrel (i32 GPR32Opnd:$in)), (SEM_REL GPR32Opnd:$in)>;
// Operand-less or immediate-only intrinsics.
def : Pat<(int_mips_rand), (RAND)>;
def : Pat<(int_mips_getid imm:$imm), (GETID imm:$imm)>;
def : Pat<(int_mips_nthreads), (NTHREADS)>;
def : Pat<(int_mips_emitfragment (i32 GPR32Opnd:$px), (f32 FGR32Opnd:$dpth), (i32 GPR32Opnd:$dat)), (EMIT_FRAGMENT GPR32Opnd:$px, FGR32Opnd:$dpth, GPR32Opnd:$dat)>;
def : Pat<(int_mips_saveframebuffer), (SAVE_FRAMEBUFFER)>;
// 256-bit experiment: no operands, selects the vzerops instruction.
def : Pat<(int_mips_vzerops), (vzerops)>;
// These are my modifications to the MipsRegisterInfo.td file attempting to add 16 256-bit registers
// They're placed within the Mips namespace in the same areas where the other registers have these same
// things defined
// A 256-bit FP register: a MipsReg built from a 16-bit encoding value and a name.
class FPR256<bits<16> Enc, string n> : MipsReg<Enc, n>;
// Define YMM0..YMM15, named "ymm0".."ymm15", with DWARF register numbers I + 32
// (i.e. 32..47).
// NOTE(review): confirm 32..47 do not overlap DWARF numbers already assigned to
// other registers in this file.
foreach I = 0-15 in
def YMM#I : FPR256<I, "ymm"#I>, DwarfRegNum<[!add(I, 32)]>;
// Register class over the 16 YMM registers, holding v8f32 values, with 256 passed
// as the alignment argument.
def FGR256 : RegisterClass<"Mips", [v8f32], 256, (sequence "YMM%u", 0, 15)>;
// These aren't in the Mips namespace but are placed where all the other Mips register classes seem
// to be defined
// WILL: Define AVX register operand
// Asm-parser operand class for FGR256 registers. Matching is delegated to a
// predicate method named isFGR256AsmReg, which is not part of this excerpt.
def FGR256AsmOperand : MipsAsmRegOperand {
let Name = "FGR256AsmReg";
let PredicateMethod = "isFGR256AsmReg";
}
// Register operand over the FGR256 register class, parsed through FGR256AsmOperand.
def FGR256Opnd : RegisterOperand<FGR256> {
// Open question from the author: could the FGR32 parser match class be reused here?
let ParserMatchClass = FGR256AsmOperand;
}
// Only modification is in MipsSETargetLowering constructor, I've added the register with:
// Make v8f32 a legal type backed by the 256-bit register class. addRegisterClass
// expects a pointer to a TargetRegisterClass, which for the TableGen class FGR256 is
// the generated Mips::FGR256RegClass object (the same class whose ID,
// Mips::FGR256RegClassID, the disassembler uses). FGR256Opnd is a TableGen
// RegisterOperand, not a register class, so it cannot be passed here.
addRegisterClass(MVT::v8f32, &Mips::FGR256RegClass);
#ifndef TRAX_TRAX_H
#define TRAX_TRAX_H
// This file contains the various TRaX specific intrinsics for the simulator

// v8f32: a 32-byte vector of floats (8 lanes), declared with the vector_size
// attribute extension.
typedef float v8f32 __attribute__((vector_size(32)));
// Trax Intrinsics
// Every declaration in this block carries an asm label of the form "llvm.mips.<name>",
// so a call to the C function is emitted as a call to the symbol with that name.
// NOTE(review): several labels used here (e.g. globalrrd, writesrf, callnoise, the
// stream* family, profile, loadl1) presumably correspond to intrinsic definitions
// elsewhere in the backend - verify each one has a matching definition.
extern "C"
{
// Global Memory
// The offset parameter defaults to 0 on all four accessors.
extern int loadi( int base, int offset = 0 ) asm("llvm.mips.loadi");
extern float loadf( int base, int offset = 0 ) asm("llvm.mips.loadf");
extern void storei( int value, int base, int offset = 0 ) asm("llvm.mips.storei");
extern void storef( float value, int base, int offset = 0 ) asm("llvm.mips.storef");
// Arithmetic
extern int atomicinc( int location ) asm("llvm.mips.atominc");
extern float min( float left, float right ) asm("llvm.mips.min");
extern float max( float left, float right ) asm("llvm.mips.max");
extern float invsqrt( float value ) asm("llvm.mips.invsqrt");
// Misc
extern int global_reg_read( int location ) asm("llvm.mips.globalrrd");
extern int trax_getid( int value ) asm("llvm.mips.getid");
// Random
extern float trax_rand( ) asm("llvm.mips.rand");
// Synchronization
extern int trax_inc_reset( int reg_num ) asm("llvm.mips.increset");
extern void trax_barrier( int reg_num ) asm("llvm.mips.barrier");
extern void trax_semacq( int reg_num ) asm("llvm.mips.semacq");
extern void trax_semrel( int reg_num ) asm("llvm.mips.semrel");
// Noise
extern void trax_write_srf( float reg1, float reg2, float reg3 ) asm("llvm.mips.writesrf");
extern void trax_write_sri( int reg1, int reg2, int reg3 ) asm("llvm.mips.writesri");
extern float trax_callnoise( ) asm("llvm.mips.callnoise");
// Streams
// Stream writes
extern void start_stream_write( int stream_id ) asm("llvm.mips.startsw");
extern void stream_writei( int value ) asm("llvm.mips.streamwi");
extern void stream_writef( float value ) asm("llvm.mips.streamwf");
extern void end_stream_write() asm("llvm.mips.endsw");
// Stream reads
extern int start_stream_read() asm("llvm.mips.startsr");
extern int stream_readi() asm("llvm.mips.streamri");
extern float stream_readf() asm("llvm.mips.streamrf");
extern void end_stream_read() asm("llvm.mips.endsr");
// Stream scheduling
extern int stream_size( int stream_id ) asm("llvm.mips.streamsize");
extern int stream_schedule( int schedule_id ) asm("llvm.mips.streamschedule");
extern void set_stream_read_id( int stream_id ) asm("llvm.mips.setstreamid");
// Debug
extern void profile( int prof_id ) asm("llvm.mips.profile");
extern int loadl1( int base, int offset = 0 )asm("llvm.mips.loadl1");
//extern int loadl2( int base, int offset = 0 )asm("llvm.mips.loadl2");
extern void trax_printi(int value) asm("llvm.mips.printi"); // print integer
extern void trax_printf( float value ) asm("llvm.mips.printf"); // print float (unfortunately this uses the "printf" name)
extern void trax_printformat(const char** string_addr) asm("llvm.mips.printfmstr"); // 'equivalent' to stdio::printf
extern void trax_emitfragment(int pixel, float depth, int* data) asm("llvm.mips.emitfragment");
extern void trax_saveframebuffer() asm("llvm.mips.saveframebuffer");
// 256-bit experiment: takes no arguments and returns a v8f32 by value.
extern v8f32 trax_vzerops() asm("llvm.mips.vzerops");
}
// Simplified printf for TRaX. It is declared directly in this header, at file scope
// (not inside an extern "C" block), with the usual variadic signature.
extern int printf ( const char * format, ... );
// Calls trax_inc_reset on reg_num, then trax_barrier on the same register, and
// hands back whatever trax_inc_reset returned.
inline int barrier( int reg_num ) {
	const int inc_reset_result = trax_inc_reset( reg_num );
	trax_barrier( reg_num );
	return inc_reset_result;
}
// Passes (x, y, z) to trax_write_srf and then returns the result of
// trax_callnoise().
inline float trax_noise( float x, float y, float z ) {
	trax_write_srf( x, y, z );
	const float noise_value = trax_callnoise();
	return noise_value;
}
// Entry point the user must define instead of "main". Declared noinline.
void trax_main()__attribute__((noinline));
// The real main function (defined in trax_trax.cpp, where it just calls trax_main()).
int main();
#endif
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment