Skip to content

Instantly share code, notes, and snippets.

/-

Created February 18, 2016 00:17
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save anonymous/de3b01211b52b6162380 to your computer and use it in GitHub Desktop.
Save anonymous/de3b01211b52b6162380 to your computer and use it in GitHub Desktop.
diff --git a/lib/Target/NVPTX/NVPTXInstrInfo.td b/lib/Target/NVPTX/NVPTXInstrInfo.td
index 54acf72..8758feb 100644
--- a/lib/Target/NVPTX/NVPTXInstrInfo.td
+++ b/lib/Target/NVPTX/NVPTXInstrInfo.td
@@ -162,130 +162,146 @@ def hasPTX31 : Predicate<"Subtarget->getPTXVersion() >= 31">;
// Some Common Instruction Class Templates
//===----------------------------------------------------------------------===//
{+// Template for instructions which take three int64, int32, or int16 args.+}
{+// The instructions are named "<OpcStr><Width>" (e.g. "add.s64").+}
multiclass I3<string OpcStr, SDNode OpNode> {
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
def i16ri :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (imm):$b))]>;
}
{+// Template for instructions which take 3 int32 args. The instructions are+}
{+// named "<OpcStr>.s32" (e.g. "addc.cc.s32").+}
multiclass ADD_SUB_INT_32<string OpcStr, SDNode OpNode> {
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".s32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
}
{+// Template for instructions which take three fp64 or fp32 args. The+}
{+// instructions are named "<OpcStr>.f<Width>" (e.g. "add.f64").+}
{+//+}
{+// Also defines ftz (flush subnormal inputs and results to sign-preserving+}
{+// zero) variants for fp32 functions.+}
multiclass F3<string OpcStr, SDNode OpNode> {
def f64rr :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
!strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
Requires<[allowFMA]>;
def f64ri :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
!strconcat(OpcStr, ".f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
Requires<[allowFMA]>;
def f32rr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[allowFMA, doF32FTZ]>;
def f32ri_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[allowFMA, doF32FTZ]>;
def f32rr :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[allowFMA]>;
def f32ri :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[allowFMA]>;
}
{+// Same as F3, but defines ".rn" variants (round to nearest even).+}
multiclass F3_rn<string OpcStr, SDNode OpNode> {
def f64rr :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, Float64Regs:$b))]>,
Requires<[noFMA]>;
def f64ri :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
!strconcat(OpcStr, ".rn.f64 \t$dst, $a, $b;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
def f32rr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[noFMA, doF32FTZ]>;
def f32ri_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.ftz.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA, doF32FTZ]>;
def f32rr :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, Float32Regs:$b))]>,
Requires<[noFMA]>;
def f32ri :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
!strconcat(OpcStr, ".rn.f32 \t$dst, $a, $b;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a, fpimm:$b))]>,
Requires<[noFMA]>;
}
{+// Template for operations which take two f32 or f64 operands. Provides three+}
{+// instructions: <OpcStr>.f64, <OpcStr>.f32, and <OpcStr>.ftz.f32 (flush+}
{+// subnormal inputs and results to zero).+}
multiclass F2<string OpcStr, SDNode OpNode> {
// 64-bit form; no Requires<> predicate is attached.
def f64 : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$a),
!strconcat(OpcStr, ".f64 \t$dst, $a;"),
[(set Float64Regs:$dst, (OpNode Float64Regs:$a))]>;
// 32-bit flush-to-zero form. Its pattern is identical to f32 below; the only
// differences are the ".ftz" in the mnemonic and the doF32FTZ predicate.
def f32_ftz : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
!strconcat(OpcStr, ".ftz.f32 \t$dst, $a;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>,
Requires<[doF32FTZ]>;
// Plain 32-bit form; no Requires<> predicate is attached.
def f32 : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$a),
!strconcat(OpcStr, ".f32 \t$dst, $a;"),
[(set Float32Regs:$dst, (OpNode Float32Regs:$a))]>;
}
//===----------------------------------------------------------------------===//
@@ -293,160 +309,239 @@ multiclass F2<string OpcStr, SDNode OpNode> {
//===----------------------------------------------------------------------===//
//-----------------------------------
//[-General-] Type Conversion
//-----------------------------------
let hasSideEffects = 0 in {
// Generate a cvt to the given type from all possible types.[-//-] Each instance
{+//+} takes a CvtMode immediate that defines the conversion mode to[-//-] use. It can
{+//+} be CvtNONE to omit a conversion mode.
multiclass CVT_FROM_ALL<string FromName, RegisterClass RC> {
// Despite its name, FromName is printed as the FIRST type suffix (right after
// the ${mode:...} modifiers) and RC is the register class of $dst, so the two
// template arguments describe the result side of every cvt defined here. The
// def suffix (_s16, _u16, ...) matches the SECOND type suffix, which goes with
// the $src operand. Every def has an empty pattern list ([]).

// 16-bit sources (Int16Regs).
def _s16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".s16\t$dst, $src;"), []>;
def _u16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".u16\t$dst, $src;"), []>;
// The f16 source operand also uses Int16Regs.
def _f16 :
NVPTXInst<(outs RC:$dst),
(ins Int16Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".f16\t$dst, $src;"), []>;
// 32-bit integer sources (Int32Regs).
def _s32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".s32\t$dst, $src;"), []>;
def _u32 :
NVPTXInst<(outs RC:$dst),
(ins Int32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".u32\t$dst, $src;"), []>;
// 64-bit integer sources (Int64Regs).
def _s64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".s64\t$dst, $src;"), []>;
def _u64 :
NVPTXInst<(outs RC:$dst),
(ins Int64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".u64\t$dst, $src;"), []>;
// Floating-point sources (Float32Regs / Float64Regs).
def _f32 :
NVPTXInst<(outs RC:$dst),
(ins Float32Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".f32\t$dst, $src;"), []>;
def _f64 :
NVPTXInst<(outs RC:$dst),
(ins Float64Regs:$src, CvtMode:$mode),
!strconcat("cvt${mode:base}${mode:ftz}${mode:sat}.",
FromName, ".f64\t$dst, $src;"), []>;
}
// Generate[-a-] cvt[-t-]{+s fr+}o{+m+} all {+ty+}p[-os-]{+e+}s [-ib-]{+to al+}l[-e-] types.
defm CVT_s16 : CVT_FROM_ALL<"s16", Int16Regs>;
defm CVT_u16 : CVT_FROM_ALL<"u16", Int16Regs>;
defm CVT_f16 : CVT_FROM_ALL<"f16", Int16Regs>;
defm CVT_s32 : CVT_FROM_ALL<"s32", Int32Regs>;
defm CVT_u32 : CVT_FROM_ALL<"u32", Int32Regs>;
defm CVT_s64 : CVT_FROM_ALL<"s64", Int64Regs>;
defm CVT_u64 : CVT_FROM_ALL<"u64", Int64Regs>;
defm CVT_f32 : CVT_FROM_ALL<"f32", Float32Regs>;
defm CVT_f64 : CVT_FROM_ALL<"f64", Float64Regs>;
// Th[-is-]{+e+}se[-t of-] cvt[-i-]s {+are+} different from th{+os+}e above[-.-]{+:+} The[-type of the-] source[-//-] and {+des+}t[-a-] r[-g-]e{+gis+}t{+ers+}
{+ //+} are {+of+} the same {+type+}.[-//-]
// Naming: CVT_INREG_<first suffix>_<second suffix>, mirroring the mnemonic
// "cvt.<first>.<second>". In every def $src and $dst use the register class of
// the first (wider) suffix, and the pattern list is empty ([]).
// NOTE(review): presumably these sign-extend the low 8/16/32 bits held in the
// wider register -- confirm against the PTX cvt semantics.
def CVT_INREG_s16_s8 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"cvt.s16.s8 \t$dst, $src;", []>;
def CVT_INREG_s32_s8 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"cvt.s32.s8 \t$dst, $src;", []>;
def CVT_INREG_s32_s16 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"cvt.s32.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s8 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"cvt.s64.s8 \t$dst, $src;", []>;
def CVT_INREG_s64_s16 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"cvt.s64.s16 \t$dst, $src;", []>;
def CVT_INREG_s64_s32 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"cvt.s64.s32 \t$dst, $src;", []>;
}
//-----------------------------------
// Integer Arithmetic
//-----------------------------------
{+// Template for xor masquerading as int1 arithmetic.+}
multiclass ADD_SUB_i1<SDNode OpNode> {
def _rr: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
"xor.pred \t$dst, $a, $b;",
[(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
def _ri: NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
"xor.pred \t$dst, $a, $b;",
[(set Int1Regs:$dst, (OpNode Int1Regs:$a, (imm):$b))]>;
}
{+// int1 addition and subtraction are both just xor.+}
defm ADD_i1 : ADD_SUB_i1<add>;
defm SUB_i1 : ADD_SUB_i1<sub>;
{+// int16, int32, and int64 signed addition. Since nvptx is 2's complement, we+}
{+// also use these for unsigned arithmetic.+}
defm ADD : I3<"add.s", add>;
defm SUB : I3<"sub.s", sub>;
{+// int32 addition and subtraction with carry-out.+}
{+// FIXME: PTX 4.3 adds a 64-bit add.cc (and maybe also 64-bit addc.cc?).+}
defm ADDCC : ADD_SUB_INT_32<"add.cc", addc>;
defm SUBCC : ADD_SUB_INT_32<"sub.cc", subc>;
{+// int32 addition and subtraction with carry-in and carry-out.+}
defm ADDCCC : ADD_SUB_INT_32<"addc.cc", adde>;
defm SUBCCC : ADD_SUB_INT_32<"subc.cc", sube>;
{+defm MULT : I3<"mul.lo.s", mul>;+}
{+defm MULTHS : I3<"mul.hi.s", mulhs>;+}
{+defm MULTHU : I3<"mul.hi.u", mulhu>;+}
{+defm SDIV : I3<"div.s", sdiv>;+}
{+defm UDIV : I3<"div.u", udiv>;+}
// {+The ri versions of rem.s and re+}m{+.+}u {+won't be se+}l[-.-]{+ected; DAGCombiner::visitSREM+}
{+//+} wi{+ll lower it.+}
{+defm SREM : I3<"rem.s", srem>;+}
{+defm UREM : I3<"rem.u", urem>;+}
{+//+}
{+// Wide multiplication+}
{+//+}
de{+f MULWIDES64 :+}
{+ NV+}PTX[-i-]{+I+}nst[-r-]{+<(o+}u[-c-]t{+s Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),+}
{+ "mul.w+}i{+de.s32 \t$dst, $a, $b;", []>;+}
{+def MULWIDES64Imm :+}
{+ NVPTXInst<(+}o{+uts Int64Regs:$dst), (i+}n{+s Int32Regs:$a, i32imm:$b),+}
{+ "mul.wide.s32 \t$dst, $a, $b;", []>;+}
{+def MULWIDES64Imm64 :+}
{+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),+}
{+ "mul.wide.s32 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU64 :+}
{+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),+}
{+ "mul.wide.u32 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU64Imm :+}
{+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i32imm:$b),+}
{+ "mul.wide.u32 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU64Imm64 :+}
{+ NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),+}
{+ "mul.wide.u32 \t$dst, $a, $b;", []>;+}
{+def MULWIDES32 :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),+}
{+ "mul.wide.s16 \t$dst, $a, $b;", []>;+}
{+def MULWIDES32Imm :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),+}
{+ "mul.wide.s16 \t$dst, $a, $b;", []>;+}
{+def MULWIDES32Imm32 :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),+}
{+ "mul.wide.s16 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU32 :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),+}
{+ "mul.wide.u16 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU32Imm :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),+}
{+ "mul.wide.u16 \t$dst, $a, $b;", []>;+}
{+def MULWIDEU32Imm32 :+}
{+ NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),+}
{+ "mul.wide.u16 \t$dst, $a, $b;", []>;+}
{+def SDTMulWide : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;+}
{+def mul_wide_signed : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;+}
{+def mul_wide_unsigned : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;+}
{+// Matchers for signed, unsigned mul.wide ISD nodes.+}
{+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),+}
{+ (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),+}
{+ (MULWIDES32Imm Int16Regs:$a, imm:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),+}
{+ (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),+}
{+ (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, Int32Regs:$b)),+}
{+ (MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i64 (mul_wide_signed Int32Regs:$a, imm:$b)),+}
{+ (MULWIDES64Imm Int32Regs:$a, imm:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, Int32Regs:$b)),+}
{+ (MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+def : Pat<(i64 (mul_wide_unsigned Int32Regs:$a, imm:$b)),+}
{+ (MULWIDEU64Imm Int32Regs:$a, imm:$b)>,+}
{+ Requires<[doMulWide]>;+}
{+// Predicates used for converting some patterns to mul.wide.+}
def SInt32Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
[-if (-]{+return+} v.isSignedIntN(32)[-)-]
[- return true;-]
[- return false-];
}]>;
def UInt32Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
[-if (-]{+return+} v.isIntN(32)[-)-]
[- return true;-]
[- return false-];
}]>;
def SInt16Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
[-if (-]{+return+} v.isSignedIntN(16)[-)-]
[- return true;-]
[- return false-];
}]>;
def UInt16Const : PatLeaf<(imm), [{
const APInt &v = N->getAPIntValue();
[-if (-]{+return+} v.isIntN(16)[-)-]
[- return true;-]
[- return false-];
}]>;
def Int5Const : PatLeaf<(imm), [{
{+// Check if 0 <= v < 32; only then will the result of (x << v) be an int32.+}
const APInt &v = N->getAPIntValue();
[-// Check if 0 <= v < 32-]
[- // Only then the-] re[-sul-]t[-f-]{+u+}r[-om (x << v) will be i32-]
[- if (-]{+n+} v.sge(0) && v.slt(32)[-)-]
[- return true;-]
[- return false-];
}]>;
def Int4Const : PatLeaf<(imm), [{
{+// Check if 0 <= v < 16; only then will the result of (x << v) be an int16.+}
const APInt &v = N->getAPIntValue();
[-// Check if 0 <= v < 16-]
[- // Only then the-] re[-sul-]t[-f-]{+u+}r[-om (x << v) will be i16-]
[- if (-]{+n+} v.sge(0) && v.slt(16)[-)-]
[- return true;-]
[- return false-];
}]>;
def SHL2MUL32 : SDNodeXForm<imm, [{
@@ -461,215 +556,133 @@ def SHL2MUL16 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(temp.shl(v), SDLoc(N), MVT::i16);
}]>;
[-def MULWIDES64-]
[- : NVPTXInst<(-]{+// C+}o[-uts I-]n[-t64R-]{+v+}e[-gs:$ds-]{+r+}t [-), (in-]{+"+}s[-Int32Re-]{+i+}g[-s:$a, I-]n[-t32R-]{+/z+}e[-gs:$b),-]
[- "mul.wid-]{+ro-+}e[-.s32 \t$ds-]{+x+}t[-, $a, $b;", []>;-]
[-d-]e[-f MULWIDES64Imm-]
[- : NVPTXInst<(outs I-]n[-t64Regs:$-]d[-st), (ins Int32Regs:$a, i32imm:$b)-],[-"mul.wide.s32 \t$ds-] t[-, $a, $b;", []>;-]
[-d-]{+h+}e[-f MULWIDES64Imm64-]
[- : NVPTXInst<(outs I-]n[-t64Regs:$d-] s[-t), (-]{+h+}i[-ns In-]{+f+}t[-32Regs:$a, i64imm:$b),-]
[- "mu-] l[-.wide.s32 \t$dst, $a, $b;", []>;-]
[-d-]ef[-MULWIDEU64-]
[- : NVPTXInst<(outs Int64Regs:$dst), (ins In-]t [-32Regs:$-]{+by+} a[-, I-]n[-t32Regs:$b),-]
[- "mul.w-] i[-de.u32 \t$dst, $a, $b;", []>;-]
[-def MULWIDEU64I-]mm[-: NVPTXInst<(outs Int64R-]e[-gs:$-]d[-st), (-]i[-ns In-]{+a+}t[-32R-]e[-gs:$a, i32imm:$b),-]" {+to+} mul.wide.[-u32 \t$dst, $a, $b;", []>;-]
[-def MULWIDEU64Imm64-]
[- : NVPTXInst<(outs Int64Regs:$dst), (ins Int32Regs:$a, i64imm:$b),-]
[- "mul.wide.u32 \t$dst, $a, $b;", []>;-]
[-def MULWIDES32-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),-]
[- "mul.wide.s16 \t$dst, $a, $b;", []>;-]
[-def MULWIDES32Imm-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),-]
[- "mul.wide.s16 \t$dst, $a, $b;", []>;-]
[-def MULWIDES32Imm32-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),-]
[- "mul.wide.s16 \t$dst, $a, $b;", []>;-]
[-def MULWIDEU32-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),-]
[- "mul.wide.u16 \t$dst, $a, $b;", []>;-]
[-def MULWIDEU32Imm-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i16imm:$b),-]
[- "mul.wide.u16 \t$dst, $a, $b;", []>;-]
[-def MULWIDEU32Imm32-]
[- : NVPTXInst<(outs Int32Regs:$dst), (ins Int16Regs:$a, i32imm:$b),-]
[- "mul.wide.u16 \t$dst, $a, $b;", []>;-]
def : Pat<(shl (sext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDES64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
Requires<[doMulWide]>;
def : Pat<(shl (zext Int32Regs:$a), (i32 Int5Const:$b)),
(MULWIDEU64Imm Int32Regs:$a, (SHL2MUL32 node:$b))>,
Requires<[doMulWide]>;
def : Pat<(shl (sext Int16Regs:$a), (i16 Int4Const:$b)),
(MULWIDES32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
Requires<[doMulWide]>;
def : Pat<(shl (zext Int16Regs:$a), (i16 Int4Const:$b)),
(MULWIDEU32Imm Int16Regs:$a, (SHL2MUL16 node:$b))>,
Requires<[doMulWide]>;
{+// Convert "sign/zero-extend then multiply" to mul.wide.+}
def : Pat<(mul (sext Int32Regs:$a), (sext Int32Regs:$b)),
(MULWIDES64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int32Regs:$a), (i64 SInt32Const:$b)),
(MULWIDES64Imm64 Int32Regs:$a, (i64 SInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (zext Int32Regs:$b)),
(MULWIDEU64 Int32Regs:$a, Int32Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int32Regs:$a), (i64 UInt32Const:$b)),
(MULWIDEU64Imm64 Int32Regs:$a, (i64 UInt32Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (sext Int16Regs:$b)),
(MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (sext Int16Regs:$a), (i32 SInt16Const:$b)),
(MULWIDES32Imm32 Int16Regs:$a, (i32 SInt16Const:$b))>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (zext Int16Regs:$b)),
(MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,
Requires<[doMulWide]>;
def : Pat<(mul (zext Int16Regs:$a), (i32 UInt16Const:$b)),
(MULWIDEU32Imm32 Int16Regs:$a, (i32 UInt16Const:$b))>,
[- Requires<[doMulWide]>;-]
[-def SDTMulWide-]
[- : SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>]>;-]
[-def mul_wide_signed-]
[- : SDNode<"NVPTXISD::MUL_WIDE_SIGNED", SDTMulWide>;-]
[-def mul_wide_unsigned-]
[- : SDNode<"NVPTXISD::MUL_WIDE_UNSIGNED", SDTMulWide>;-]
[-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, Int16Regs:$b)),-]
[- (MULWIDES32 Int16Regs:$a, Int16Regs:$b)>,-]
Requires<[doMulWide]>;
[-def : Pat<(i32 (mul_wide_signed Int16Regs:$a, imm:$b)),-]
[- (MULWIDES32Imm Int16Regs:$a, imm:$b)>,-]
[- Requires<[doMulWide]>;-]
[-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, Int16Regs:$b)),-]
[- (MULWIDEU32 Int16Regs:$a, Int16Regs:$b)>,-]
[- Requires<[doMulWide]>;-]
[-def : Pat<(i32 (mul_wide_unsigned Int16Regs:$a, imm:$b)),-]
[- (MULWIDEU32Imm Int16Regs:$a, imm:$b)>,-]
[- Requires<[doMulWide]>;-]
{+//+}
{+// Integer multiply-add+}
{+//+}
{+def SDTIMAD :+}
{+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>, SDTCisInt<2>,+}
{+ SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;+}
{+def imad : SDNode<"NVPTXISD::IMAD", SDTIMAD>;+}
def {+MAD16rrr+} :
{+NV+}P[-a-]{+TXIns+}t<([-i64 (m-]{+o+}u[-l_wide_-]{+t+}s [-ig-]{+I+}n{+t16R+}e{+gs:$+}d{+st),+}
{+ (ins+} Int[-32-]{+16+}Regs:$a, Int[-32-]{+16+}Regs:$b[-)-]{+, Int16Regs:$c+}),
{+"mad.lo.s16 \t$dst, $a, $b, $c;",+}
{+ [+}([-MULW-]{+set+} I[-DES-]{+nt1+}6[-4-]{+Regs:$dst, (imad+} Int[-32-]{+16+}Regs:$a, Int[-32-]{+16+}Regs:$b[-)>-], {+Int16+}Re[-quire-]{+g+}s[-<[doMulWide-]{+:$c))+}]>;
def {+MAD16rri+} :
{+NV+}P[-a-]{+TXIns+}t<([-i64 (m-]{+o+}u[-l_wide_-]{+t+}s [-ig-]{+I+}n{+t16R+}e{+gs:$+}d{+st),+}
{+ (ins+} Int[-32-]{+16+}Regs:$a, {+Int16Regs:$b, i16+}imm:$[-b)-]{+c+}),
{+"mad.lo.s16 \t$dst, $a, $b, $c;",+}
{+ [+}([-MULW-]{+set+} I[-DES-]{+nt1+}6[-4Im-]{+Regs:$dst, (i+}m{+ad+} Int[-32-]{+16+}Regs:$a, {+Int16Regs:$b,+} imm:$[-b-]{+c)+})[->,-]
[- Requires<[doMulWide-]]>;
def {+MAD16rir+} :
{+NV+}P[-a-]{+TXIns+}t<([-i64 (mul_wide_-]{+o+}u[-n-]{+t+}s [-ig-]{+I+}n{+t16R+}e{+gs:$+}d{+st),+}
{+ (ins+} Int[-32-]{+16+}Regs:$a, {+i16imm:$b,+} Int[-32-]{+16+}Regs:$[-b)-]{+c+}),
[-(MULWIDEU-]{+"mad.lo.s1+}6 [-4-]{+\t$dst, $a, $b, $c;",+}
{+ [(set+} Int[-32-]{+16+}Regs:$[-a-]{+dst+}, {+(imad+} Int[-32-]{+16+}Regs:${+a, imm:$+}b[-)>-], {+Int16+}Re[-quire-]{+g+}s[-<[doMulWide-]{+:$c))+}]>;
def {+MAD16rii+} :
{+NV+}P[-a-]{+TXIns+}t<([-i64 (mul_wide_-]{+o+}u[-n-]{+t+}s [-ig-]{+I+}n{+t16R+}e{+gs:$+}d{+st),+}
{+ (ins+} Int[-32-]{+16+}Regs:$a, i{+16i+}mm:$b[-)-]{+, i16imm:$c+}),
{+"mad.lo.s16 \t$dst, $a, $b, $c;",+}
{+ [+}([-MULW-]{+set+} I[-DEU-]{+nt1+}6[-4Im-]{+Regs:$dst, (i+}m{+ad+} Int[-32-]{+16+}Regs:$a, imm:$b[-)>-],[-Requ-] i[-res<[doMulWide-]{+mm:$c))+}]>;
def[-m-] M[-UL-]{+AD32rrr :+}
{+ NVP+}T{+XInst<(outs Int32Regs+}:{+$dst),+}
{+ (ins+} I{+nt+}3[-<-]{+2Regs:$a, Int32Regs:$b, Int32Regs:$c),+}
{+ "mad.lo.s32 \t$dst, $a, $b, $c;+}"{+,+}
{+ [(set Int32Regs:$dst, (i+}m{+ad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;+}
{+def MAD32rri :+}
{+ NVPTXInst<(o+}u{+ts Int32Regs:$dst),+}
{+ (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),+}
{+ "mad.+}l{+o.s32 \t$dst, $a, $b, $c;",+}
{+ [(set Int32Regs:$dst, (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;+}
{+def MAD32rir :+}
{+ NVPTXInst<(outs Int32Regs:$dst),+}
{+ (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),+}
{+ "mad+}.lo.s{+32 \t$dst, $a, $b, $c;+}",
{+[(set Int32Regs:$dst, (imad Int32Regs:$a, im+}m{+:$b, Int32Regs:$c))]>;+}
{+def MAD32rii :+}
{+ NVPTXInst<(o+}u{+ts Int32Regs:$dst),+}
{+ (ins Int32Regs:$a, i32imm:$b, i32imm:$c),+}
{+ "mad.+}l{+o.s32 \t$dst, $a, $b, $c;",+}
{+ [(set Int32Regs:$dst, (imad Int32Regs:$a, imm:$b, imm:$c))]+}>;
def[-m-] M[-ULTHS-]{+AD64rrr+} :
{+NVPTX+}I[-3-]{+nst+}<{+(outs Int64Regs:$dst),+}
{+ (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),+}
"m[-ul-]{+ad+}.[-hi-]{+lo+}.s{+64 \t$dst, $a, $b, $c;+}",
{+[(set Int64Regs:$dst, (i+}m[-ulh-]{+ad Int64Reg+}s{+:$a, Int64Regs:$b, Int64Regs:$c))]+}>;
def[-m-] M[-ULTHU-]{+AD64rri+} :
{+NVPTX+}I[-3-]{+nst+}<{+(outs Int64Regs:$dst),+}
{+ (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),+}
"m[-u-]{+ad.+}l{+o+}.[-h-]{+s64 \t$dst, $a, $b, $c;",+}
{+ [(set Int64Regs:$dst, (+}i[-.-]{+mad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;+}
{+def MAD64rir :+}
{+ NVPTXInst<(o+}u{+ts Int64Regs:$dst),+}
{+ (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),+}
{+ "mad.lo.s64 \t$dst, $a, $b, $c;+}",
{+[(set Int64Regs:$dst, (i+}m{+ad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;+}
{+def MAD64rii :+}
{+ NVPTXInst<(o+}u{+ts Int64Regs:$dst),+}
{+ (ins Int64Regs:$a, i64imm:$b, i64imm:$c),+}
{+ "mad.+}l[-hu-]{+o.s64 \t$dst, $a, $b, $c;",+}
{+ [(set Int64Regs:$dst, (imad Int64Regs:$a, imm:$b, imm:$c))]+}>;
def[-m SDIV : I3<"div.s", sdiv>;-]
[-defm UDIV : I3<"div.u", udiv>;-]
[-defm SREM : I3<"rem.s", srem>;-]
[-// The ri version will not be selected as DAGCombiner::visitSREM will lower it.-]
[-defm UREM : I3<"rem.u", urem>;-]
[-// The ri version will not be selected as DAGCombiner::visitUREM will lower it.-]
[-def SDTIMAD-]
[- : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisInt<0>,-]
[- SDTCisInt<2>, SDTCisSameAs<0, 2>,-]
[- SDTCisSameAs<0, 3>]>;-]
[-def imad-]
[- : SDNode<"NVPTXISD::IMAD", SDTIMAD>;-]
[-def MAD16rrr : NVPTXInst<(outs Int16Regs:$dst),-]
[- (ins Int16Regs:$a, Int16Regs:$b, Int16Regs:$c),-]
[- "mad.lo.s16 \t$dst, $a, $b, $c;",-]
[- [(set Int16Regs:$dst,-]
[- (imad Int16Regs:$a, Int16Regs:$b, Int16Regs:$c))]>;-]
[-def MAD16rri : NVPTXInst<(outs Int16Regs:$dst),-]
[- (ins Int16Regs:$a, Int16Regs:$b, i16imm:$c),-]
[- "mad.lo.s16 \t$dst, $a, $b, $c;",-]
[- [(set Int16Regs:$dst,-]
[- (imad Int16Regs:$a, Int16Regs:$b, imm:$c))]>;-]
[-def MAD16rir : NVPTXInst<(outs Int16Regs:$dst),-]
[- (ins Int16Regs:$a, i16imm:$b, Int16Regs:$c),-]
[- "mad.lo.s16 \t$dst, $a, $b, $c;",-]
[- [(set Int16Regs:$dst,-]
[- (imad Int16Regs:$a, imm:$b, Int16Regs:$c))]>;-]
[-def MAD16rii : NVPTXInst<(outs Int16Regs:$dst),-]
[- (ins Int16Regs:$a, i16imm:$b, i16imm:$c),-]
[- "mad.lo.s16 \t$dst, $a, $b, $c;",-]
[- [(set Int16Regs:$dst,-]
[- (imad Int16Regs:$a, imm:$b, imm:$c))]>;-]
[-def MAD32rrr : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$a, Int32Regs:$b, Int32Regs:$c),-]
[- "mad.lo.s32 \t$dst, $a, $b, $c;",-]
[- [(set Int32Regs:$dst,-]
[- (imad Int32Regs:$a, Int32Regs:$b, Int32Regs:$c))]>;-]
[-def MAD32rri : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$a, Int32Regs:$b, i32imm:$c),-]
[- "mad.lo.s32 \t$dst, $a, $b, $c;",-]
[- [(set Int32Regs:$dst,-]
[- (imad Int32Regs:$a, Int32Regs:$b, imm:$c))]>;-]
[-def MAD32rir : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$a, i32imm:$b, Int32Regs:$c),-]
[- "mad.lo.s32 \t$dst, $a, $b, $c;",-]
[- [(set Int32Regs:$dst,-]
[- (imad Int32Regs:$a, imm:$b, Int32Regs:$c))]>;-]
[-def MAD32rii : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$a, i32imm:$b, i32imm:$c),-]
[- "mad.lo.s32 \t$dst, $a, $b, $c;",-]
[- [(set Int32Regs:$dst,-]
[- (imad Int32Regs:$a, imm:$b, imm:$c))]>;-]
[-def MAD64rrr : NVPTXInst<(outs Int64Regs:$dst),-]
[- (ins Int64Regs:$a, Int64Regs:$b, Int64Regs:$c),-]
[- "mad.lo.s64 \t$dst, $a, $b, $c;",-]
[- [(set Int64Regs:$dst,-]
[- (imad Int64Regs:$a, Int64Regs:$b, Int64Regs:$c))]>;-]
[-def MAD64rri : NVPTXInst<(outs Int64Regs:$dst),-]
[- (ins Int64Regs:$a, Int64Regs:$b, i64imm:$c),-]
[- "mad.lo.s64 \t$dst, $a, $b, $c;",-]
[- [(set Int64Regs:$dst,-]
[- (imad Int64Regs:$a, Int64Regs:$b, imm:$c))]>;-]
[-def MAD64rir : NVPTXInst<(outs Int64Regs:$dst),-]
[- (ins Int64Regs:$a, i64imm:$b, Int64Regs:$c),-]
[- "mad.lo.s64 \t$dst, $a, $b, $c;",-]
[- [(set Int64Regs:$dst,-]
[- (imad Int64Regs:$a, imm:$b, Int64Regs:$c))]>;-]
[-def MAD64rii : NVPTXInst<(outs Int64Regs:$dst),-]
[- (ins Int64Regs:$a, i64imm:$b, i64imm:$c),-]
[- "mad.lo.s64 \t$dst, $a, $b, $c;",-]
[- [(set Int64Regs:$dst,-]
[- (imad Int64Regs:$a, imm:$b, imm:$c))]>;-]
[-def-] INEG16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"neg.s16 \t$dst, $src;",
[(set Int16Regs:$dst, (ineg Int16Regs:$src))]>;
def INEG32 :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"neg.s32 \t$dst, $src;",
[(set Int32Regs:$dst, (ineg Int32Regs:$src))]>;
def INEG64 :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"neg.s64 \t$dst, $src;",
[(set Int64Regs:$dst, (ineg Int64Regs:$src))]>;
//-----------------------------------
// Floating Point Arithmetic
@@ -677,17 +690,13 @@ def INEG64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
// Constant 1.0f
def FloatConst1 : PatLeaf<(fpimm), [{
[-if (-]{+return+} &[-(-]N->getValueAPF().getSemantics() [-) !-]{+=+}= &llvm::APFloat::IEEEsingle [-)-]
[- return false;-]
[- float f = (float)-]{+&&+}
N->getValueAPF().convertToFloat()[-;-]
[- return (f-] == 1.0f[-)-];
}]>;
// Constan[-d-]{+t 1.0+} (double)[-1.0-]
def DoubleConst1 : PatLeaf<(fpimm), [{
[-if (-]{+return+} &[-(-]N->getValueAPF().getSemantics() [-) !-]{+=+}= &llvm::APFloat::IEEEdouble [-)-]
[- return false;-]
[- double d = (double)-]{+&&+}
N->getValueAPF().convertToDouble()[-;-]
[- return (d-] == 1.0[-)-];
}]>;
defm FADD : F3<"add", fadd>;
@@ -698,157 +707,157 @@ defm FADD_rn : F3_rn<"add", fadd>;
defm FSUB_rn : F3_rn<"sub", fsub>;
defm FMUL_rn : F3_rn<"mul", fmul>;
defm FABS : F2<"abs", fabs>;
defm FNEG : F2<"neg", fneg>;
defm FSQRT : F2<"sqrt.rn", fsqrt>;
//
// F64 division
//
// Reciprocal special case: the numerator must satisfy DoubleConst1. $a is
// declared in `ins` but is not referenced by the assembly string, which prints
// rcp.rn.f64 on $dst and $b only.
def FDIV641r :
NVPTXInst<(outs Float64Regs:$dst),
(ins f64imm:$a, Float64Regs:$b),
"rcp.rn.f64 \t$dst, $b;",
[(set Float64Regs:$dst, (fdiv DoubleConst1:$a, Float64Regs:$b))]>;
// General f64 division, register divided by register.
def FDIV64rr :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, Float64Regs:$b),
"div.rn.f64 \t$dst, $a, $b;",
[(set Float64Regs:$dst, (fdiv Float64Regs:$a, Float64Regs:$b))]>;
// General f64 division, register divided by an fpimm operand.
def FDIV64ri :
NVPTXInst<(outs Float64Regs:$dst),
(ins Float64Regs:$a, f64imm:$b),
"div.rn.f64 \t$dst, $a, $b;",
[(set Float64Regs:$dst, (fdiv Float64Regs:$a, fpimm:$b))]>;
//
// F32 Approximate reciprocal
//
// Both defs match (fdiv FloatConst1, $b) under do_DIVF32_APPROX. $a is declared
// in `ins` but is not referenced by the assembly string, which prints only
// $dst and $b.
// Flush-to-zero form: additionally gated on doF32FTZ.
def FDIV321r_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.approx.ftz.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_APPROX, doF32FTZ]>;
// Non-ftz form: same pattern, gated on do_DIVF32_APPROX only.
def FDIV321r :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.approx.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_APPROX]>;
//
// F32 Approximate division
//
def FDIV32approxrr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.approx.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxri_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.approx.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[do_DIVF32_APPROX, doF32FTZ]>;
def FDIV32approxrr :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.approx.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_APPROX]>;
def FDIV32approxri :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.approx.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[do_DIVF32_APPROX]>;
//
// F32 Semi-accurate reciprocal
//
// rcp.approx gives the same result as div.full(1.0f, a) and is faster.
//
// These print the same rcp.approx[.ftz].f32 text and use the same
// (fdiv FloatConst1, $b) pattern as FDIV321r[_ftz]; the difference is the
// predicate, do_DIVF32_FULL here instead of do_DIVF32_APPROX. As there, $a is
// declared in `ins` but not printed.
def FDIV321r_approx_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.approx.ftz.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_FULL, doF32FTZ]>;
def FDIV321r_approx :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.approx.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_FULL]>;
//
// F32 Semi-accurate division
//
// div.full.ftz.f32, register / register. Requires do_DIVF32_FULL and
// doF32FTZ.
def FDIV32rr_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.full.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_FULL, doF32FTZ]>;
// div.full.ftz.f32, register / immediate (divisor matched as fpimm).
def FDIV32ri_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.full.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[do_DIVF32_FULL, doF32FTZ]>;
// div.full.f32, register / register. Requires do_DIVF32_FULL only.
def FDIV32rr :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.full.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[do_DIVF32_FULL]>;
// div.full.f32, register / immediate. Requires do_DIVF32_FULL only.
def FDIV32ri :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.full.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[do_DIVF32_FULL]>;
//
// F32 Accurate reciprocal
//
// Reciprocal via rcp.rn.ftz.f32: matches an fdiv whose dividend is
// FloatConst1. $a is bound by the pattern but not printed. Requires reqPTX20
// and doF32FTZ; no do_DIVF32_* predicate is attached.
def FDIV321r_prec_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.rn.ftz.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[reqPTX20, doF32FTZ]>;
// Same reciprocal pattern printed as rcp.rn.f32; requires reqPTX20 only.
def FDIV321r_prec :
NVPTXInst<(outs Float32Regs:$dst),
(ins f32imm:$a, Float32Regs:$b),
"rcp.rn.f32 \t$dst, $b;",
[(set Float32Regs:$dst, (fdiv FloatConst1:$a, Float32Regs:$b))]>,
Requires<[reqPTX20]>;
//
// F32 Accurate division
//
// div.rn.ftz.f32, register / register. Requires doF32FTZ and reqPTX20.
def FDIV32rr_prec_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.rn.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[doF32FTZ, reqPTX20]>;
// div.rn.ftz.f32, register / immediate (divisor matched as fpimm).
def FDIV32ri_prec_ftz :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.rn.ftz.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[doF32FTZ, reqPTX20]>;
// div.rn.f32, register / register. Requires reqPTX20 only.
def FDIV32rr_prec :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, Float32Regs:$b),
"div.rn.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, Float32Regs:$b))]>,
Requires<[reqPTX20]>;
// div.rn.f32, register / immediate. Requires reqPTX20 only.
def FDIV32ri_prec :
NVPTXInst<(outs Float32Regs:$dst),
(ins Float32Regs:$a, f32imm:$b),
"div.rn.f32 \t$dst, $a, $b;",
[(set Float32Regs:$dst, (fdiv Float32Regs:$a, fpimm:$b))]>,
Requires<[reqPTX20]>;
//
// F32 rsqrt
@@ -857,68 +866,39 @@ def FDIV32ri_prec : NVPTXInst<(outs Float32Regs:$dst),
// rsqrt.approx.f32 on a single Float32Regs operand. The pattern list is
// empty, so this record is only reachable through explicit Pat<> rules that
// name it.
def RSQRTF32approx1r : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$b),
"rsqrt.approx.f32 \t$dst, $b;", []>;
{+// Convert 1.0f/sqrt(x) to rsqrt.approx.f32. (There is an rsqrt.approx.f64, but+}
{+// it's emulated in software.)+}
// Rewrites FloatConst1 / int_nvvm_sqrt_f(x) to RSQRTF32approx1r. Applies only
// when do_DIVF32_FULL, do_SQRTF32_APPROX and doNoF32FTZ all hold.
def: Pat<(fdiv FloatConst1, (int_nvvm_sqrt_f Float32Regs:$b)),
(RSQRTF32approx1r Float32Regs:$b)>,
Requires<[do_DIVF32_FULL, do_SQRTF32_APPROX, doNoF32FTZ]>;
multiclass F[-PCONTR-]{+M+}A[-CT32-]<string OpcStr, {+RegisterClass RC, Operand ImmCls,+} Predicate Pred> {
def rrr : NVPTXInst<(outs[-Float32-] R[-egs-]{+C+}:$dst), (ins[-Float32-] R[-egs-]{+C+}:$a,[-Float32-] R[-egs-]{+C+}:$b,[-Float32-] R[-egs-]{+C+}:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
[(set[-Float32-] R[-egs-]{+C+}:$dst, (fma[-Float32-] R[-egs-]{+C+}:$a,[-Float32-] R[-egs-]{+C+}:$b,[-Float32-] R[-egs-]{+C+}:$c))]>,
Requires<[Pred]>;
def rri : NVPTXInst<(outs[-Float32-] R[-egs-]{+C+}:$dst),
(ins[-Float32-] R[-egs-]{+C+}:$a,[-Float32-] R[-egs-]{+C+}:$b, [-f32i-]{+I+}mm{+Cls+}:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
[(set[-Float32-] R[-egs-]{+C+}:$dst, (fma[-Float32-] R[-egs-]{+C+}:$a,[-Float32-] R[-egs-]{+C+}:$b, fpimm:$c))]>,
Requires<[Pred]>;
def rir : NVPTXInst<(outs[-Float32-] R[-egs-]{+C+}:$dst),
(ins[-Float32-] R[-egs-]{+C+}:$a, [-f32i-]{+I+}mm{+Cls+}:$b,[-Float32-] R[-egs-]{+C+}:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
[(set[-Float32-] R[-egs-]{+C+}:$dst, (fma[-Float32-] R[-egs-]{+C+}:$a, fpimm:$b,[-Float32-] R[-egs-]{+C+}:$c))]>,
Requires<[Pred]>;
def rii : NVPTXInst<(outs[-Float32-] R[-egs-]{+C+}:$dst),
(ins[-Float32-] R[-egs-]{+C+}:$a, [-f32i-]{+I+}mm{+Cls+}:$b, [-f32i-]{+I+}mm{+Cls+}:$c),
!strconcat(OpcStr, " \t$dst, $a, $b, $c;"),
[(set[-Float32-] R[-egs-]{+C+}:$dst, (fma[-Float32-] R[-egs-]{+C+}:$a, fpimm:$b, fpimm:$c))]>,
Requires<[Pred]>;
}
[-multiclass FPCONTRACT64<string OpcStr, Predicate Pred> {-]
[- def rrr : NVPTXInst<(outs Float64Regs:$dst),-]
[- (ins Float64Regs:$a, Float64Regs:$b, Float64Regs:$c),-]
[- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),-]
[- [(set Float64Regs:$dst,-]
[- (fma Float64Regs:$a, Float64Regs:$b, Float64Regs:$c))]>,-]
[- Requires<[Pred]>;-]
[- def rri : NVPTXInst<(outs Float64Regs:$dst),-]
[- (ins Float64Regs:$a, Float64Regs:$b, f64imm:$c),-]
[- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),-]
[- [(set Float64Regs:$dst,-]
[- (fma Float64Regs:$a, Float64Regs:$b, fpimm:$c))]>,-]
[- Requires<[Pred]>;-]
[- def rir : NVPTXInst<(outs Float64Regs:$dst),-]
[- (ins Float64Regs:$a, f64imm:$b, Float64Regs:$c),-]
[- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),-]
[- [(set Float64Regs:$dst,-]
[- (fma Float64Regs:$a, fpimm:$b, Float64Regs:$c))]>,-]
[- Requires<[Pred]>;-]
[- def rii : NVPTXInst<(outs Float64Regs:$dst),-]
[- (ins Float64Regs:$a, f64imm:$b, f64imm:$c),-]
[- !strconcat(OpcStr, " \t$dst, $a, $b, $c;"),-]
[- [(set Float64Regs:$dst,-]
[- (fma Float64Regs:$a, fpimm:$b, fpimm:$c))]>,-]
[- Requires<[Pred]>;-]
[-}-]defm FMA32_ftz : F[-PCONTR-]{+M+}A[-CT32-]<"fma.rn.ftz.f32", {+Float32Regs, f32imm,+} doF32FTZ>;
defm FMA32 : F[-PCONTR-]{+M+}A[-CT32-]<"fma.rn.f32", {+Float32Regs, f32imm,+} true>;
defm FMA64 : F[-PCONTR-]{+M+}A[-CT64-]<"fma.rn.f64", {+Float64Regs, f64imm,+} true>;
{+// sin/cos+}
// f32 sine: selects (fsin Float32Regs) to sin.approx.f32. No Requires<>
// predicate is attached to this record.
def SINF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"sin.approx.f32 \t$dst, $src;",
[(set Float32Regs:$dst, (fsin Float32Regs:$src))]>;
@@ -926,8 +906,8 @@ def COSF: NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"cos.approx.f32 \t$dst, $src;",
[(set Float32Regs:$dst, (fcos Float32Regs:$src))]>;
// Lower (frem x, y) into (sub x, (mul (floor (div x, y)) y)){+,+}
// [-e-]{+i+}.[-g-]{+e+}. "poor man's fmod()"
// frem - f32 FTZ
def : Pat<(frem Float32Regs:$x, Float32Regs:$y),
@@ -962,183 +942,152 @@ def : Pat<(frem Float64Regs:$x, fpimm:$y),
fpimm:$y))>;
//-----------------------------------
// [-Logical Ar-]{+B+}it[-hm-]{+wis+}e {+opera+}ti[-c-]{+ons+}
//-----------------------------------
{+// Template for three-arg bitwise operations. Takes three args and creates .b16,+}
{+// .b32, .b64, and .pred (predicate registers -- i.e., i1) versions of OpcStr.+}
multiclass [-LOG_FORMA-]{+BI+}T{+WISE+}<string OpcStr, SDNode OpNode> {
def b1rr :
NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, Int1Regs:$b),
!strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
[(set Int1Regs:$dst, (OpNode Int1Regs:$a, Int1Regs:$b))]>;
def b1ri :
NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$a, i1imm:$b),
!strconcat(OpcStr, ".pred \t$dst, $a, $b;"),
[(set Int1Regs:$dst, (OpNode Int1Regs:$a, imm:$b))]>;
def b16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int16Regs:$b),
!strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int16Regs:$b))]>;
def b16ri :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i16imm:$b),
!strconcat(OpcStr, ".b16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, imm:$b))]>;
def b32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
def b32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, ".b32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, imm:$b))]>;
def b64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int64Regs:$b),
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int64Regs:$b))]>;
def b64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i64imm:$b),
!strconcat(OpcStr, ".b64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, imm:$b))]>;
}
defm OR : [-LOG_FORMA-]{+BI+}T{+WISE+}<"or", or>;
defm AND : [-LOG_FORMA-]{+BI+}T{+WISE+}<"and", and>;
defm XOR : [-LOG_FORMA-]{+BI+}T{+WISE+}<"xor", xor>;
// Bitwise NOT, one record per register class. The Int1Regs form prints
// not.pred; the 16/32/64-bit forms print not.b16 / not.b32 / not.b64. Each
// selects the `not` node on its own register class.
def NOT1 : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$src),
"not.pred \t$dst, $src;",
[(set Int1Regs:$dst, (not Int1Regs:$src))]>;
def NOT16 : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"not.b16 \t$dst, $src;",
[(set Int16Regs:$dst, (not Int16Regs:$src))]>;
def NOT32 : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src),
"not.b32 \t$dst, $src;",
[(set Int32Regs:$dst, (not Int32Regs:$src))]>;
def NOT64 : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src),
"not.b64 \t$dst, $src;",
[(set Int64Regs:$dst, (not Int64Regs:$src))]>;
// [-F-]{+Template f+}or {+left/right+} shifts[-,-]{+. Takes+} th{+r+}e[-s-]e[-c-] o{+pera+}nds{+,+}
{+// [dest (reg), s+}rc [-op-]{+(reg), shift (r+}e{+g o+}r {+imm)].+}
{+// dest+} and {+src+} m{+ay be int64, int32, or int16, b+}u{+t+} s{+hift is always int32.+}
{+//+}
{+// This templa+}t[-b-]e {+also defines a+} 32-bit [-val-]{+shift (imm, imm) instr+}u[-e-]{+ction.+}
multiclass[-L-] SHIFT[-_FORMAT-]<string OpcStr, SDNode OpNode> {
def i64rr :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, Int32Regs:$b))]>;
def i64ri :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),
!strconcat(OpcStr, "64 \t$dst, $a, $b;"),
[(set Int64Regs:$dst, (OpNode Int64Regs:$a, (i32 imm:$b)))]>;
def i32rr :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, Int32Regs:$b))]>;
def i32ri :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode Int32Regs:$a, (i32 imm:$b)))]>;
def i32ii :
NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),
!strconcat(OpcStr, "32 \t$dst, $a, $b;"),
[(set Int32Regs:$dst, (OpNode (i32 imm:$a), (i32 imm:$b)))]>;
def i16rr :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, Int32Regs:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, Int32Regs:$b))]>;
def i16ri :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),
!strconcat(OpcStr, "16 \t$dst, $a, $b;"),
[(set Int16Regs:$dst, (OpNode Int16Regs:$a, (i32 imm:$b)))]>;
}
defm SHL :[-LSHIFT_FORMAT<"shl.b", shl>;-]
[-// For shifts, the second src operand must be 32-bit value-]
[-// Need to add cvt for the 8-bits.-]
[-multiclass RSHIFT_FORMAT<string OpcStr, SDNode OpNode> {-]
[- def i64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a,-]
[- Int32Regs:$b),-]
[- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),-]
[- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,-]
[- Int32Regs:$b))]>;-]
[- def i64ri : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$a, i32imm:$b),-]
[- !strconcat(OpcStr, "64 \t$dst, $a, $b;"),-]
[- [(set Int64Regs:$dst, (OpNode Int64Regs:$a,-]
[- (i32 imm:$b)))]>;-]
[- def i32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a,-]
[- Int32Regs:$b),-]
[- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),-]
[- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,-]
[- Int32Regs:$b))]>;-]
[- def i32ri : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$a, i32imm:$b),-]
[- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),-]
[- [(set Int32Regs:$dst, (OpNode Int32Regs:$a,-]
[- (i32 imm:$b)))]>;-]
[- def i32ii : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$a, i32imm:$b),-]
[- !strconcat(OpcStr, "32 \t$dst, $a, $b;"),-]
[- [(set Int32Regs:$dst, (OpNode (i32 imm:$a),-]
[- (i32 imm:$b)))]>;-]
[- def i16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$a,-]
[- Int32Regs:$b),-]
[- !strconcat(Opc-] S[-tr, "16 \t$dst, $a, $b;"),-]
[- [(set Int16Regs:$dst, (OpNode Int16Regs:$a,-]{+H+}I[-nt32Regs:$b))]>;-]
[- def i16ri : NVP-]{+F+}T[-XInst-]<[-(outs Int16Regs:$dst), (ins Int16Regs:$a, i32imm:$b),-]
[- !strconcat(OpcStr,-]"[-16 \t$d-]s[-t, $a, $-]{+hl.+}b[-;-]"[-),-]
[- [(set Int16Regs:$dst-],[-(OpNode Int16Reg-] s[-:$a,-]
[- (i32 imm:$b)))]-]{+hl+}>;[-}-]
defm SRA :[-R-] SHIFT[-_FORMAT-]<"shr.s", sra>;
defm SRL :[-R-] SHIFT[-_FORMAT-]<"shr.u", srl>;
//
// Rotate: [-u-]{+U+}se ptx shf instruction if available.
//
// 32 bit r2 = rotl r1, n
// =>
// r2 = shf.l r1, r1, n
// 32-bit rotate-left by an i32 immediate, printed as shf.l.wrap.b32 with $src
// passed as both source operands. Only selected when hasHWROT32 holds.
def ROTL32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
[(set Int32Regs:$dst, (rotl Int32Regs:$src, (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
// 32-bit rotate-left by a register amount; same shf.l.wrap.b32 encoding and
// the same hasHWROT32 guard.
def ROTL32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.l.wrap.b32 \t$dst, $src, $src, $amt;",
[(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
Requires<[hasHWROT32]>;
// 32 bit r2 = rotr r1, n
// =>
// r2 = shf.r r1, r1, n
// 32-bit rotate-right by an i32 immediate, printed as shf.r.wrap.b32 with $src
// passed as both source operands. Only selected when hasHWROT32 holds.
def ROTR32imm_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, i32imm:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
[(set Int32Regs:$dst, (rotr Int32Regs:$src, (i32 imm:$amt)))]>,
Requires<[hasHWROT32]>;
// 32-bit rotate-right by a register amount; same shf.r.wrap.b32 encoding and
// the same hasHWROT32 guard.
def ROTR32reg_hw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),
"shf.r.wrap.b32 \t$dst, $src, $src, $amt;",
[(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
Requires<[hasHWROT32]>;
// [-// Rotate:-]{+32-b+}i[-f p-]t[-x-] s[-h-]{+o+}f[-ins-]t{+ware+} r[-uction is n-]ota[-va-]{+te by immed+}i[-l-]a[-bl-]{+t+}e[-,-]{+. $am+}t{+2 s+}h[-en-]{+o+}u[-s-]{+ld+} e[-shift+-]{+qu+}a[-dd-]
[-//-]
[-//-]{+l+} 32 [-bi-]{+- $am+}t{+1.+}
def ROT32imm_sw :
NVPTXInst<(outs Int32Regs:$dst),
(ins Int32Regs:$src, i32imm:$amt1, i32imm:$amt2),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %rhs;\n\t"[-,-]
[- !strconcat(-]
"shl.b32 \t%lhs, $src, $amt1;\n\t"[-,-]
[- !strconcat(-]
"shr.b32 \t%rhs, $src, $amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u32 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))),-]
[]>;
// Immediate transform: turns an immediate n into the i32 target constant
// (32 - n), using the zero-extended value of n.
def SUB_FRM_32 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(32 - N->getZExtValue(), SDLoc(N), MVT::i32);
}]>;
def : Pat<(rotl Int32Regs:$src, (i32 imm:$amt)),
@@ -1148,45 +1097,48 @@ def : Pat<(rotr Int32Regs:$src, (i32 imm:$amt)),
(ROT32imm_sw Int32Regs:$src, (SUB_FRM_32 node:$amt), imm:$amt)>,
Requires<[noHWROT32]>;
{+// 32-bit software rotate left by register.+}
def ROTL32reg_sw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %rhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %amt2;\n\t"[-,-]
[- !strconcat(-]
"shl.b32 \t%lhs, $src, $amt;\n\t"[-,-]
[- !strconcat(-]
"sub.s32 \t%amt2, 32, $amt;\n\t"[-,-]
[- !strconcat(-]
"shr.b32 \t%rhs, $src, %amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u32 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))))),-]
[(set Int32Regs:$dst, (rotl Int32Regs:$src, Int32Regs:$amt))]>,
Requires<[noHWROT32]>;
{+// 32-bit software rotate right by register.+}
def ROTR32reg_sw :
NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$src, Int32Regs:$amt),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %rhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b32 %amt2;\n\t"[-,-]
[- !strconcat(-]
"shr.b32 \t%lhs, $src, $amt;\n\t"[-,-]
[- !strconcat(-]
"sub.s32 \t%amt2, 32, $amt;\n\t"[-,-]
[- !strconcat(-]
"shl.b32 \t%rhs, $src, %amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u32 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))))),-]
[(set Int32Regs:$dst, (rotr Int32Regs:$src, Int32Regs:$amt))]>,
Requires<[noHWROT32]>;
// 64{+-+}bit {+software rotate by immediate. $amt2 should equal 64 - $amt1.+}
def ROT64imm_sw :
NVPTXInst<(outs Int64Regs:$dst),
(ins Int64Regs:$src, i32imm:$amt1, i32imm:$amt2),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %rhs;\n\t"[-,-]
[- !strconcat(-]
"shl.b64 \t%lhs, $src, $amt1;\n\t"[-,-]
[- !strconcat(-]
"shr.b64 \t%rhs, $src, $amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u64 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))),-]
[]>;
def SUB_FRM_64 : SDNodeXForm<imm, [{
return CurDAG->getTargetConstant(64-N->getZExtValue(), SDLoc(N), MVT::i32);
@@ -1197,37 +1149,70 @@ def : Pat<(rotl Int64Regs:$src, (i32 imm:$amt)),
// 64-bit rotate-right by immediate n: ROT64imm_sw receives (64 - n) as its
// first amount operand (via SUB_FRM_64) and n as its second. No Requires<>
// predicate is attached to this pattern.
def : Pat<(rotr Int64Regs:$src, (i32 imm:$amt)),
(ROT64imm_sw Int64Regs:$src, (SUB_FRM_64 node:$amt), imm:$amt)>;
{+// 64-bit software rotate left by register.+}
def ROTL64reg_sw :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %rhs;\n\t"[-,-]
[- !strconcat(-]
".reg .u32 %amt2;\n\t"[-,-]
[- !strconcat(-]
"shl.b64 \t%lhs, $src, $amt;\n\t"[-,-]
[- !strconcat(-]
"sub.u32 \t%amt2, 64, $amt;\n\t"[-,-]
[- !strconcat(-]
"shr.b64 \t%rhs, $src, %amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u64 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))))),-]
[(set Int64Regs:$dst, (rotl Int64Regs:$src, Int32Regs:$amt))]>;
def ROTR64reg_sw :
NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$src, Int32Regs:$amt),[-!strconcat(-]
"{{\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %lhs;\n\t"[-,-]
[- !strconcat(-]
".reg .b64 %rhs;\n\t"[-,-]
[- !strconcat(-]
".reg .u32 %amt2;\n\t"[-,-]
[- !strconcat(-]
"shr.b64 \t%lhs, $src, $amt;\n\t"[-,-]
[- !strconcat(-]
"sub.u32 \t%amt2, 64, $amt;\n\t"[-,-]
[- !strconcat(-]
"shl.b64 \t%rhs, $src, %amt2;\n\t"[-,-]
[- !strconcat(-]
"add.u64 \t$dst, %lhs, %rhs;\n\t"[-,-]
[- !strconcat(-]
"}}",[-""))))))))),-]
[(set Int64Regs:$dst, (rotr Int64Regs:$src, Int32Regs:$amt))]>;
{+//+}
{+// Funnel shift in clamp mode+}
{+//+}
{+// Create SDNodes so they can be used in the DAG code, e.g.+}
{+// NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)+}
{+def SDTIntShiftDOp :+}
{+ SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,+}
{+ SDTCisInt<0>, SDTCisInt<3>]>;+}
{+def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;+}
{+def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;+}
{+def FUNSHFLCLAMP :+}
{+ NVPTXInst<(outs Int32Regs:$dst),+}
{+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),+}
{+ "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",+}
{+ [(set Int32Regs:$dst,+}
{+ (FUN_SHFL_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;+}
{+def FUNSHFRCLAMP :+}
{+ NVPTXInst<(outs Int32Regs:$dst),+}
{+ (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),+}
{+ "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",+}
{+ [(set Int32Regs:$dst,+}
{+ (FUN_SHFR_CLAMP Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt))]>;+}
{+//+}
// BFE - bit-field extract
{+//+}
{+// Template for BFE instructions. Takes four args,+}
{+// [dest (reg), src (reg), start (reg or imm), length (reg or imm)].+}
{+// Start may be an imm only if length is also an imm. FIXME: Is this a+}
{+// restriction in PTX?+}
{+//+}
{+// dest and src may be int32 or int64, but start and length are always int32.+}
multiclass BFE<string TyStr, RegisterClass RC> {
[- // BFE supports both 32-bit and 64-bit values, but the start and length-]
[- // operands are always 32-bit-]
def rrr
: NVPTXInst<(outs RC:$d),
(ins RC:$a, Int32Regs:$b, Int32Regs:$c),
@@ -1248,23 +1233,25 @@ defm BFE_S64 : BFE<"s64", Int64Regs>;
defm BFE_U64 : BFE<"u64", Int64Regs>;
//-----------------------------------
//[-General-] Comparison {+instructions (setp, set)+}
//-----------------------------------
// [-G-]{+FIXME: This do+}e{+s+}n{+'t cov+}er {+versions of set+} a[-l-]{+nd+} setp {+that comb+}in[-s-]{+e wi+}t{+h a+}
{+// boolean p+}r[-u-]{+edi+}c{+a+}t[-ion-]{+e, e.g.+} s{+etp.eq.and.b16.+}
// Defines the rr / ri / ir forms of setp for one operand type.
//   TypeStr - type suffix appended after "setp<cmp>." in the asm string.
//   RC      - register class of the register operand(s).
//   ImmCls  - operand class used where an operand is an immediate.
// The result is always an Int1Regs predicate. The comparison is carried by the
// CmpMode operand $cmp and printed through the ${cmp:base} and ${cmp:ftz}
// modifiers. All three records have empty pattern lists, so they are only
// used where other patterns or code name them explicitly.
multiclass SETP<string TypeStr, RegisterClass RC, Operand ImmCls> {
// register, register
def rr :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
"\t$dst, $a, $b;"), []>;
// register, immediate
def ri :
NVPTXInst<(outs Int1Regs:$dst), (ins RC:$a, ImmCls:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
"\t$dst, $a, $b;"), []>;
// immediate, register
def ir :
NVPTXInst<(outs Int1Regs:$dst), (ins ImmCls:$a, RC:$b, CmpMode:$cmp),
!strconcat("setp${cmp:base}${cmp:ftz}.", TypeStr,
"\t$dst, $a, $b;"), []>;
}
defm SETP_b16 : SETP<"b16", Int16Regs, i16imm>;
@@ -1279,7 +1266,10 @@ defm SETP_u64 : SETP<"u64", Int64Regs, i64imm>;
defm SETP_f32 : SETP<"f32", Float32Regs, f32imm>;
defm SETP_f64 : SETP<"f64", Float64Regs, f64imm>;
// [-G-]{+FIXME: This do+}e{+s+}n{+'t app+}e{+ar to be cor+}r{+ect. The "set" mnemonic h+}a[-l-]{+s the form+}
{+// "+}set{+.CmpOp{.ftz}.dtype.stype", where dtype is the type of the dest+}in{+ation+}
{+// reg, either u32,+} s{+32, or f32. Anyway+} t{+hese a+}r{+en't+} u[-c-]{+sed a+}t [-i-]{+the m+}o{+me+}n[-s-]{+t.+}
multiclass SET<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs Int32Regs:$dst),
(ins RC:$a, RC:$b, CmpMode:$cmp),
@@ -1305,10 +1295,13 @@ defm SET_f32 : SET<"f32", Float32Regs, f32imm>;
defm SET_f64 : SET<"f64", Float64Regs, f64imm>;
//-----------------------------------
//[-General-] Selection {+instructions (selp)+}
//-----------------------------------
// [-Ge-]{+FIXME: Missi+}n[-era-]{+g s+}l{+ct+}
{+//+} selp instructions {+that don't have any pattern matches; we explicitly use+}
{+// them within this file.+}
multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
def rr : NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
@@ -1326,24 +1319,30 @@ multiclass SELP<string TypeStr, RegisterClass RC, Operand ImmCls> {
// Defines the rr / ri / ir / ii forms of selp for one operand type, each with
// a selection pattern on the `select` node.
//   TypeStr - type suffix appended after "selp." in the asm string.
//   RC      - register class of the result and of register operands.
//   ImmCls  - operand class used for immediate operands.
//   ImmNode - DAG leaf used to match an immediate in the pattern.
// Note the operand order: the DAG node is (select $p, $a, $b), while the
// instruction prints "$dst, $a, $b, $p" with the predicate last.
multiclass SELP_PATTERN<string TypeStr, RegisterClass RC, Operand ImmCls,
SDNode ImmNode> {
// register, register
def rr :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, RC:$b, Int1Regs:$p),
!strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, RC:$b))]>;
// register, immediate
def ri :
NVPTXInst<(outs RC:$dst),
(ins RC:$a, ImmCls:$b, Int1Regs:$p),
!strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, RC:$a, ImmNode:$b))]>;
// immediate, register
def ir :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, RC:$b, Int1Regs:$p),
!strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, RC:$b))]>;
// immediate, immediate
def ii :
NVPTXInst<(outs RC:$dst),
(ins ImmCls:$a, ImmCls:$b, Int1Regs:$p),
!strconcat("selp.", TypeStr, "\t$dst, $a, $b, $p;"),
[(set RC:$dst, (select Int1Regs:$p, ImmNode:$a, ImmNode:$b))]>;
}
{+// Don't pattern match on selp.{s,u}{16,32,64} -- selp.b{16,32,64} is just as+}
{+// good.+}
defm SELP_b16 : SELP_PATTERN<"b16", Int16Regs, i16imm, imm>;
defm SELP_s16 : SELP<"s16", Int16Regs, i16imm>;
defm SELP_u16 : SELP<"u16", Int16Regs, i16imm>;
@@ -1356,40 +1355,14 @@ defm SELP_u64 : SELP<"u64", Int64Regs, i64imm>;
defm SELP_f32 : SELP_PATTERN<"f32", Float32Regs, f32imm, fpimm>;
defm SELP_f64 : SELP_PATTERN<"f64", Float64Regs, f64imm, fpimm>;
[-//-]
[-// Funnnel shift in clamp mode-]
[-//-]
[-// - SDNodes are created so they can be used in the DAG code,-]
[-// e.g. NVPTXISelLowering (LowerShiftLeftParts and LowerShiftRightParts)-]
[-//-]
[-def SDTIntShiftDOp: SDTypeProfile<1, 3,-]
[- [SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>,-]
[- SDTCisInt<0>, SDTCisInt<3>]>;-]
[-def FUN_SHFL_CLAMP : SDNode<"NVPTXISD::FUN_SHFL_CLAMP", SDTIntShiftDOp, []>;-]
[-def FUN_SHFR_CLAMP : SDNode<"NVPTXISD::FUN_SHFR_CLAMP", SDTIntShiftDOp, []>;-]
[-def FUNSHFLCLAMP : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),-]
[- "shf.l.clamp.b32 \t$dst, $lo, $hi, $amt;",-]
[- [(set Int32Regs:$dst,-]
[- (FUN_SHFL_CLAMP Int32Regs:$lo,-]
[- Int32Regs:$hi, Int32Regs:$amt))]>;-]
[-def FUNSHFRCLAMP : NVPTXInst<(outs Int32Regs:$dst),-]
[- (ins Int32Regs:$lo, Int32Regs:$hi, Int32Regs:$amt),-]
[- "shf.r.clamp.b32 \t$dst, $lo, $hi, $amt;",-]
[- [(set Int32Regs:$dst,-]
[- (FUN_SHFR_CLAMP Int32Regs:$lo,-]
[- Int32Regs:$hi, Int32Regs:$amt))]>;-]
//-----------------------------------
// Data Movement (Load / Store, Move)
//-----------------------------------
// Complex addressing patterns producing two operands each. Matching is
// delegated to the C++ selector functions named in the string argument
// (SelectADDRri for i32, SelectADDRri64 for i64). Both list frameindex as a
// root node and set SDNPWantRoot.
def ADDRri : ComplexPattern<i32, 2, "SelectADDRri", [frameindex],
[SDNPWantRoot]>;
def ADDRri64 : ComplexPattern<i64, 2, "SelectADDRri64", [frameindex],
[SDNPWantRoot]>;
def MEMri : Operand<i32> {
let PrintMethod = "printMemOperand";
@@ -1401,82 +1374,81 @@ def MEMri64 : Operand<i64> {
}
// Pointer-typed (iPTR) operand printed with the generic printOperand method.
def imem : Operand<iPTR> {
let PrintMethod = "printOperand";
}
// iPTRAny variant of imem; also printed with printOperand.
def imemAny : Operand<iPTRAny> {
let PrintMethod = "printOperand";
}
// i32 operand printed by the dedicated printLdStCode method.
def LdStCode : Operand<i32> {
let PrintMethod = "printLdStCode";
}
// Type profile for Wrapper: one result and one operand, where the operand has
// the same type as the result and the result is a pointer type.
def SDTWrapper : SDTypeProfile<1, 1, [SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
// TableGen handle for the NVPTXISD::Wrapper node; no node properties are set.
def Wrapper : SDNode<"NVPTXISD::Wrapper", SDTWrapper>;
{+// Load a memory address into a u32 or u64 register.+}
// Selects (Wrapper tglobaladdr) into an Int32Regs destination via mov.u32.
def MOV_ADDR : NVPTXInst<(outs Int32Regs:$dst), (ins imem:$a),
"mov.u32 \t$dst, $a;",
[(set Int32Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Same pattern with an Int64Regs destination, printed as mov.u64.
def MOV_ADDR64 : NVPTXInst<(outs Int64Regs:$dst), (ins imem:$a),
"mov.u64 \t$dst, $a;",
[(set Int64Regs:$dst, (Wrapper tglobaladdr:$a))]>;
// Get pointer to local stack{+.+}
// Moves the symbol "__local_depot<num>" into a 32-bit register; $num is an i32
// immediate pasted directly after the symbol prefix. No selection pattern.
def MOV_DEPOT_ADDR : NVPTXInst<(outs Int32Regs:$d), (ins i32imm:$num),
"mov.u32 \t$d, __local_depot$num;", []>;
// 64-bit form of the above (mov.u64 into Int64Regs). No selection pattern.
def MOV_DEPOT_ADDR_64 : NVPTXInst<(outs Int64Regs:$d), (ins i32imm:$num),
"mov.u64 \t$d, __local_depot$num;", []>;
// copyPhysreg is hard-coded in NVPTXInstrInfo.cpp
// Register-to-register moves, one per register class. Every record in this
// group is tagged IsSimpleMove=1 and has an empty pattern list.
let IsSimpleMove=1 in {
// Predicate (i1) move.
def IMOV1rr : NVPTXInst<(outs Int1Regs:$dst), (ins Int1Regs:$sss),
"mov.pred \t$dst, $sss;", []>;
// Integer moves: 16-, 32- and 64-bit.
def IMOV16rr : NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$sss),
"mov.u16 \t$dst, $sss;", []>;
def IMOV32rr : NVPTXInst<(outs Int32Regs:$dst), (ins Int32Regs:$sss),
"mov.u32 \t$dst, $sss;", []>;
def IMOV64rr : NVPTXInst<(outs Int64Regs:$dst), (ins Int64Regs:$sss),
"mov.u64 \t$dst, $sss;", []>;
// Floating-point moves: f32 and f64.
def FMOV32rr : NVPTXInst<(outs Float32Regs:$dst), (ins Float32Regs:$src),
"mov.f32 \t$dst, $src;", []>;
def FMOV64rr : NVPTXInst<(outs Float64Regs:$dst), (ins Float64Regs:$src),
"mov.f64 \t$dst, $src;", []>;
}
[-def IMOV1ri: NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),-]
[- "mov.pred \t$dst, $src;",-]
[- [(set Int1Regs:$dst, imm:$src)]>;-]
[-def IMOV16ri: NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),-]
[- "mov.u16 \t$dst, $src;",-]
[- [(set Int16Regs:$dst, imm:$src)]>;-]
[-def IMOV32ri: NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),-]
[- "mov.u32 \t$dst, $src;",-]
[- [(set Int32Regs:$dst, imm:$src)]>;-]
[-def IMOV64i: NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),-]
[- "mov.u64 \t$dst, $src;",-]
[- [(set Int64Regs:$dst, imm:$src)]>;-]
def {+IMOV1ri : NVPTXInst<(outs Int1Regs:$dst), (ins i1imm:$src),+}
{+ "mov.pred \t$dst, $src;",+}
{+ [(set Int1Regs:$dst, imm:$src)]>;+}
{+def IMOV16ri : NVPTXInst<(outs Int16Regs:$dst), (ins i16imm:$src),+}
{+ "mov.u16 \t$dst, $src;",+}
{+ [(set Int16Regs:$dst, imm:$src)]>;+}
{+def IMOV32ri : NVPTXInst<(outs Int32Regs:$dst), (ins i32imm:$src),+}
{+ "mov.u32 \t$dst, $src;",+}
{+ [(set Int32Regs:$dst, imm:$src)]>;+}
{+def IMOV64i : NVPTXInst<(outs Int64Regs:$dst), (ins i64imm:$src),+}
{+ "mov.u64 \t$dst, $src;",+}
{+ [(set Int64Regs:$dst, imm:$src)]>;+}
{+def+} FMOV32ri : NVPTXInst<(outs Float32Regs:$dst), (ins f32imm:$src),
"mov.f32 \t$dst, $src;",
[(set Float32Regs:$dst, fpimm:$src)]>;
def FMOV64ri : NVPTXInst<(outs Float64Regs:$dst), (ins f64imm:$src),
"mov.f64 \t$dst, $src;",
[(set Float64Regs:$dst, fpimm:$src)]>;
// An i32 Wrapper around a target external symbol is materialized with
// IMOV32ri, passing the symbol as the "immediate" operand.
def : Pat<(i32 (Wrapper texternalsym:$dst)), (IMOV32ri texternalsym:$dst)>;
//---- Copy Frame Index ----
// Materializes an ADDRri-matched address into an Int32Regs register with
// add.u32. The operand is printed through the "add" modifier of the MEMri
// print method (${addr:add}).
def LEA_ADDRi : NVPTXInst<(outs Int32Regs:$dst), (ins MEMri:$addr),
"add.u32 \t$dst, ${addr:add};",
[(set Int32Regs:$dst, ADDRri:$addr)]>;
// 64-bit form: ADDRri64 / MEMri64 into Int64Regs with add.u64.
def LEA_ADDRi64 : NVPTXInst<(outs Int64Regs:$dst), (ins MEMri64:$addr),
"add.u64 \t$dst, ${addr:add};",
[(set Int64Regs:$dst, ADDRri64:$addr)]>;
//-----------------------------------
// Comparison and Selection
@@ -1554,7 +1526,7 @@ multiclass ISET_FORMAT_SIGNED<PatFrag OpNode, PatLeaf Mode>
SET_s16rr, SET_s16ri, SET_s16ir,
SET_s32rr, SET_s32ri, SET_s32ir,
SET_s64rr, SET_s64ri, SET_s64ir> {
// TableGen doesn't like empty multiclasses{+.+}
def : PatLeaf<(i32 0)>;
}
@@ -1566,21 +1538,21 @@ multiclass ISET_FORMAT_UNSIGNED<PatFrag OpNode, PatLeaf Mode>
SET_u16rr, SET_u16ri, SET_u16ir,
SET_u32rr, SET_u32ri, SET_u32ir,
SET_u64rr, SET_u64ri, SET_u64ir> {
// TableGen doesn't like empty multiclasses{+.+}
def : PatLeaf<(i32 0)>;
}
defm : ISET_FORMAT_SIGNED<setgt, CmpGT>;
[-defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;-]
defm : ISET_FORMAT_SIGNED<setlt, CmpLT>;
[-defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;-]
defm : ISET_FORMAT_SIGNED<setge, CmpGE>;
[-defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;-]
defm : ISET_FORMAT_SIGNED<setle, CmpLE>;
[-defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;-]
defm : ISET_FORMAT_SIGNED<seteq, CmpEQ>;
[-defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;-]
defm : ISET_FORMAT_SIGNED<setne, CmpNE>;
{+defm : ISET_FORMAT_UNSIGNED<setugt, CmpGT>;+}
{+defm : ISET_FORMAT_UNSIGNED<setult, CmpLT>;+}
{+defm : ISET_FORMAT_UNSIGNED<setuge, CmpGE>;+}
{+defm : ISET_FORMAT_UNSIGNED<setule, CmpLE>;+}
{+defm : ISET_FORMAT_UNSIGNED<setueq, CmpEQ>;+}
defm : ISET_FORMAT_UNSIGNED<setune, CmpNE>;
// i1 compares
@@ -1678,13 +1650,14 @@ defm FSetNE : FSET_FORMAT<setne, CmpNE, CmpNE_FTZ>;
defm FSetNUM : FSET_FORMAT<seto, CmpNUM, CmpNUM_FTZ>;
defm FSetNAN : FSET_FORMAT<setuo, CmpNAN, CmpNAN_FTZ>;
// {+FIXME: What is this doing here? Can it be deleted?+}
{+//+} def ld_param : SDNode<"NVPTXISD::LOAD_PARAM", SDTLoad,
// [SDNPHasChain, SDNPMayLoad, SDNPMemOperand]>;
// Type profiles for the parameter-passing nodes. SDTypeProfile takes
// <number of results, number of operands, constraints>; constraint indices
// count results first, then operands.
//
// No results, three operands, all integer.
def SDTDeclareParamProfile :
SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
// Identical constraints to SDTDeclareParamProfile, kept as a separate record.
def SDTDeclareScalarParamProfile :
SDTypeProfile<0, 3, [SDTCisInt<0>, SDTCisInt<1>, SDTCisInt<2>]>;
// One result (unconstrained) and two integer operands.
def SDTLoadParamProfile : SDTypeProfile<1, 2, [SDTCisInt<1>, SDTCisInt<2>]>;
// Two results of the same type and two integer operands.
def SDTLoadParamV2Profile : SDTypeProfile<2, 2, [SDTCisSameAs<0, 1>, SDTCisInt<2>, SDTCisInt<3>]>;
// Four results (unconstrained here) and two integer operands.
def SDTLoadParamV4Profile : SDTypeProfile<4, 2, [SDTCisInt<4>, SDTCisInt<5>]>;
@@ -1704,187 +1677,198 @@ def SDTStoreRetvalV2Profile : SDTypeProfile<0, 3, [SDTCisInt<0>]>;
// No results, five operands; only the first operand is constrained (integer).
def SDTStoreRetvalV4Profile : SDTypeProfile<0, 5, [SDTCisInt<0>]>;
// No results, one operand, no type constraints.
def SDTPseudoUseParamProfile : SDTypeProfile<0, 1, []>;
// TableGen handles for the NVPTXISD parameter-declaration nodes. Each one is
// chained, takes and produces glue, and is marked as having side effects.
def DeclareParam :
SDNode<"NVPTXISD::DeclareParam", SDTDeclareParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareScalarParam :
SDNode<"NVPTXISD::DeclareScalarParam", SDTDeclareScalarParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRetParam :
SDNode<"NVPTXISD::DeclareRetParam", SDTDeclareParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def DeclareRet :
SDNode<"NVPTXISD::DeclareRet", SDTDeclareScalarParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter loads (scalar, 2-result and 4-result profiles). These are marked
// SDNPMayLoad rather than SDNPSideEffect, and are chained and glued.
def LoadParam :
SDNode<"NVPTXISD::LoadParam", SDTLoadParamProfile,
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV2 :
SDNode<"NVPTXISD::LoadParamV2", SDTLoadParamV2Profile,
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
def LoadParamV4 :
SDNode<"NVPTXISD::LoadParamV4", SDTLoadParamV4Profile,
[SDNPHasChain, SDNPMayLoad, SDNPOutGlue, SDNPInGlue]>;
// PrintCall / PrintCallUni nodes: chained, glued, side-effecting.
def PrintCall :
SDNode<"NVPTXISD::PrintCall", SDTPrintCallProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def PrintCallUni :
SDNode<"NVPTXISD::PrintCallUni", SDTPrintCallUniProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Parameter stores. StoreParam / V2 / V4 each use their own profile, while
// the U32 and S32 variants share SDTStoreParam32Profile. All are chained,
// glued and side-effecting.
def StoreParam :
SDNode<"NVPTXISD::StoreParam", SDTStoreParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV2 :
SDNode<"NVPTXISD::StoreParamV2", SDTStoreParamV2Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamV4 :
SDNode<"NVPTXISD::StoreParamV4", SDTStoreParamV4Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamU32 :
SDNode<"NVPTXISD::StoreParamU32", SDTStoreParam32Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def StoreParamS32 :
SDNode<"NVPTXISD::StoreParamS32", SDTStoreParam32Profile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Call-sequence nodes. CallArgBegin uses SDTCallArgMarkProfile; CallArg and
// LastCallArg share SDTCallArgProfile; CallArgEnd, CallVoid and Prototype
// share SDTCallVoidProfile; CallVal uses SDTCallValProfile. All are chained,
// glued and side-effecting.
def CallArgBegin :
SDNode<"NVPTXISD::CallArgBegin", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArg :
SDNode<"NVPTXISD::CallArg", SDTCallArgProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def LastCallArg :
SDNode<"NVPTXISD::LastCallArg", SDTCallArgProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallArgEnd :
SDNode<"NVPTXISD::CallArgEnd", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVoid :
SDNode<"NVPTXISD::CallVoid", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def Prototype :
SDNode<"NVPTXISD::Prototype", SDTCallVoidProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
def CallVal :
SDNode<"NVPTXISD::CallVal", SDTCallValProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// MoveParam is the only node in this group with an empty property list (no
// chain, no glue, no side-effect flag).
def MoveParam :
SDNode<"NVPTXISD::MoveParam", SDTMoveParamProfile, []>;
// Return-value stores (scalar, V2, V4): chained and side-effecting, but
// without glue properties.
def StoreRetval :
SDNode<"NVPTXISD::StoreRetval", SDTStoreRetvalProfile,
[SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV2 :
SDNode<"NVPTXISD::StoreRetvalV2", SDTStoreRetvalV2Profile,
[SDNPHasChain, SDNPSideEffect]>;
def StoreRetvalV4 :
SDNode<"NVPTXISD::StoreRetvalV4", SDTStoreRetvalV4Profile,
[SDNPHasChain, SDNPSideEffect]>;
// PseudoUseParam: chained, glued on both sides, side-effecting.
def PseudoUseParam :
SDNode<"NVPTXISD::PseudoUseParam", SDTPseudoUseParamProfile,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// Handle for NVPTXISD::RETURN; reuses SDTCallArgMarkProfile and is chained and
// side-effecting.
def RETURNNode :
SDNode<"NVPTXISD::RETURN", SDTCallArgMarkProfile,
[SDNPHasChain, SDNPSideEffect]>;
// Loads one value of register class `regclass` from byte offset $b of retval0.
// The mnemonic is "ld.param" followed by `opstr` (e.g. ".b32").  No selection
// pattern is attached to this class.
class LoadParamMemInst<NVPTXRegClass regclass, string opstr>
  : NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
              !strconcat("ld.param", opstr, "\t$dst, [retval0+$b];"),
              []>;
class LoadParamRegInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst), (ins i32imm:$b),
!strconcat([-!strconcat(-]"mov", opstr[-)-], "\t$dst, retval$b;"),
[(set regclass:$dst, (LoadParam (i32 0), (i32 imm:$b)))]>;
class LoadParamV2MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2), (ins i32imm:$b),
!strconcat([-!strconcat(-]"ld.param.v2", opstr[-)-],
"\t{{$dst, $dst2}}, [retval0+$b];"), []>;
class LoadParamV4MemInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs regclass:$dst, regclass:$dst2, regclass:$dst3,
regclass:$dst4),
(ins i32imm:$b),
!strconcat([-!strconcat(-]"ld.param.v4", opstr[-)-],
"\t{{$dst, $dst2, $dst3, $dst4}}, [retval0+$b];"),
[]>;
class StoreParamInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a, i32imm:$b),
!strconcat([-!strconcat(-]"st.param", opstr[-)-], "\t[param$a+$b], $val;"),
[]>;
class StoreParamV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2,
i32imm:$a, i32imm:$b),
!strconcat([-!strconcat(-]"st.param.v2", opstr[-)-],
"\t[param$a+$b], {{$val, $val2}};"),
[]>;
// Stores four values of register class `regclass` to byte offset $b of
// outgoing parameter $a, using "st.param.v4" followed by `opstr`.
// Every operand printed by the asm string ($val, $val2, $val3, $val4) must be
// declared under exactly that name in the ins list; a mismatch (such as
// declaring $val1 while printing $val4) leaves the asm string referring to an
// operand that does not exist.  The names below follow the same convention as
// the hand-written StoreParamV4* records and StoreRetvalV4Inst.
class StoreParamV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a, i32imm:$b),
!strconcat([-!strconcat(-]"st.param.v4", opstr[-)-],
"\t[param$a+$b], {{$val, $val2, $val3, $val4}};"),
[]>;
class StoreRetvalInst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, i32imm:$a),
!strconcat([-!strconcat(-]"st.param", opstr[-)-], "\t[func_retval0+$a], $val;"),
[]>;
class StoreRetvalV2Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs), (ins regclass:$val, regclass:$val2, i32imm:$a),
!strconcat([-!strconcat(-]"st.param.v2", opstr[-)-],
"\t[func_retval0+$a], {{$val, $val2}};"),
[]>;
class StoreRetvalV4Inst<NVPTXRegClass regclass, string opstr> :
NVPTXInst<(outs),
(ins regclass:$val, regclass:$val2, regclass:$val3,
regclass:$val4, i32imm:$a),
!strconcat([-!strconcat(-]"st.param.v4", opstr[-)-],
"\t[func_retval0+$a], {{$val, $val2, $val3, $val4}};"),
[]>;
let isCall=1 in {
def PrintCall{+NoRetInst : NVPTXInst<(outs), (ins),+}
{+ "call ", [(PrintCall (i32 0))]>;+}
{+ def PrintCall+}RetInst1 : NVPTXInst<(outs), (ins),
"call (retval0), ", [(PrintCall (i32 1))]>;
def PrintCallRetInst2 : NVPTXInst<(outs), (ins),
"call (retval0, retval1), ", [(PrintCall (i32 2))]>;
def PrintCallRetInst3 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2), ", [(PrintCall (i32 3))]>;
def PrintCallRetInst4 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3), ", [(PrintCall (i32 4))]>;
def PrintCallRetInst5 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4), ",
[(PrintCall (i32 5))]>;
def PrintCallRetInst6 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5), ",
[(PrintCall (i32 6))]>;
def PrintCallRetInst7 : NVPTXInst<(outs), (ins),
"call (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
[(PrintCall (i32 7))]>;
def PrintCallRetInst8 : NVPTXInst<(outs), (ins),[-!strconcat(-]
"call (retval0, retval1, retval2, retval3, retval4[-",-]
[- "-], retval5, retval6, {+"+}
{+ "+}retval7), "[-)-],
[(PrintCall (i32 8))]>;
def PrintCall{+Uni+}NoRetInst : NVPTXInst<(outs), (ins),
"call{+.uni+} ", [(PrintCall{+Uni+} (i32 0))]>;
def PrintCallUniRetInst1 : NVPTXInst<(outs), (ins),
"call.uni (retval0), ", [(PrintCallUni (i32 1))]>;
def PrintCallUniRetInst2 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1), ", [(PrintCallUni (i32 2))]>;
def PrintCallUniRetInst3 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2), ", [(PrintCallUni (i32 3))]>;
def PrintCallUniRetInst4 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3), ", [(PrintCallUni (i32 4))]>;
def PrintCallUniRetInst5 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4), ",
[(PrintCallUni (i32 5))]>;
def PrintCallUniRetInst6 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5), ",
[(PrintCallUni (i32 6))]>;
def PrintCallUniRetInst7 : NVPTXInst<(outs), (ins),
"call.uni (retval0, retval1, retval2, retval3, retval4, retval5, retval6), ",
[(PrintCallUni (i32 7))]>;
def PrintCallUniRetInst8 : NVPTXInst<(outs), (ins),[-!strconcat(-]
"call.uni (retval0, retval1, retval2, retval3, retval4[-",-]
[- "-], retval5, retval6, {+"+}
{+ "+}retval7), "[-)-],
[(PrintCallUni (i32 8))]>;[-def PrintCallUniNoRetInst : NVPTXInst<(outs), (ins), "call.uni ",-]
[- [(PrintCallUni (i32 0))]>;-]
}[-// call instructions-]
def LoadParamMemI64 : LoadParamMemInst<Int64Regs, ".b64">;
def LoadParamMemI32 : LoadParamMemInst<Int32Regs, ".b32">;
@@ -1915,37 +1899,35 @@ def StoreParamV2I8 : StoreParamV2Inst<Int16Regs, ".b8">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4I32 : StoreParamV4Inst<Int32Regs, ".b32">;
// The integer v4 parameter stores are therefore spelled out by hand instead of
// instantiating the class.  Each takes four source registers plus the
// parameter index $a and byte offset $b, and has no selection pattern.
def StoreParamV4I32 :
NVPTXInst<(outs), (ins Int32Regs:$val, Int32Regs:$val2, Int32Regs:$val3,
Int32Regs:$val4, i32imm:$a, i32imm:$b),
"st.param.v4.b32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
[]>;
def StoreParamV4I16 :
NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
Int16Regs:$val4, i32imm:$a, i32imm:$b),
"st.param.v4.b16\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
[]>;
// The 8-bit variant also takes its sources from Int16Regs; only the ".b8"
// suffix in the asm string differs from StoreParamV4I16.
def StoreParamV4I8 :
NVPTXInst<(outs), (ins Int16Regs:$val, Int16Regs:$val2, Int16Regs:$val3,
Int16Regs:$val4, i32imm:$a, i32imm:$b),
"st.param.v4.b8\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
[]>;
// Floating-point parameter stores: scalar and v2 forms instantiate the shared
// StoreParamInst / StoreParamV2Inst classes with a ".f32" or ".f64" suffix.
def StoreParamF32 : StoreParamInst<Float32Regs, ".f32">;
def StoreParamF64 : StoreParamInst<Float64Regs, ".f64">;
def StoreParamV2F32 : StoreParamV2Inst<Float32Regs, ".f32">;
def StoreParamV2F64 : StoreParamV2Inst<Float64Regs, ".f64">;
// FIXME: StoreParamV4Inst crashes llvm-tblgen :(
//def StoreParamV4F32 : StoreParamV4Inst<Float32Regs, ".f32">;
// Hand-written replacement for the commented-out instantiation above.
def StoreParamV4F32 :
NVPTXInst<(outs), (ins Float32Regs:$val, Float32Regs:$val2, Float32Regs:$val3,
Float32Regs:$val4, i32imm:$a, i32imm:$b),
"st.param.v4.f32\t[param$a+$b], {{$val, $val2, $val3, $val4}};",
[]>;
def StoreRetvalI64 : StoreRetvalInst<Int64Regs, ".b64">;
def StoreRetvalI32 : StoreRetvalInst<Int32Regs, ".b32">;
@@ -1971,89 +1953,88 @@ def CallArgEndInst0 : NVPTXInst<(outs), (ins), ")", [(CallArgEnd (i32 0))]>;
def RETURNInst : NVPTXInst<(outs), (ins), "ret;", [(RETURNNode)]>;
// Prints a non-final call argument held in a register of class `regclass`:
// the operand followed by ", ".  Matches CallArg with a first operand of
// (i32 0).
class CallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a, ",
[(CallArg (i32 0), regclass:$a)]>;
// Prints the final call argument held in a register of class `regclass`:
// the operand alone, with no trailing separator.  Matches LastCallArg with a
// first operand of (i32 0).
class LastCallArgInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$a), "$a",
[(LastCallArg (i32 0), regclass:$a)]>;
// Register-operand call arguments, one record per register class.
def CallArgI64 : CallArgInst<Int64Regs>;
def CallArgI32 : CallArgInst<Int32Regs>;
def CallArgI16 : CallArgInst<Int16Regs>;
def CallArgF64 : CallArgInst<Float64Regs>;
def CallArgF32 : CallArgInst<Float32Regs>;
def LastCallArgI64 : LastCallArgInst<Int64Regs>;
def LastCallArgI32 : LastCallArgInst<Int32Regs>;
def LastCallArgI16 : LastCallArgInst<Int16Regs>;
def LastCallArgF64 : LastCallArgInst<Float64Regs>;
def LastCallArgF32 : LastCallArgInst<Float32Regs>;
// Immediate call arguments: same (i32 0) selector as the register forms, but
// the operand is an i32 immediate printed directly.
def CallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a, ",
[(CallArg (i32 0), (i32 imm:$a))]>;
def LastCallArgI32imm : NVPTXInst<(outs), (ins i32imm:$a), "$a",
[(LastCallArg (i32 0), (i32 imm:$a))]>;
// Selector (i32 1): the immediate is printed as the suffix of "param",
// i.e. "param<N>", rather than as a bare value.
def CallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a, ",
[(CallArg (i32 1), (i32 imm:$a))]>;
def LastCallArgParam : NVPTXInst<(outs), (ins i32imm:$a), "param$a",
[(LastCallArg (i32 1), (i32 imm:$a))]>;
// Callee operand of a call, printed as "$addr, ".  Three forms: a wrapped
// global address, a 32-bit register and a 64-bit register.
def CallVoidInst : NVPTXInst<(outs), (ins imem:$addr), "$addr, ",
[(CallVoid (Wrapper tglobaladdr:$addr))]>;
def CallVoidInstReg : NVPTXInst<(outs), (ins Int32Regs:$addr), "$addr, ",
[(CallVoid Int32Regs:$addr)]>;
def CallVoidInstReg64 : NVPTXInst<(outs), (ins Int64Regs:$addr), "$addr, ",
[(CallVoid Int64Regs:$addr)]>;
// Prints ", prototype_<val>;" -- note the trailing ';' in the asm string.
def PrototypeInst : NVPTXInst<(outs), (ins i32imm:$val), ", prototype_$val;",
[(Prototype (i32 imm:$val))]>;
// Declarations of return-value and parameter storage.  Each record only
// prints a declaration line built from its immediate operands.

// Byte-array return slot: ".param .align <align> .b8 retval<num>[<size>];".
def DeclareRetMemInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$size, i32imm:$num),
".param .align $align .b8 retval$num[$size];",
[(DeclareRetParam (i32 imm:$align), (i32 imm:$size), (i32 imm:$num))]>;
// DeclareRet with first operand 1 selects the ".param" scalar form ...
def DeclareRetScalarInst :
NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
".param .b$size retval$num;",
[(DeclareRet (i32 1), (i32 imm:$size), (i32 imm:$num))]>;
// ... and first operand 2 selects the ".reg" form.
def DeclareRetRegInst :
NVPTXInst<(outs), (ins i32imm:$size, i32imm:$num),
".reg .b$size retval$num;",
[(DeclareRet (i32 2), (i32 imm:$size), (i32 imm:$num))]>;
// Byte-array parameter slot: ".param .align <align> .b8 param<a>[<size>];".
def DeclareParamInst :
NVPTXInst<(outs), (ins i32imm:$align, i32imm:$a, i32imm:$size),
".param .align $align .b8 param$a[$size];",
[(DeclareParam (i32 imm:$align), (i32 imm:$a), (i32 imm:$size))]>;
// DeclareScalarParam with last operand 0 selects the ".param" form ...
def DeclareScalarParamInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".param .b$size param$a;",
[(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 0))]>;
// ... and last operand 1 selects the ".reg" form.
def DeclareScalarRegInst :
NVPTXInst<(outs), (ins i32imm:$a, i32imm:$size),
".reg .b$size param$a;",
[(DeclareScalarParam (i32 imm:$a), (i32 imm:$size), (i32 1))]>;
class MoveParamInst<NVPTXRegClass regclass, string asmstr> :
NVPTXInst<(outs regclass:$dst), (ins regclass:$src),
!strconcat([-!strconcat(-]"mov", asmstr[-)-], "\t$dst, $src;"),
[(set regclass:$dst, (MoveParam regclass:$src))]>;
// MoveParam selections per register class.  All but the 16-bit case
// instantiate MoveParamInst with the matching type suffix.
def MoveParamI64 : MoveParamInst<Int64Regs, ".b64">;
def MoveParamI32 : MoveParamInst<Int32Regs, ".b32">;
// The 16-bit case is written out by hand because it prints "cvt.u16.u32"
// rather than a "mov" with a type suffix.
def MoveParamI16 :
NVPTXInst<(outs Int16Regs:$dst), (ins Int16Regs:$src),
"cvt.u16.u32\t$dst, $src;",
[(set Int16Regs:$dst, (MoveParam Int16Regs:$src))]>;
def MoveParamF64 : MoveParamInst<Float64Regs, ".f64">;
def MoveParamF32 : MoveParamInst<Float32Regs, ".f32">;
// Selects PseudoUseParam on a register of class `regclass`.  The asm string
// begins with "//", so the printed text is the line "// Pseudo use of <reg>".
class PseudoUseParamInst<NVPTXRegClass regclass> :
NVPTXInst<(outs), (ins regclass:$src),
"// Pseudo use of $src",
[(PseudoUseParam regclass:$src)]>;
def PseudoUseParamI64 : PseudoUseParamInst<Int64Regs>;
def PseudoUseParamI32 : PseudoUseParamInst<Int32Regs>;
@@ -2066,254 +2047,278 @@ def PseudoUseParamF32 : PseudoUseParamInst<Float32Regs>;
// Load / Store Handling
//
multiclass LD<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t$dst, [$addr];"[-)-], []>;
def _areg : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t$dst, [$addr];"[-)-], []>;
def _areg_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "[-,-]
"\t$dst, [$addr];"[-)-], []>;
def _ari : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t$dst, [$addr+$offset];"[-)-], []>;
def _ari_64 : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$fromWidth "[-,-]
"\t$dst, [$addr+$offset];"[-)-], []>;
def _asi : NVPTXInst<
(outs regclass:$dst),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$fromWidth, imem:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t$dst, [$addr+$offset];"[-)-], []>;
}
// Instantiate the LD multiclass once per value type.  Every record is flagged
// mayLoad with no other side effects.  LD_i8 and LD_i16 both use Int16Regs as
// the destination class.
let mayLoad=1, hasSideEffects=0 in {
defm LD_i8 : LD<Int16Regs>;
defm LD_i16 : LD<Int16Regs>;
defm LD_i32 : LD<Int32Regs>;
defm LD_i64 : LD<Int64Regs>;
defm LD_f32 : LD<Float32Regs>;
defm LD_f64 : LD<Float64Regs>;
}
multiclass ST<NVPTXRegClass regclass> {
def _avar : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, imem:$addr),
[-!-]{+"st${isVol:volatile}${add+}s{+p:addsp}${Vec:vec}.${Sign:sign}$toWid+}t{+h"+}
{+ " \t[$addr], $s+}rc{+;", []>;+}
{+ def _areg : NVPTXInst<+}
{+ (+}o{+uts),+}
{+ (i+}n{+s reg+}c{+l+}a{+ss:$src, LdS+}t[-(-]{+Code:$isVol, LdStCode:$addsp,+}
{+ LdStCode:$Vec, LdStCode:$Sign, i32imm:$toWidth, Int32Regs:$addr),+}
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"[-,-]
" \t[$addr], $src;"[-)-], []>;
def _areg{+_64+} : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int[-32-]{+64+}Regs:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"[-,-]
" \t[$addr], $src;"[-)-], []>;
def _ar[-eg_64-]{+i+} : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int[-64-]{+32+}Regs:$addr[-)-], [-!strc-]{+i32imm:$+}o[-nca-]{+ffse+}t[-(-]{+),+}
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"[-,-]
" \t[$addr{++$offset+}], $src;"[-)-], []>;
def _ari{+_64+} : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth, Int[-32-]{+64+}Regs:$addr, i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"[-,-]
" \t[$addr+$offset], $src;"[-)-], []>;
def _a[-r-]{+s+}i[-_64-] : NVPTXInst<
(outs),
(ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,
LdStCode:$Sign, i32imm:$toWidth,[-Int64Regs:$addr, i32imm:$offset),-]
[- !strconcat("st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth ",-]
[- "\t[$addr+$offset], $src;"), []>;-]
[- def _asi : NVPTXInst<(outs),-]
[- (ins regclass:$src, LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec,-]
[- LdStCode:$Sign, i32imm:$toWidth,-] imem:$addr, i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}$toWidth"[-,-]
" \t[$addr+$offset], $src;"[-)-], []>;
}
// Instantiate the ST multiclass once per value type.  Every record is flagged
// mayStore with no other side effects.  ST_i8 and ST_i16 both use Int16Regs as
// the source class.
let mayStore=1, hasSideEffects=0 in {
defm ST_i8 : ST<Int16Regs>;
defm ST_i16 : ST<Int16Regs>;
defm ST_i32 : ST<Int32Regs>;
defm ST_i64 : ST<Int64Regs>;
defm ST_f32 : ST<Float32Regs>;
defm ST_f64 : ST<Float64Regs>;
}
// The following is used only in and after vector elementizations.[-//-] Vector
{+//+} elementization happens at the machine instruction level, so the[-//-] following
{+//+} instruction[-//-]{+s+} never appear[-s-] in the DAG.
multiclass LD_VEC<NVPTXRegClass regclass> {
def _v2_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr];"[-)-], []>;
def _v2_areg : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr];"[-)-], []>;
def _v2_areg_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr];"[-)-], []>;
def _v2_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr+$offset];"[-)-], []>;
def _v2_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr+$offset];"[-)-], []>;
def _v2_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2}}, [$addr+$offset];"[-)-], []>;
def _v4_avar : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"[-)-], []>;
def _v4_areg : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"[-)-], []>;
def _v4_areg_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr];"[-)-], []>;
def _v4_ari : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"[-)-], []>;
def _v4_ari_64 : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"[-)-], []>;
def _v4_asi : NVPTXInst<
(outs regclass:$dst1, regclass:$dst2, regclass:$dst3, regclass:$dst4),
(ins LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),[-!strconcat(-]
"ld${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t{{$dst1, $dst2, $dst3, $dst4}}, [$addr+$offset];"[-)-], []>;
}
// Instantiate the LD_VEC multiclass (v2 and v4 forms) once per element type.
// Every record is flagged mayLoad with no other side effects.  LDV_i8 and
// LDV_i16 both use Int16Regs for their destinations.
let mayLoad=1, hasSideEffects=0 in {
defm LDV_i8 : LD_VEC<Int16Regs>;
defm LDV_i16 : LD_VEC<Int16Regs>;
defm LDV_i32 : LD_VEC<Int32Regs>;
defm LDV_i64 : LD_VEC<Int64Regs>;
defm LDV_f32 : LD_VEC<Float32Regs>;
defm LDV_f64 : LD_VEC<Float64Regs>;
}
multiclass ST_VEC<NVPTXRegClass regclass> {
def _v2_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2}};"[-)-], []>;
def _v2_areg : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2}};"[-)-], []>;
def _v2_areg_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2}};"[-)-], []>;
def _v2_ari : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int32Regs:$addr,
i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr+$offset], {{$src1, $src2}};"[-)-], []>;
def _v2_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, Int64Regs:$addr,
i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr+$offset], {{$src1, $src2}};"[-)-], []>;
def _v2_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, LdStCode:$isVol, LdStCode:$addsp,
LdStCode:$Vec, LdStCode:$Sign, i32imm:$fromWidth, imem:$addr,
i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr+$offset], {{$src1, $src2}};"[-)-], []>;
def _v4_avar : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
def _v4_areg : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
def _v4_areg_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
def _v4_ari : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int32Regs:$addr, i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
def _v4_ari_64 : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, Int64Regs:$addr, i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}[-",-]
[- "-]$fromWidth {+"+}
{+ "+}\t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
def _v4_asi : NVPTXInst<
(outs),
(ins regclass:$src1, regclass:$src2, regclass:$src3, regclass:$src4,
LdStCode:$isVol, LdStCode:$addsp, LdStCode:$Vec, LdStCode:$Sign,
i32imm:$fromWidth, imem:$addr, i32imm:$offset),[-!strconcat(-]
"st${isVol:volatile}${addsp:addsp}${Vec:vec}.${Sign:sign}"[-,-]
"$fromWidth \t[$addr+$offset], {{$src1, $src2, $src3, $src4}};"[-)-], []>;
}
// Instantiate the ST_VEC multiclass (v2 and v4 forms) once per element type.
// Every record is flagged mayStore with no other side effects.  STV_i8 and
// STV_i16 both use Int16Regs for their sources.
let mayStore=1, hasSideEffects=0 in {
defm STV_i8 : ST_VEC<Int16Regs>;
defm STV_i16 : ST_VEC<Int16Regs>;
defm STV_i32 : ST_VEC<Int32Regs>;
defm STV_i64 : ST_VEC<Int64Regs>;
defm STV_f32 : ST_VEC<Float32Regs>;
defm STV_f64 : ST_VEC<Float64Regs>;
}
@@ -2529,60 +2534,47 @@ def : Pat<(select Int32Regs:$pred, Float64Regs:$a, Float64Regs:$b),
// Pack a set of smaller registers into one larger register using a "mov"
// whose source is a brace-enclosed register list.  None of these records has
// a selection pattern.  V2F32toF64 uses the same "mov.b64" form with
// floating-point register classes.
def V4I16toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2,
Int16Regs:$s3, Int16Regs:$s4),
"mov.b64\t$d, {{$s1, $s2, $s3, $s4}};", []>;
def V2I16toI32 : NVPTXInst<(outs Int32Regs:$d),
(ins Int16Regs:$s1, Int16Regs:$s2),
"mov.b32\t$d, {{$s1, $s2}};", []>;
def V2I32toI64 : NVPTXInst<(outs Int64Regs:$d),
(ins Int32Regs:$s1, Int32Regs:$s2),
"mov.b64\t$d, {{$s1, $s2}};", []>;
def V2F32toF64 : NVPTXInst<(outs Float64Regs:$d),
(ins Float32Regs:$s1, Float32Regs:$s2),
"mov.b64\t$d, {{$s1, $s2}};", []>;
// Unpack one larger register into a set of smaller registers using a "mov"
// whose destination is a brace-enclosed register list.  None of these records
// has a selection pattern.  F64toV2F32 uses the same "mov.b64" form with
// floating-point register classes.
def I64toV4I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2,
Int16Regs:$d3, Int16Regs:$d4),
(ins Int64Regs:$s),
"mov.b64\t{{$d1, $d2, $d3, $d4}}, $s;", []>;
def I32toV2I16 : NVPTXInst<(outs Int16Regs:$d1, Int16Regs:$d2),
(ins Int32Regs:$s),
"mov.b32\t{{$d1, $d2}}, $s;", []>;
def I64toV2I32 : NVPTXInst<(outs Int32Regs:$d1, Int32Regs:$d2),
(ins Int64Regs:$s),
"mov.b64\t{{$d1, $d2}}, $s;", []>;
def F64toV2F32 : NVPTXInst<(outs Float32Regs:$d1, Float32Regs:$d2),
(ins Float64Regs:$s),
"mov.b64\t{{$d1, $d2}}, $s;", []>;
// Count leading zeros.
// Both instruction records write an Int32Regs destination, including the one
// that takes a 64-bit source; selection is done by the Pat records below.
def CLZr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"clz.b32\t$d, $a;", []>;
def CLZr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"clz.b64\t$d, $a;", []>;
// 32-bit has a direct PTX instruction; ctlz and ctlz_zero_undef both map to it.
def : Pat<(ctlz Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
def : Pat<(ctlz_zero_undef Int32Regs:$a), (CLZr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend it
// to 64-bit (via CVT_u64_u32) to match the LLVM semantics.
def : Pat<(ctlz Int64Regs:$a), (CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
def : Pat<(ctlz_zero_undef Int64Regs:$a),
(CVT_u64_u32 (CLZr64 Int64Regs:$a), CvtNONE)>;
@@ -2601,27 +2593,22 @@ def : Pat<(ctlz_zero_undef Int16Regs:$a),
// Population count.
// Both instruction records write an Int32Regs destination, including the one
// that takes a 64-bit source; selection is done by the Pat records below.
def POPCr32 : NVPTXInst<(outs Int32Regs:$d), (ins Int32Regs:$a),
"popc.b32\t$d, $a;", []>;
def POPCr64 : NVPTXInst<(outs Int32Regs:$d), (ins Int64Regs:$a),
"popc.b64\t$d, $a;", []>;
// 32-bit has a direct PTX instruction.
def : Pat<(ctpop Int32Regs:$a), (POPCr32 Int32Regs:$a)>;
// For 64-bit, the result in PTX is actually 32-bit, so we zero-extend it
// to 64-bit (via CVT_u64_u32) to match the LLVM semantics.
def : Pat<(ctpop Int64Regs:$a), (CVT_u64_u32 (POPCr64 Int64Regs:$a), CvtNONE)>;
// For 16-bit, we zero-extend the input to 32-bit, then truncate the result
// back to 16 bits (ctpop of a 16-bit value is guaranteed to require fewer
// than 16 bits to store).
def : Pat<(ctpop Int16Regs:$a),
(CVT_u16_u32 (POPCr32 (CVT_u32_u16 Int16Regs:$a, CvtNONE)), CvtNONE)>;
// fround f64 -> f32
def : Pat<(f32 (fround Float64Regs:$a)),
@@ -2635,8 +2622,8 @@ def : Pat<(f64 (fextend Float32Regs:$a)),
def : Pat<(f64 (fextend Float32Regs:$a)),
(CVT_f64_f32 Float32Regs:$a, CvtNONE)>;
// SelectionDAG node for NVPTXISD::RET_FLAG: no typed operands (SDTNone),
// chained, with an optional incoming glue.
def retflag : SDNode<"NVPTXISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue]>;
//-----------------------------------
// Control-flow
@@ -2648,56 +2635,48 @@ let isTerminator=1 in {
// Conditional branch taken when predicate $a is set: "@$a bra <target>".
// Selected for brcond on an Int1Regs condition.
let isBranch=1 in
def CBranch : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
"@$a bra \t$target;",
[(brcond Int1Regs:$a, bb:$target)]>;
// Conditional branch on the negated predicate: "@!$a bra <target>".  It has no
// pattern of its own; it is produced by a separate Pat record.
let isBranch=1 in
def CBranchOther : NVPTXInst<(outs), (ins Int1Regs:$a, brtarget:$target),
"@!$a bra \t$target;", []>;
// Unconditional branch ("bra.uni"), selected for the generic `br` node.
// Flagged as both a branch and a barrier.
let isBranch=1, isBarrier=1 in
def GOTO : NVPTXInst<(outs), (ins brtarget:$target),
"bra.uni \t$target;", [(br bb:$target)]>;
}
def : Pat<(brcond Int32Regs:$a, bb:$target),
(CBranch (SETP_u32ri Int32Regs:$a, 0, CmpNE), bb:$target)>;
// SelectionDAGBuilder::visitSwitchCase() will invert the condition of a
// conditional branch if[-//-] the target block is the next block so that the code
{+//+} can fall through to the[-//-] target block.[-//-] The invertion is done by 'xor
{+//+} condition, 1', which will be translated to[-//-] (setne condition, -1).[-//-] Since ptx
{+//+} supports '@!pred bra target', we should use it.
def : Pat<(brcond (i1 (setne Int1Regs:$a, -1)), bb:$target),
(CBranchOther Int1Regs:$a, bb:$target)>;
// Call
// Type profiles and nodes for the generic call-sequence markers
// (ISD::CALLSEQ_START / ISD::CALLSEQ_END) and for NVPTXISD::CALL.
def SDT_NVPTXCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
def SDT_NVPTXCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_NVPTXCallSeqStart,
[SDNPHasChain, SDNPOutGlue, SDNPSideEffect]>;
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_NVPTXCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
SDNPSideEffect]>;
// NVPTXISD::CALL: no results, one i32 operand.
def SDT_NVPTXCall : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
def call : SDNode<"NVPTXISD::CALL", SDT_NVPTXCall,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
def calltarget : Operand<i32>;
// CALL itself carries no pattern; the two Pat records below select it for
// global-address and external-symbol callees.
let isCall=1 in {
def CALL : NVPTXInst<(outs), (ins calltarget:$dst), "call \t$dst, (1);", []>;
}
def : Pat<(call tglobaladdr:$dst), (CALL tglobaladdr:$dst)>;
def : Pat<(call texternalsym:$dst), (CALL texternalsym:$dst)>;
// Pseudo instructions.
class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
@@ -2705,31 +2684,34 @@ class Pseudo<dag outs, dag ins, string asmstr, list<dag> pattern>
// @TODO: We use some tricks here to emit curly braces. Can we clean this up
// a bit without TableGen modifications?
def Callseq_Start :
NVPTXInst<(outs), (ins i32imm:$amt),
"// Callseq Start $amt\n{+"+}
{+ "+}\t{{\n{+"+}
{+ "+}\t.reg .b32 temp_param_reg;\n{+"+}
{+ "+}\t// <end>}}",
[(callseq_start timm:$amt)]>;
def Callseq_End :
NVPTXInst<(outs), (ins i32imm:$amt1, i32imm:$amt2),
"\n{+"+}
{+ "+}\t//{{\n{+"+}
{+ "+}\t}}// Callseq End $amt1",
[(callseq_end timm:$amt1, timm:$amt2)]>;
// trap instruction: prints "trap;" for the generic `trap` node.
def trapinst : NVPTXInst<(outs), (ins), "trap;", [(trap)]>;
// Call prototype wrapper
// NVPTXISD::CallPrototype has no results and one integer operand; it is
// chained, glued on both sides and marked as side-effecting.
def SDTCallPrototype : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
def CallPrototype :
SDNode<"NVPTXISD::CallPrototype", SDTCallPrototype,
[SDNPHasChain, SDNPOutGlue, SDNPInGlue, SDNPSideEffect]>;
// i32 operand whose textual form is produced by the "printProtoIdent"
// print method (defined outside this file section).
def ProtoIdent : Operand<i32> {
let PrintMethod = "printProtoIdent";
}
// Prints only the identifier operand; selected for CallPrototype applied to
// an external symbol.
def CALL_PROTOTYPE :
NVPTXInst<(outs), (ins ProtoIdent:$ident),
"$ident", [(CallPrototype (i32 texternalsym:$ident))]>;
include "NVPTXIntrinsics.td"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment