Skip to content

Instantly share code, notes, and snippets.

@msaroufim
Created March 2, 2024 20:52
Show Gist options
  • Save msaroufim/46b16075a27053e00cf9d47ca7398648 to your computer and use it in GitHub Desktop.
Save msaroufim/46b16075a27053e00cf9d47ca7398648 to your computer and use it in GitHub Desktop.
# Compilation provided by Compiler Explorer at https://godbolt.org/
# __nv_save_fatbinhandle_for_managed_rt(void**)
# Records the pointer passed in %rdi in the global
# __nv_fatbinhandle_for_managed_rt.
#   In:    %rdi = pointer argument (first integer argument register)
#   Out:   nothing is deliberately returned; %rax happens to still hold the
#          stored pointer on exit
#   Stack: leaf function; it spills to -8(%rbp) without lowering %rsp
__nv_save_fatbinhandle_for_managed_rt(void**):
pushq %rbp
movq %rsp, %rbp
# Spill the argument to a local slot and reload it (unoptimized codegen).
movq %rdi, -8(%rbp)
movq -8(%rbp), %rax
# Publish the pointer through the RIP-relative global.
movq %rax, __nv_fatbinhandle_for_managed_rt(%rip)
nop
popq %rbp
ret
# fatbinData: opaque blob of 64-bit words whose address is stored in the
# __fatDeviceText descriptor. Presumably the embedded CUDA fat binary for this
# translation unit -- TODO confirm against the toolchain documentation.
# Reading the words as little-endian ASCII shows, among other things:
#   - the path "/app/example.cu"
#   - an ELF identification sequence (0x7f 'E' 'L' 'F')
#   - the mangled name _Z26processArrayWithDivergencePii
#   - PTX text starting with ".version 8.3" / ".target sm_52"
# The payload is generated data; do not edit it by hand.
fatbinData:
.quad 0x00100001ba55ed50,0x0000000000001338,0x0000005001010002,0x0000000000001008
.quad 0x0000000000000000,0x0000003400010007,0x0000000f00000040,0x0000000000000011
.quad 0x0000000000000000,0x0000000000000000,0x6178652f7070612f,0x0075632e656c706d
.quad 0x33010102464c457f,0x0000000000000007,0x0000007b00be0002,0x0000000000000000
.quad 0x0000000000000f60,0x0000000000000be0,0x0038004000340534,0x0001000e00400003
.quad 0x7472747368732e00,0x747274732e006261,0x746d79732e006261,0x746d79732e006261
.quad 0x78646e68735f6261,0x7466752e766e2e00,0x2e007972746e652e,0x006f666e692e766e
.quad 0x5a5f2e747865742e,0x7365636f72703632,0x6957796172724173,0x6772657669446874
.quad 0x0069695065636e65,0x6f666e692e766e2e,0x6f727036325a5f2e,0x6172724173736563
.quad 0x7669446874695779,0x5065636e65677265,0x732e766e2e006969,0x5a5f2e6465726168
.quad 0x7365636f72703632,0x6957796172724173,0x6772657669446874,0x0069695065636e65
.quad 0x736e6f632e766e2e,0x5a5f2e30746e6174,0x7365636f72703632,0x6957796172724173
.quad 0x6772657669446874,0x0069695065636e65,0x6c5f67756265642e,0x6c65722e00656e69
.quad 0x6c5f67756265642e,0x5f766e2e00656e69,0x696c5f6775626564,0x00737361735f656e
.quad 0x5f766e2e6c65722e,0x696c5f6775626564,0x00737361735f656e,0x756265645f766e2e
.quad 0x78745f7874705f67,0x65722e766e2e0074,0x6e6f697463612e6c,0x72747368732e0000
.quad 0x7274732e00626174,0x6d79732e00626174,0x6d79732e00626174,0x646e68735f626174
.quad 0x66752e766e2e0078,0x007972746e652e74,0x6f666e692e766e2e,0x6f727036325a5f00
.quad 0x6172724173736563,0x7669446874695779,0x5065636e65677265,0x747865742e006969
.quad 0x6f727036325a5f2e,0x6172724173736563,0x7669446874695779,0x5065636e65677265
.quad 0x692e766e2e006969,0x36325a5f2e6f666e,0x41737365636f7270,0x6874695779617272
.quad 0x6e65677265766944,0x6e2e006969506563,0x6465726168732e76,0x6f727036325a5f2e
.quad 0x6172724173736563,0x7669446874695779,0x5065636e65677265,0x632e766e2e006969
.quad 0x30746e6174736e6f,0x6f727036325a5f2e,0x6172724173736563,0x7669446874695779
.quad 0x5065636e65677265,0x617261705f006969,0x67756265642e006d,0x722e00656e696c5f
.quad 0x67756265642e6c65,0x6e2e00656e696c5f,0x5f67756265645f76,0x7361735f656e696c
.quad 0x6e2e6c65722e0073,0x5f67756265645f76,0x7361735f656e696c,0x65645f766e2e0073
.quad 0x5f7874705f677562,0x2e766e2e00747874,0x697463612e6c6572,0x0000000000006e6f
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x000d000300000062
.quad 0x0000000000000000,0x0000000000000000,0x000c0003000000e2,0x0000000000000000
.quad 0x0000000000000000,0x0004000300000119,0x0000000000000000,0x0000000000000000
.quad 0x0005000300000135,0x0000000000000000,0x0000000000000000,0x0006000300000161
.quad 0x0000000000000000,0x0000000000000000,0x0009000300000173,0x0000000000000000
.quad 0x0000000000000000,0x000d101200000040,0x0000000000000000,0x0000000000000140
.quad 0x0028000200000062,0x000a0efb01010000,0x0100000001010101,0x786500007070612f
.quad 0x75632e656c706d61,0xa206af8ea3860100,0x0000000209000002,0x0301040000000000
.quad 0x0138020103f00100,0x0203030120020103,0x01280203030100c0,0x0101030108027a03
.quad 0x005c01010000e802,0x0000001000020000,0x0101000a0efb0101,0x0000010000000101
.quad 0x0000000000020900,0x0112030004000000,0x810108020a030802,0xf4f00120020203f0
.quad 0x8080011802010380,0x0403800110020f03,0x0108027103012002,0x30020103f0010403
.quad 0x0000010100280201,0x762e000000000000,0x38206e6f69737265,0x677261742e00332e
.quad 0x32355f6d73207465,0x7365726464612e00,0x3620657a69735f73,0x69762e0000000034
.quad 0x652e20656c626973,0x325a5f207972746e,0x737365636f727036,0x7469577961727241
.quad 0x6567726576694468,0x002869695065636e,0x2e206d617261702e,0x36325a5f20343675
.quad 0x41737365636f7270,0x6874695779617272,0x6e65677265766944,0x61705f6969506563
.quad 0x2e002c305f6d6172,0x752e206d61726170,0x7036325a5f203233,0x7241737365636f72
.quad 0x4468746957796172,0x636e656772657669,0x7261705f69695065,0x7b002900315f6d61
.quad 0x702e206765722e00,0x3c70250920646572,0x6765722e003b3e36,0x2509203233622e20
.quad 0x2e003b3e30313c72,0x3436622e20676572,0x3e353c6472250920,0x2e646c000000003b
.quad 0x36752e6d61726170,0x2c32647225092034,0x727036325a5f5b20,0x727241737365636f
.quad 0x6944687469577961,0x65636e6567726576,0x617261705f696950,0x646c003b5d305f6d
.quad 0x752e6d617261702e,0x2c33722509203233,0x727036325a5f5b20,0x727241737365636f
.quad 0x6944687469577961,0x65636e6567726576,0x617261705f696950,0x6d00003b5d315f6d
.quad 0x09203233752e766f,0x746325202c347225,0x6d003b782e646961,0x09203233752e766f
.quad 0x746e25202c357225,0x6f6d003b782e6469,0x2509203233752e76,0x64697425202c3672
.quad 0x2e64616d003b782e,0x09203233732e6f6c,0x347225202c317225,0x25202c357225202c
.quad 0x74657300003b3672,0x3233732e65672e70,0x25202c3170250920,0x3b337225202c3172
.quad 0x7262203170254000,0x425f5f4c24092061,0x0000003b345f3042,0x2e6f742e61747663
.quad 0x752e6c61626f6c67,0x3364722509203436,0x003b32647225202c,0x6469772e6c756d00
.quad 0x2509203233732e65,0x317225202c346472,0x646461003b34202c,0x722509203436732e
.quad 0x33647225202c3164,0x003b34647225202c,0x61626f6c672e646c,0x2509203233752e6c
.quad 0x6472255b202c3272,0x2e646e61003b5d31,0x7225092020323362,0x202c327225202c37
.quad 0x2e70746573003b31,0x09203233622e7165,0x377225202c327025,0x766f6d003b31202c
.quad 0x250920646572702e,0x78003b30202c3370,0x20646572702e726f,0x25202c3470250920
.quad 0x3b337025202c3270,0x6572702e746f6e00,0x202c357025092064,0x702540003b347025
.quad 0x2409206172622035,0x335f3042425f5f4c,0x6e752e617262003b,0x425f5f4c24092069
.quad 0x2400003b325f3042,0x335f3042425f5f4c,0x622e6c687300003a,0x2c39722509203233
.quad 0x3b31202c32722520,0x626f6c672e747300,0x09203233752e6c61,0x202c5d316472255b
.quad 0x617262003b397225,0x4c240920696e752e,0x3b345f3042425f5f,0x42425f5f4c240000
.quad 0x646100003a325f30,0x2509203233732e64,0x2c327225202c3872,0x672e7473003b3120
.quad 0x33752e6c61626f6c,0x316472255b092032,0x003b387225202c5d,0x3042425f5f4c2400
.quad 0x74657200003a345f,0x000000007d00003b,0x0000000700082f04,0x0008230400000005
.quad 0x0000000000000007,0x0000000700081204,0x0008110400000000,0x0000000000000007
.quad 0x0000007b00043704,0x00002a0100003001,0x0000000200080a04,0x000c1903000c0140
.quad 0x00000000000c1704,0x0011f00000080001,0x00000000000c1704,0x0021f00000000000
.quad 0x00041d0400ff1b03,0x000c1c0400000010,0x000000d000000058,0x0000000000000118
.quad 0x0000000000000073,0x3605002511000000,0x0000000000000035,0x0000000700000002
.quad 0x000000000000001d,0x0000000700000002,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x001cfc00e22007f6,0x4c98078000870001,0xf0c8000002570002,0xf0c8000002170000
.quad 0x001fd842fec20ff1,0x4f107f8000270203,0x4e00000000270200,0x5b30001800370202
.quad 0x001ff400fd4007ed,0x4b6d038005270207,0x50b0000000070f00,0xe30000000000000f
.quad 0x001fc800fec207f1,0x3829000001e70200,0x4c18810005070202,0x4c10080005170003
.quad 0x001fb420fec007b5,0xeed4200000070200,0x0400000000170004,0x366a038000170407
.quad 0x001fbc001e2007e2,0x1c00000000180004,0xeedc200000080204,0x50b0000000070f00
.quad 0x001fc800ffa007e7,0x50b0000000070f00,0xe30000000008000f,0x5c10000000070000
.quad 0x001fbc00fde007f1,0xeedc200000070200,0x50b0000000070f00,0x50b0000000070f00
.quad 0x001ffc00fc2007ef,0x50b0000000070f00,0x50b0000000070f00,0xe30000000007000f
.quad 0x001f8000fc0007ff,0xe2400fffff07000f,0x50b0000000070f00,0x50b0000000070f00
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000000000000,0x0000000000000000,0x0000000000000000,0x0000000000000000
.quad 0x0000000300000001,0x0000000000000000,0x0000000000000000,0x0000000000000040
.quad 0x0000000000000159,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x000000030000000b,0x0000000000000000,0x0000000000000000,0x0000000000000199
.quad 0x0000000000000182,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x0000000200000013,0x0000000000000000,0x0000000000000000,0x0000000000000320
.quad 0x00000000000000c0,0x0000000700000002,0x0000000000000008,0x0000000000000018
.quad 0x00000001000000f0,0x0000000000000000,0x0000000000000000,0x00000000000003e0
.quad 0x0000000000000066,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x000000010000010c,0x0000000000000000,0x0000000000000000,0x0000000000000446
.quad 0x0000000000000060,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x0000000100000138,0x0000000000000000,0x0000000000000000,0x00000000000004a6
.quad 0x00000000000003e7,0x0000000000000000,0x0000000000000001,0x0000000000000000
.quad 0x7000000000000037,0x0000000000000000,0x0000000000000000,0x0000000000000890
.quad 0x0000000000000030,0x0000000000000003,0x0000000000000004,0x0000000000000000
.quad 0x7000000000000068,0x0000000000000000,0x0000000000000000,0x00000000000008c0
.quad 0x000000000000005c,0x0000000d00000003,0x0000000000000004,0x0000000000000000
.quad 0x7000000b0000014a,0x0000000000000000,0x0000000000000000,0x0000000000000920
.quad 0x0000000000000010,0x0000000000000000,0x0000000000000008,0x0000000000000008
.quad 0x00000009000000fc,0x0000000000000000,0x0000000000000000,0x0000000000000930
.quad 0x0000000000000010,0x0000000400000003,0x0000000000000008,0x0000000000000010
.quad 0x0000000900000120,0x0000000000000000,0x0000000000000000,0x0000000000000940
.quad 0x0000000000000010,0x0000000500000003,0x0000000000000008,0x0000000000000010
.quad 0x00000001000000c0,0x0000000000000002,0x0000000000000000,0x0000000000000950
.quad 0x000000000000014c,0x0000000d00000000,0x0000000000000004,0x0000000000000000
.quad 0x0000000100000040,0x0000000000000006,0x0000000000000000,0x0000000000000aa0
.quad 0x0000000000000140,0x0500000700000003,0x0000000000000020,0x0000000000000000
.quad 0x0000000500000006,0x0000000000000f60,0x0000000000000000,0x0000000000000000
.quad 0x00000000000000a8,0x00000000000000a8,0x0000000000000008,0x0000000500000001
.quad 0x0000000000000950,0x0000000000000000,0x0000000000000000,0x0000000000000290
.quad 0x0000000000000290,0x0000000000000008,0x0000000500000001,0x0000000000000f60
.quad 0x0000000000000000,0x0000000000000000,0x00000000000000a8,0x00000000000000a8
.quad 0x0000000000000008,0x0000007001010001,0x0000000000000270,0x000000400000026d
.quad 0x0000003400080003,0x0000000f00000060,0x0000000000002011,0x0000000000000000
.quad 0x0000000000000439,0x0000001600000048,0x72656e65672d2d20,0x656e696c2d657461
.quad 0x0000206f666e692d,0x6178652f7070612f,0x0075632e656c706d,0x762e1cf200010a13
.quad 0x38206e6f69737265,0x677261742e0a332e,0x32355f6d73207465,0x7365726464612e0a
.quad 0x3620657a69735f73,0x6973692dff002f34,0x746e652e20656c62,0x7036325a5f207972
.quad 0x7241737365636f72,0x4468746957796172,0x636e656772657669,0x702e0a2869695065
.quad 0x36752e206d617261,0x002d5f110f002f34,0x332f00372c305f36,0x0a3108f316003732
.quad 0x6765722e0a7b0a29,0x2520646572702e20,0x9600123b3e363c70,0x313c722520323362
.quad 0x2520343680001230,0x6ce20012353c6472,0x302031203109636f,0x22007f646c0a0a0a
.quad 0x202c324f0022752e,0x403b5d271600bc5b,0x003f331f00740100,0x3273008c5d312518
.quad 0x0045766f6d0a3520,0x61746325202c34c7,0x6c00173b782e6469,0x0016746e25202c35
.quad 0x71001525202c3644,0x18732e6f6c2e6461,0x3801004a2c312300,0x33b2006c36722500
.quad 0x2e707465730a3520,0x2c317034002b6567,0x400a3b3308f80030,0x2061726220317025
.quad 0x5f3042425f5f4c24,0x7663e400a70a3b34,0x6c672e6f742e6174,0x332101376c61626f
.quad 0x34b3006505013d2c,0x772e6c756d0a3920,0x3464723300666469,0x64610a3b34820067
.quad 0x2c3126003a732e64,0x5a04015034110040,0x0001900000360100,0x6e610a3b5d630026
.quad 0x001a2c372201da64,0x71652200ca3b3122,0x001e2c3270320019,0x94021a0301223112
.quad 0x6f780a3b30202c33,0x002c2c3423001172,0x746f6e0a3b337074,0x347021012a000018
.quad 0x33a5010435190104,0x6e752e6172620a3b,0x0a0a3b3245001369,0x358300f83a14001f
.quad 0xa96c68730a333120,0xd9741800a9391700,0x1e001e0000d40200,0x15005b3417005b39
.quad 0x01005b3710005b32,0x5b381f014f010139,0x3415004838170f00,0x312030311af00048
.quad 0x7d0a0a3b7465720a,0x09656c69662e0a0a,0x2f7070612f222031,0x2e656c706d617865
.quad 0x000000000a227563
# __fatDeviceText: descriptor whose address is handed to
# __cudaRegisterFatBinary by __sti____cudaRegisterAll().
# Layout as emitted: two 32-bit words followed by two 64-bit words (24 bytes).
__fatDeviceText:
# Word 0: 0x466243b1 (decimal 1180844977). Presumably the fat-binary wrapper
# magic number -- TODO confirm against CUDA's fatbinary_section.h.
.long 0x466243b1
# Word 1: presumably the wrapper format version -- TODO confirm.
.long 1
# Address of the payload blob.
.quad fatbinData
# Trailing 64-bit zero; its meaning is not visible from this listing.
.quad 0
# __device_stub__Z26processArrayWithDivergencePii(int*, int)
# Host-side launch stub: packs its two arguments into an array of pointers,
# retrieves a launch configuration via __cudaPopCallConfiguration and, if that
# call returns 0, invokes cudaLaunchKernel<char>.
#   In:    %rdi = int* argument, %esi = int argument
#   Out:   none used; the result of cudaLaunchKernel is not examined
#   Frame (offsets from %rbp, 96 bytes reserved):
#      -4        int counter of packed arguments
#     -32..-17   two 8-byte slots: pointers to the argument copies
#     -44..-33   three 32-bit values, initialised to 1 (second launch argument,
#                typed dim3 in the callee signature)
#     -56..-45   three 32-bit values, initialised to 1 (third launch argument,
#                typed dim3 in the callee signature)
#     -64        8-byte out-slot, later passed as the first stack argument
#     -72        8-byte out-slot, later passed as the second stack argument
#     -88        copy of the int* argument
#     -92        copy of the int argument
# NOTE(review): function addresses are loaded with 32-bit absolute immediates
# (movl $sym, %edi), so this listing is not position-independent code.
__device_stub__Z26processArrayWithDivergencePii(int*, int):
pushq %rbp
movq %rsp, %rbp
subq $96, %rsp
# Keep addressable copies of both incoming arguments.
movq %rdi, -88(%rbp)
movl %esi, -92(%rbp)
# counter = 0
movl $0, -4(%rbp)
# slot[counter] = address of the int* copy; counter++
movl -4(%rbp), %eax
cltq
leaq -88(%rbp), %rdx
movq %rdx, -32(%rbp,%rax,8)
addl $1, -4(%rbp)
# slot[counter] = address of the int copy; counter++
movl -4(%rbp), %eax
cltq
leaq -92(%rbp), %rdx
movq %rdx, -32(%rbp,%rax,8)
addl $1, -4(%rbp)
# Record the host function's address in the stub's function-local static __f.
movq $processArrayWithDivergence(int*, int), __device_stub__Z26processArrayWithDivergencePii(int*, int)::__f(%rip)
# Default both three-int triples to (1, 1, 1) before asking for the real values.
movl $1, -44(%rbp)
movl $1, -40(%rbp)
movl $1, -36(%rbp)
movl $1, -56(%rbp)
movl $1, -52(%rbp)
movl $1, -48(%rbp)
# __cudaPopCallConfiguration(&triple at -44, &triple at -56, &slot -64, &slot -72)
leaq -72(%rbp), %rcx
leaq -64(%rbp), %rdx
leaq -56(%rbp), %rsi
leaq -44(%rbp), %rax
movq %rax, %rdi
call __cudaPopCallConfiguration
# A non-zero return skips the launch entirely.
testl %eax, %eax
setne %al
testb %al, %al
jne .L6
# The counter was incremented twice above and nothing visible here changes it
# afterwards, so this branch looks always-taken; the fall-through path
# (counter == 0) passes &slot[counter] instead of &slot[0].
cmpl $0, -4(%rbp)
jne .L9
# %rdi/%rsi are used only as temporaries for the two stack arguments here;
# both are overwritten with the real register arguments before the call.
movq -72(%rbp), %rdi
movq -64(%rbp), %rsi
# %r9 = &slot[counter]
leaq -32(%rbp), %rdx
movl -4(%rbp), %eax
cltq
salq $3, %rax
leaq (%rdx,%rax), %r9
# Third launch argument by value: 8 bytes in %rcx plus 4 bytes in %r8d.
movq -56(%rbp), %rcx
movl -48(%rbp), %r8d
# Second launch argument by value: 8 bytes in %rsi plus 4 bytes in %edx.
movq -44(%rbp), %rdx
movl -36(%rbp), %eax
# Stack arguments: the value from -72 is pushed first, so the value from -64
# ends up at the lower address (first stack argument).
pushq %rdi
pushq %rsi
movq %rdx, %rsi
movl %eax, %edx
# First launch argument: address of the host function.
movl $processArrayWithDivergence(int*, int), %edi
call cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*)
# Drop the two pushed stack arguments.
addq $16, %rsp
jmp .L6
.L9:
# Same launch sequence as above, except %r9 = &slot[0].
movq -72(%rbp), %rdi
movq -64(%rbp), %rsi
leaq -32(%rbp), %r9
movq -56(%rbp), %rcx
movl -48(%rbp), %r8d
movq -44(%rbp), %rdx
movl -36(%rbp), %eax
pushq %rdi
pushq %rsi
movq %rdx, %rsi
movl %eax, %edx
movl $processArrayWithDivergence(int*, int), %edi
call cudaError cudaLaunchKernel<char>(char const*, dim3, dim3, void**, unsigned long, CUstream_st*)
addq $16, %rsp
.L6:
# Common exit: tear down the frame and return.
leave
ret
# processArrayWithDivergence(int*, int)
# Host-side entry point for the kernel name: forwards both arguments unchanged
# to __device_stub__Z26processArrayWithDivergencePii.
#   In:    %rdi = int* argument, %esi = int argument
#   Out:   none set here; whatever the stub leaves in %rax is passed through
#   Stack: 16 bytes of locals, used only to spill and reload the arguments
processArrayWithDivergence(int*, int):
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
# Spill the arguments, then reload them into the same argument registers
# (unoptimized codegen).
movq %rdi, -8(%rbp)
movl %esi, -12(%rbp)
movl -12(%rbp), %edx
movq -8(%rbp), %rax
movl %edx, %esi
movq %rax, %rdi
call __device_stub__Z26processArrayWithDivergencePii(int*, int)
nop
leave
ret
# .LC0: NUL-terminated name string; __nv_cudaEntityRegisterCallback passes its
# address in both %rdx and %rcx when calling __cudaRegisterFunction.
.LC0:
.string "processArrayWithDivergence(int*, int)"
# __nv_cudaEntityRegisterCallback(void**)
# Registration callback: remembers the pointer it is given, forwards it to
# __nv_save_fatbinhandle_for_managed_rt, then calls __cudaRegisterFunction for
# processArrayWithDivergence(int*, int).
#   In:    %rdi = pointer argument (the caller in this file passes the value
#          stored in __cudaFatCubinHandle)
#   Out:   none set here; the result of __cudaRegisterFunction is not examined
#   Stack: 16 bytes of locals plus 32 bytes of pushed call arguments
# NOTE(review): symbol addresses are loaded with 32-bit absolute immediates, so
# this listing is not position-independent code.
__nv_cudaEntityRegisterCallback(void**):
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
movq %rdi, -8(%rbp)
# Store the argument in this function's local static __ref.
movq -8(%rbp), %rax
movq %rax, __nv_cudaEntityRegisterCallback(void**)::__ref(%rip)
# Also record it through the managed-runtime helper.
movq -8(%rbp), %rax
movq %rax, %rdi
call __nv_save_fatbinhandle_for_managed_rt(void**)
# Build the __cudaRegisterFunction call:
#   %rdi = the pointer argument
#   %esi = address of the host function
#   %edx = %ecx = address of the .LC0 name string
#   %r8d = -1, %r9d = 0
#   four zero quadwords on the stack
# NOTE(review): the meaning of each position comes from the CUDA runtime's
# internal interface, which is not visible here -- confirm before changing.
movq -8(%rbp), %rax
pushq $0
pushq $0
pushq $0
pushq $0
movl $0, %r9d
movl $-1, %r8d
movl $.LC0, %ecx
movl $.LC0, %edx
movl $processArrayWithDivergence(int*, int), %esi
movq %rax, %rdi
call __cudaRegisterFunction
# Drop the four pushed stack arguments.
addq $32, %rsp
nop
leave
ret
# __sti____cudaRegisterAll()
# Registration sequence for this translation unit:
#   1. __cudaRegisterFatBinary(&__fatDeviceText); the result is stored in
#      __cudaFatCubinHandle
#   2. indirect call to __nv_cudaEntityRegisterCallback with that handle
#   3. __cudaRegisterFatBinaryEnd(handle)
#   4. atexit(_ZL26__cudaUnregisterBinaryUtilv)
#   In:    no arguments
#   Out:   none set here; the result of atexit is not examined
# Presumably run as a static initializer before main -- the mechanism that
# schedules it is not part of this listing.
# NOTE(review): symbol addresses are loaded as 32-bit absolute immediates, so
# this listing is not position-independent code.
__sti____cudaRegisterAll():
pushq %rbp
movq %rsp, %rbp
subq $16, %rsp
# Register the descriptor and keep the returned handle in a global.
movl $__fatDeviceText, %edi
call __cudaRegisterFatBinary
movq %rax, __cudaFatCubinHandle(%rip)
# Store the callback address in a local, then call it with the handle in %rdi.
movq $__nv_cudaEntityRegisterCallback(void**), -8(%rbp)
movq __cudaFatCubinHandle(%rip), %rax
movq -8(%rbp), %rdx
movq %rax, %rdi
call *%rdx
# Signal the end of registration for this handle.
movq __cudaFatCubinHandle(%rip), %rax
movq %rax, %rdi
call __cudaRegisterFatBinaryEnd
# Arrange for the unregister helper to run at process exit.
movl $_ZL26__cudaUnregisterBinaryUtilv, %edi
call atexit
nop
leave
ret
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment