//
// Generated by Microsoft (R) HLSL Shader Compiler 10.1
//
//
// Buffer Definitions:
//
// cbuffer CB_1
// {
//
// uint _const_buffer_1[41]; // Offset: 0 Size: 644
//
// }
//
//
// Resource Bindings:
//
// Name Type Format Dim HLSL Bind Count
// ------------------------------ ---------- ------- ----------- -------------- ------
// _GV_buffer_rw_0 UAV byte r/w u0 1
// _GV_buffer_rw_1 UAV byte r/w u1 1
// _GV_buffer_rw_2 UAV byte r/w u2 1
// CB_1 cbuffer NA NA cb1 1
//
//
//
// Input signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Input
//
// Output signature:
//
// Name Index Mask Register SysValue Format Used
// -------------------- ----- ------ -------- -------- ------- ------
// no Output
// Compute shader (thread group 32x8x1). Each thread with linear index
//   n = vThreadID.z * cb1[0].x + vThreadID.y * cb1[1].x + vThreadID.x
// that satisfies n < cb1[2].x (unsigned) does the following:
//   q = n / cb1[4].x, r = n % cb1[4].x          (unsigned divide)
//   acc = sum over 32 iterations of u1[di] * u0[ri]   (32-bit words), where
//         di starts at q + cb1[24].x + cb1[27].x and steps by cb1[20].x,
//         ri starts at r + cb1[11].x + cb1[14].x and steps by cb1[7].x
//   v = ((acc & 1) << q) ^ ((1 << q) & k[r])
//   atomically XORs v into the u2 word at index r + cb1[34].x + cb1[36].x
// k[] is the 3-entry table x0[]; its values coincide with the first three
// MD5 sine-table constants (RFC 1321).
// NOTE(review): the original annotations assume cb1[4].x == 3, cb1[7].x == 3
// and cb1[20].x == 0x20; x0[] only has 3 entries, so r must stay below 3 -
// confirm against the host code that fills CB_1.
// The table constants are written as exact hex immediates: the disassembler's
// float formatting printed 0x242070DB (about 3.48e-17) as 0.000000, which
// would reassemble to 0 and silently change x0[2].
cs_5_0
dcl_globalFlags refactoringAllowed
dcl_constantbuffer CB1[37], immediateIndexed
dcl_uav_raw u0
dcl_uav_raw u1
dcl_uav_raw u2
dcl_input vThreadID.xyz
dcl_temps 3
dcl_indexableTemp x0[3], 4
dcl_thread_group 32, 8, 1
imul [precise(x)] null, r0.x, vThreadID.z, cb1[0].x //r0.x = low 32 bits of z * cb1[0].x
imad [precise(x)] r0.x, vThreadID.y, cb1[1].x, r0.x //r0.x += y * cb1[1].x
iadd [precise(x)] r0.x, r0.x, vThreadID.x //r0.x = n (linear thread index)
ult [precise(y)] r0.y, r0.x, cb1[2].x //r0.y = (n < cb1[2].x), unsigned
if_nz r0.y
udiv [precise(x)] r0.x, r1.x, r0.x, cb1[4].x //r0.x = q = n / cb1[4].x, r1.x = r = n % cb1[4].x
mov [precise(x)] x0[0].x, l(0xd76aa478) //k[0]; same bits as float -257992108802048.0
mov [precise(x)] x0[1].x, l(0xe8c7b756) //k[1]; same bits as float ~ -7.5450630e24
mov [precise(x)] x0[2].x, l(0x242070db) //k[2]; was printed as l(0.000000), which loses the value
iadd [precise(y)] r0.y, r1.x, cb1[11].x //r + cb1[11].x
iadd [precise(y)] r0.y, r0.y, cb1[14].x //r0.y = initial u0 word index
iadd [precise(z)] r0.z, r0.x, cb1[24].x //q + cb1[24].x
iadd [precise(z)] r0.z, r0.z, cb1[27].x //r0.z = initial u1 word index
mov [precise(w)] r0.w, l(0) //acc = 0
mov [precise(y)] r1.y, l(32) //remaining iterations = 32
mov [precise(zw)] r1.zw, r0.yyyz //r1.z = u0 word index, r1.w = u1 word index
mov [precise(x)] r2.x, l(-1) //loop condition starts true
loop
breakc_z r2.x //leave once the iteration counter has reached 0
ishl [precise(yz)] r2.yz, r1.wwzw, l(0, 2, 2, 0) //word index -> byte offset: r2.y = 4 * r1.w, r2.z = 4 * r1.z
ld_raw_indexable [precise(y)](raw_buffer)(mixed,mixed,mixed,mixed) r2.y, r2.y, u1.xxxx //r2.y = u1 word at r1.w
ld_raw_indexable [precise(z)](raw_buffer)(mixed,mixed,mixed,mixed) r2.z, r2.z, u0.xxxx //r2.z = u0 word at r1.z
imad [precise(w)] r0.w, r2.y, r2.z, r0.w //acc += u1 word * u0 word
iadd [precise(w)] r1.w, r1.w, cb1[20].x //advance u1 index by cb1[20].x (0x20 per original note)
iadd [precise(z)] r1.z, r1.z, cb1[7].x //advance u0 index by cb1[7].x (3 per original note)
iadd [precise(y)] r1.y, r1.y, l(-1) //remaining iterations--
ult [precise(x)] r2.x, l(0), r1.y //continue while 0 < remaining
endloop
ishl [precise(y)] r0.y, l(1), r0.x //r0.y = 1 << q
mov [precise(z)] r0.z, x0[r1.x + 0].x //r0.z = k[r]
and [precise(y)] r0.y, r0.y, r0.z //r0.y = (1 << q) & k[r]
bfi [precise(x)] r0.x, l(1), r0.x, r0.w, l(0) //r0.x = (acc & 1) << q: 1-bit field at offset q inserted into 0
xor [precise(x)] r0.x, r0.x, r0.y //r0.x = value to XOR into u2
iadd [precise(y)] r0.y, r1.x, cb1[34].x //r + cb1[34].x
iadd [precise(y)] r0.y, r0.y, cb1[36].x //r0.y = u2 word index
ishl [precise(y)] r0.y, r0.y, l(2) //word index -> byte offset
imm_atomic_xor [precise(x)] r0.x, u2, r0.y, r0.x //u2 word ^= r0.x atomically; previous value returned in r0.x (unused)
endif
ret
// Approximately 39 instruction slots used