mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-25 05:40:39 -05:00
Optimizations for AMD Bulldozer
- Added support for XOP instructions - Enabled Ryzen code for Bulldozer because it's faster there too
This commit is contained in:
24
src/crypto/randomx/asm/program_loop_load_xop.inc
Normal file
24
src/crypto/randomx/asm/program_loop_load_xop.inc
Normal file
@@ -0,0 +1,24 @@
|
||||
lea rcx, [rsi+rax]
|
||||
mov [rsp+8], rcx
|
||||
xor r8, qword ptr [rcx+0]
|
||||
xor r9, qword ptr [rcx+8]
|
||||
xor r10, qword ptr [rcx+16]
|
||||
xor r11, qword ptr [rcx+24]
|
||||
xor r12, qword ptr [rcx+32]
|
||||
xor r13, qword ptr [rcx+40]
|
||||
xor r14, qword ptr [rcx+48]
|
||||
xor r15, qword ptr [rcx+56]
|
||||
lea rcx, [rsi+rdx]
|
||||
mov [rsp+16], rcx
|
||||
cvtdq2pd xmm0, qword ptr [rcx+0]
|
||||
cvtdq2pd xmm1, qword ptr [rcx+8]
|
||||
cvtdq2pd xmm2, qword ptr [rcx+16]
|
||||
cvtdq2pd xmm3, qword ptr [rcx+24]
|
||||
cvtdq2pd xmm4, qword ptr [rcx+32]
|
||||
cvtdq2pd xmm5, qword ptr [rcx+40]
|
||||
cvtdq2pd xmm6, qword ptr [rcx+48]
|
||||
cvtdq2pd xmm7, qword ptr [rcx+56]
|
||||
vpcmov xmm4, xmm4, xmm14, xmm13
|
||||
vpcmov xmm5, xmm5, xmm14, xmm13
|
||||
vpcmov xmm6, xmm6, xmm14, xmm13
|
||||
vpcmov xmm7, xmm7, xmm14, xmm13
|
||||
@@ -1,5 +1,5 @@
|
||||
mantissaMask:
|
||||
db 255, 255, 255, 255, 255, 255, 255, 0, 255, 255, 255, 255, 255, 255, 255, 0
|
||||
db 0, 0, 192, 255, 255, 255, 255, 0, 0, 0, 192, 255, 255, 255, 255, 0
|
||||
exp240:
|
||||
db 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
|
||||
scaleMask:
|
||||
|
||||
Reference in New Issue
Block a user