Disappointing math performance compared to GDC
Gabor Mezo via digitalmars-d-ldc
digitalmars-d-ldc at puremagic.com
Wed Oct 8 09:23:18 PDT 2014
On Wednesday, 8 October 2014 at 16:02:17 UTC, Trass3r wrote:
> Just check it with '-output-ll' or '-output-s
> -x86-asm-syntax=intel' ;)
I'm not an ASM expert, but as far as I can see it does indeed use some
SIMD registers and instructions. For example:
.LBB0_16:
mov rcx, qword ptr [rax]
mov rdi, rax
call qword ptr [rcx + 56]
test rax, rax
jne .LBB0_18
movss xmm1, dword ptr [rsp + 116]
jmp .LBB0_20
.align 16, 0x90
.LBB0_18:
mov rcx, rbx
imul rcx, rax
add r12, rcx
movss xmm1, dword ptr [rsp + 116]
.align 16, 0x90
.LBB0_19:
movss xmm0, dword ptr [rdx]
mulss xmm0, dword ptr [r12]
addss xmm1, xmm0
add rdx, 4
add r12, 4
dec rax
jne .LBB0_19
.LBB0_20:
movss dword ptr [rsp + 116], xmm1
inc r14
cmp r14, r15
jne .LBB0_12
.LBB0_21:
mov rax, qword ptr [rsp + 80]
mov rdi, qword ptr [rax]
mov rax, qword ptr [rdi]
call qword ptr [rax + 40]
test eax, eax
mov rbp, qword ptr [rsp + 104]
jne .LBB0_24
movss xmm0, dword ptr [rsp + 92]
movss xmm1, dword ptr [rsp + 116]
call _D8nhelpers7sigmoidFNbffZf
mov rax, qword ptr [rsp + 64]
movss dword ptr [rax + 4*rbp], xmm0
xor edx, edx
xor ecx, ecx
mov r8d, _D11TypeInfo_Af6__initZ
mov rdi, qword ptr [rsp + 48]
mov rsi, qword ptr [rsp + 96]
call _adEq2
test eax, eax
jne .LBB0_27
movss xmm0, dword ptr [rsp + 92]
movss xmm1, dword ptr [rsp + 116]
call _D8nhelpers12sigmoidDerivFNbffZf
mov rax, qword ptr [rsp + 96]
jmp .LBB0_26
.align 16, 0x90
.LBB0_24:
movss xmm0, dword ptr [rsp + 92]
movss xmm1, dword ptr [rsp + 116]
call _D8nhelpers6linearFNbffZf
mov rax, qword ptr [rsp + 64]
movss dword ptr [rax + 4*rbp], xmm0
xor edx, edx
xor ecx, ecx
mov r8d, _D11TypeInfo_Af6__initZ
mov rdi, qword ptr [rsp + 48]
mov rsi, qword ptr [rsp + 96]
call _adEq2
test eax, eax
jne .LBB0_27
mov rax, qword ptr [rsp + 96]
movss xmm0, dword ptr [rsp + 92]
.LBB0_26:
movss dword ptr [rax + 4*rbp], xmm0
.LBB0_27:
inc rbp
add rbx, 4
cmp rbp, qword ptr [rsp + 72]
jne .LBB0_9
.LBB0_28:
mov rax, qword ptr [rsp + 24]
inc rax
cmp rax, qword ptr [rsp + 8]
mov rbp, qword ptr [rsp + 16]
jne .LBB0_1
.LBB0_29:
add rsp, 120
pop rbx
pop r12
pop r13
pop r14
pop r15
pop rbp
ret
More information about the digitalmars-d-ldc
mailing list