From a C++/JS benchmark

bearophile bearophileHUGS at lycos.com
Thu Aug 4 10:29:13 PDT 2011


> Trass3r:
>> are you able and willing to show me the asm produced by gdc? There's a
>> problem there.
> [attach bla.rar]

In the bla.rar attachment there's the unstripped Linux binary, so to read the asm I have used the objdump disassembler. But are you willing and able to show me the asm before it gets assembled? (With gcc you do it with the -S switch.) (I also suggest using only the C standard library, with time() and printf(), to produce a smaller asm output: http://codepad.org/12EUo16J ).

Using objdump I see it uses 16 xmm registers; this is the main routine. But what's the purpose of those callq? They seem to call the successive asm instruction. The x86 asm of this routine contains jumps only and no "call".
The asm of this routine is also very long; I don't know why yet. I see too many instructions like "movss  0x80(%rsp), %xmm7" — this looks like a problem.


# Disassembly (objdump output, AT&T syntax, x86-64 SysV) of the gdc-compiled
# D routine _calculateVerticesAndNormals.  This is disassembler output, not
# assemblable source: branch targets are raw section offsets.
#
# NOTE(review): every "callq" below targets the address of the *next*
# instruction.  That is the classic signature of an unlinked / unrelocated
# object: the call's 4-byte displacement is still zero and the real callee
# is only in the relocation table.  Re-run objdump with -r (or -dR on the
# linked binary) to see the actual targets -- presumably D runtime
# bounds-check / assert helpers, given the immediates loaded into %edx
# (0x57, 0x5d, 0x5f, 0x63, 0x65, 0x68, 0x69 = 87..105, which look like
# source line numbers) and the (len,ptr)-looking pairs in %edi/%esi.
# TODO confirm against the relocation table.
#
# Because those out-of-line calls are assumed to clobber all xmm registers
# (SysV: xmm0-15 are caller-saved), the compiler spills and reloads up to
# 13 live xmm values around *each* call -- that is the source of the many
# "movss ...(%rsp), %xmmN" instructions noticed in the post.
_calculateVerticesAndNormals:
# --- prologue: save all six callee-saved GPRs, reserve 0x268 bytes ---
push   %r15
push   %r14
push   %r13
push   %r12
push   %rbp
push   %rbx
sub    $0x268, %rsp
# Stack-passed args start at 0x2a0(%rsp): 6 pushes (0x30) + return
# address (0x8) + 0x268 frame = 0x2a0.  The register args (rdi, rsi,
# rcx, r8) and four stack args are spilled to the frame -- note each
# register arg is stored to TWO different slots (0xe8/0x180 etc.),
# a visible redundancy in the generated code.
mov    0x2a0(%rsp), %rax
mov    %rdi, 0xe8(%rsp)
mov    %rsi, 0xe0(%rsp)
mov    %rcx, 0x128(%rsp)
mov    %r8, 0x138(%rsp)
mov    %rax, 0xf0(%rsp)
mov    0x2a8(%rsp), %rax
mov    %rdi, 0x180(%rsp)
mov    %rsi, 0x188(%rsp)
mov    %rcx, 0x170(%rsp)
mov    %rax, 0xf8(%rsp)
mov    0x2b0(%rsp), %rax
mov    %r8, 0x178(%rsp)
mov    %rax, 0x130(%rsp)
mov    0x2b8(%rsp), %rax
mov    %rax, 0x140(%rsp)
# --- length check: require 0x130 slot == 2 * rcx; if not, fall into an
# out-of-line error call (edx = 0x57 = 87, presumably a source line) ---
mov    %rcx, %rax
add    %rax, %rax
cmp    0x130(%rsp), %rax
je     74d <_calculateVerticesAndNormals+0xcd>
mov    $0x57, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movq   $0x6, 0x190(%rsp)
movq   $0x0, 0x198(%rsp)
callq  74d <_calculateVerticesAndNormals+0xcd>
# --- early exit if the element count (0x128 slot = rcx) is zero ---
cmpq   $0x0, 0x128(%rsp)
je     1317 <_calculateVerticesAndNormals+0xc97>
# --- outer-loop setup: r15d = running index, 0x100/0x108/0x120 = output
# counters, r12 = sign-extended r15d; jbe -> bounds-error path ---
movq   $0x1, 0x120(%rsp)
xor    %r15d, %r15d
movq   $0x0, 0x100(%rsp)
movslq %r15d, %r12
cmp    %r12, 0xf0(%rsp)
movq   $0x0, 0x108(%rsp)
jbe    f1d <_calculateVerticesAndNormals+0x89d>
nopl   0x0(%rax)
# --- outer loop body: fetch 12-byte record #r12 from the array at
# 0xf8 slot: int32 index at +0, float weight at +4, byte flag at +8 ---
lea    (%r12, %r12, 2), %rax
shl    $0x2, %rax
mov    %rax, 0x148(%rsp)
mov    0xf8(%rsp), %rax
add    0x148(%rsp), %rax
movss  0x4(%rax), %xmm9
movzbl 0x8(%rax), %r13d
movslq (%rax), %rax
# bounds check: record's index must be < length in 0xe8 slot
cmp    0xe8(%rsp), %rax
jae    f50 <_calculateVerticesAndNormals+0x8d0>
# index * 48 into the array at 0xe0 slot (48 bytes = 12 floats, i.e. a
# 3x4 matrix per element); the je after the add looks like a null/zero
# pointer check on the computed address -- TODO confirm
lea    (%rax, %rax, 2), %rax
shl    $0x4, %rax
mov    %rax, 0x110(%rsp)
mov    0xe0(%rsp), %rbx
add    0x110(%rsp), %rbx
je     12af <_calculateVerticesAndNormals+0xc2f>
# --- load the 12 matrix floats, each scaled by the weight in xmm9.
# The "test %r13b,%r13b" sets flags that survive all the way to the
# jne below (SSE movss/mulss do not touch EFLAGS). ---
movss  (%rbx), %xmm7
test   %r13b, %r13b
movss  0x4(%rbx), %xmm8
movss  0x8(%rbx), %xmm6
mulss  %xmm9, %xmm7
movss  0xc(%rbx), %xmm11
mulss  %xmm9, %xmm8
movss  0x10(%rbx), %xmm4
mulss  %xmm9, %xmm6
movss  0x14(%rbx), %xmm5
mulss  %xmm9, %xmm11
movss  0x18(%rbx), %xmm3
mulss  %xmm9, %xmm4
movss  0x1c(%rbx), %xmm10
mulss  %xmm9, %xmm5
movss  0x20(%rbx), %xmm1
mulss  %xmm9, %xmm3
movss  0x24(%rbx), %xmm2
mulss  %xmm9, %xmm10
movss  0x28(%rbx), %xmm0
mulss  %xmm9, %xmm1
mulss  %xmm9, %xmm2
mulss  %xmm9, %xmm0
mulss  0x2c(%rbx), %xmm9
# flag byte nonzero -> skip the inner accumulation loop
jne    cdb <_calculateVerticesAndNormals+0x65b>
add    $0x1, %r12
mov    %r14, %rax
lea    (%r12, %r12, 2), %r13
shl    $0x2, %r13
jmpq   99e <_calculateVerticesAndNormals+0x31e>
nopl   (%rax)
# --- inner loop: fetch the next (index, weight, flag) record and
# accumulate weight * matrix into the 12 running sums
# (xmm7,8,6,11,4,5,3,10,1,2,0,9) via the xmm12/xmm13 temporaries ---
mov    %r13, %rax
mov    0xf8(%rsp), %rdx
add    %rax, %rdx
movss  0x4(%rdx), %xmm12
movzbl 0x8(%rdx), %r14d
movslq (%rdx), %rdx
cmp    %rdx, 0xe8(%rsp)
jbe    aa0 <_calculateVerticesAndNormals+0x420>
mov    0xe0(%rsp), %rbx
lea    (%rdx, %rdx, 2), %rbp
shl    $0x4, %rbp
add    %rbp, %rbx
je     baf <_calculateVerticesAndNormals+0x52f>
movss  (%rbx), %xmm13
add    $0x1, %r12
add    $0xc, %r13
# flag test; the jne far below consumes these flags (SSE ops preserve them)
test   %r14b, %r14b
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm7
movss  0x4(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm8
movss  0x8(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm6
movss  0xc(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm11
movss  0x10(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm4
movss  0x14(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm5
movss  0x18(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm3
movss  0x1c(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm10
movss  0x20(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm1
movss  0x24(%rbx), %xmm13
mulss  %xmm12, %xmm13
addss  %xmm13, %xmm2
movss  0x28(%rbx), %xmm13
mulss  %xmm12, %xmm13
mulss  0x2c(%rbx), %xmm12
addss  %xmm13, %xmm0
addss  %xmm12, %xmm9
# flag byte nonzero -> leave the inner loop
jne    cd8 <_calculateVerticesAndNormals+0x658>
add    $0x1, %r15d
cmp    %r12, 0xf0(%rsp)
ja     890 <_calculateVerticesAndNormals+0x210>
# --- out-of-line error call (edx = 0x63 = 99): all 12 live xmm sums and
# rax must be spilled before and reloaded after, because the callee may
# clobber every caller-saved register.  This spill/reload pattern repeats
# around every callq below. ---
mov    $0x63, %edx
mov    $0x6, %edi
mov    $0x0, %esi
mov    %rax, 0xc8(%rsp)
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movss  %xmm9, 0x90(%rsp)
movss  %xmm10, 0xa0(%rsp)
movss  %xmm11, 0xb0(%rsp)
movq   $0x6, 0x1c0(%rsp)
movq   $0x0, 0x1c8(%rsp)
callq  a3b <_calculateVerticesAndNormals+0x3bb>
# reload everything spilled above, then resume the loop
mov    0xc8(%rsp), %rax
movss  (%rsp), %xmm0
movss  0x20(%rsp), %xmm1
movss  0x10(%rsp), %xmm2
movss  0x30(%rsp), %xmm3
movss  0x50(%rsp), %xmm4
movss  0x40(%rsp), %xmm5
movss  0x60(%rsp), %xmm6
movss  0x80(%rsp), %xmm7
movss  0x70(%rsp), %xmm8
movss  0x90(%rsp), %xmm9
movss  0xa0(%rsp), %xmm10
movss  0xb0(%rsp), %xmm11
jmpq   893 <_calculateVerticesAndNormals+0x213>
nop    
# --- same pattern, error at presumed line 0x65 = 101; xmm12 also live ---
mov    $0x65, %edx
mov    $0x6, %edi
mov    $0x0, %esi
mov    %rax, 0xc8(%rsp)
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movss  %xmm9, 0x90(%rsp)
movss  %xmm10, 0xa0(%rsp)
movss  %xmm11, 0xb0(%rsp)
movss  %xmm12, 0xd0(%rsp)
movq   $0x6, 0x1d0(%rsp)
movq   $0x0, 0x1d8(%rsp)
callq  b35 <_calculateVerticesAndNormals+0x4b5>
mov    0xe0(%rsp), %rbx
movss  0xd0(%rsp), %xmm12
movss  0xb0(%rsp), %xmm11
movss  0xa0(%rsp), %xmm10
add    %rbp, %rbx
movss  0x70(%rsp), %xmm8
movss  0x90(%rsp), %xmm9
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
mov    0xc8(%rsp), %rax
jne    8d3 <_calculateVerticesAndNormals+0x253>
# --- call with two (len,ptr)-looking pairs (6/0 and 9/0) and
# r8d = 0x23 = 35; shape suggests an assert-with-message helper
# (file/msg slices + line) -- TODO confirm via relocations ---
mov    $0x23, %r8d
mov    $0x6, %edx
mov    $0x0, %ecx
mov    $0x9, %edi
mov    $0x0, %esi
movss  %xmm0, (%rsp)
mov    %rax, 0xc8(%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movss  %xmm9, 0x90(%rsp)
movss  %xmm10, 0xa0(%rsp)
movss  %xmm11, 0xb0(%rsp)
movss  %xmm12, 0xd0(%rsp)
movq   $0x6, 0x240(%rsp)
movq   $0x0, 0x248(%rsp)
movq   $0x9, 0x250(%rsp)
movq   $0x0, 0x258(%rsp)
callq  c67 <_calculateVerticesAndNormals+0x5e7>
movss  0x70(%rsp), %xmm8
movss  0xd0(%rsp), %xmm12
movss  0xb0(%rsp), %xmm11
movss  0xa0(%rsp), %xmm10
movss  0x90(%rsp), %xmm9
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
mov    0xc8(%rsp), %rax
jmpq   8d3 <_calculateVerticesAndNormals+0x253>
nopl   (%rax)
# --- accumulation done: bounds-check the output index (0x108 slot vs
# count in 0x128), then address the 32-byte-stride input record in the
# array at 0x138 slot ---
mov    %rax, %r14
mov    0x108(%rsp), %rax
cmp    %rax, 0x128(%rsp)
jbe    11d0 <_calculateVerticesAndNormals+0xb50>
shl    $0x5, %rax
mov    %rax, 0x150(%rsp)
mov    0x100(%rsp), %rax
mov    0x138(%rsp), %rbx
add    %rax, %rax
add    0x150(%rsp), %rbx
cmp    %rax, 0x130(%rsp)
jbe    10e8 <_calculateVerticesAndNormals+0xa68>
mov    0x100(%rsp), %rax
shl    $0x5, %rax
mov    %rax, 0x158(%rsp)
# --- transform position: xmm14/xmm13/0x11c slot = input x,y,z from
# (%rbx); dot them with the accumulated matrix rows and store x,y,z
# to the 16-byte-stride output at 0x140 slot, zeroing the 4th float ---
movss  0x8(%rbx), %xmm12
movaps %xmm8, %xmm15
movss  (%rbx), %xmm14
movss  %xmm12, 0x11c(%rsp)
movss  0x4(%rbx), %xmm13
movaps %xmm7, %xmm12
mulss  %xmm14, %xmm12
mov    0x140(%rsp), %rax
mulss  %xmm13, %xmm15
add    0x158(%rsp), %rax
addss  %xmm15, %xmm12
addss  %xmm11, %xmm12
movl   $0x0, 0xc(%rax)
movss  0x11c(%rsp), %xmm11
mulss  %xmm6, %xmm11
addss  %xmm11, %xmm12
movaps %xmm4, %xmm11
mulss  %xmm14, %xmm11
mulss  %xmm1, %xmm14
movss  %xmm12, (%rax)
movaps %xmm5, %xmm12
mulss  %xmm13, %xmm12
mulss  %xmm2, %xmm13
addss  %xmm12, %xmm11
addss  %xmm13, %xmm14
addss  %xmm10, %xmm11
movss  0x11c(%rsp), %xmm10
addss  %xmm9, %xmm14
movss  0x11c(%rsp), %xmm9
mulss  %xmm3, %xmm10
mulss  %xmm0, %xmm9
addss  %xmm10, %xmm11
addss  %xmm9, %xmm14
movss  %xmm11, 0x4(%rax)
movss  %xmm14, 0x8(%rax)
# --- second output: same bounds checks, then transform the vector at
# input offsets 0x10/0x14/0x18 (presumably the normal; note this part
# uses only the 3x3 rotation terms, no translation row added) ---
mov    0x108(%rsp), %rax
cmp    %rax, 0x128(%rsp)
jbe    1040 <_calculateVerticesAndNormals+0x9c0>
shl    $0x5, %rax
mov    %rax, 0x160(%rsp)
mov    0x138(%rsp), %rbx
mov    0x120(%rsp), %rax
add    0x160(%rsp), %rbx
cmp    %rax, 0x130(%rsp)
jbe    f98 <_calculateVerticesAndNormals+0x918>
shl    $0x4, %rax
mov    %rax, 0x168(%rsp)
movss  0x10(%rbx), %xmm10
add    $0x1, %r15d
movss  0x14(%rbx), %xmm11
mulss  %xmm10, %xmm7
mov    0x140(%rsp), %rax
mulss  %xmm11, %xmm8
movss  0x18(%rbx), %xmm9
mulss  %xmm11, %xmm5
mulss  %xmm10, %xmm4
mulss  %xmm11, %xmm2
add    0x168(%rsp), %rax
addq   $0x1, 0x100(%rsp)
addss  %xmm7, %xmm8
addq   $0x2, 0x120(%rsp)
addss  %xmm4, %xmm5
mulss  %xmm10, %xmm1
mulss  %xmm9, %xmm6
movl   $0x0, 0xc(%rax)
mulss  %xmm9, %xmm3
mulss  %xmm9, %xmm0
addss  %xmm1, %xmm2
addss  %xmm6, %xmm8
addss  %xmm3, %xmm5
addss  %xmm0, %xmm2
movss  %xmm8, (%rax)
movss  %xmm5, 0x4(%rax)
movss  %xmm2, 0x8(%rax)
# --- outer loop control: done when 0x100 counter reaches the count ---
mov    0x100(%rsp), %rax
cmp    %rax, 0x128(%rsp)
je     1317 <_calculateVerticesAndNormals+0xc97>
movslq %r15d, %r12
mov    %rax, 0x108(%rsp)
cmp    %r12, 0xf0(%rsp)
ja     798 <_calculateVerticesAndNormals+0x118>
# --- remaining blocks: the same out-of-line error/assert pattern for
# the other bounds checks (presumed lines 0x5d=93, 0x5f=95, 0x69=105,
# 0x68=104), each spilling/reloading whatever xmm state is live ---
mov    $0x5d, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movq   $0x6, 0x1a0(%rsp)
movq   $0x0, 0x1a8(%rsp)
callq  f49 <_calculateVerticesAndNormals+0x8c9>
jmpq   7a8 <_calculateVerticesAndNormals+0x128>
xchg   %ax, %ax
mov    $0x5f, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movss  %xmm9, 0x90(%rsp)
movq   $0x6, 0x1b0(%rsp)
movq   $0x0, 0x1b8(%rsp)
callq  f86 <_calculateVerticesAndNormals+0x906>
movss  0x90(%rsp), %xmm9
jmpq   7e4 <_calculateVerticesAndNormals+0x164>
nopl   (%rax)
mov    $0x69, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movq   $0x6, 0x210(%rsp)
movq   $0x0, 0x218(%rsp)
callq  ffd <_calculateVerticesAndNormals+0x97d>
movss  0x70(%rsp), %xmm8
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
jmpq   e59 <_calculateVerticesAndNormals+0x7d9>
nopl   0x0(%rax, %rax, 1)
mov    $0x69, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movq   $0x6, 0x200(%rsp)
movq   $0x0, 0x208(%rsp)
callq  10a5 <_calculateVerticesAndNormals+0xa25>
movss  0x70(%rsp), %xmm8
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
jmpq   e27 <_calculateVerticesAndNormals+0x7a7>
nopl   0x0(%rax, %rax, 1)
mov    $0x68, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movss  %xmm9, 0x90(%rsp)
movss  %xmm10, 0xa0(%rsp)
movss  %xmm11, 0xb0(%rsp)
movq   $0x6, 0x1f0(%rsp)
movq   $0x0, 0x1f8(%rsp)
callq  116b <_calculateVerticesAndNormals+0xaeb>
movss  0x70(%rsp), %xmm8
movss  0xb0(%rsp), %xmm11
movss  0xa0(%rsp), %xmm10
movss  0x90(%rsp), %xmm9
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
jmpq   d3a <_calculateVerticesAndNormals+0x6ba>
nopw   0x0(%rax, %rax, 1)
mov    $0x68, %edx
mov    $0x6, %edi
mov    $0x0, %esi
movss  %xmm0, (%rsp)
movss  %xmm1, 0x20(%rsp)
movss  %xmm2, 0x10(%rsp)
movss  %xmm3, 0x30(%rsp)
movss  %xmm4, 0x50(%rsp)
movss  %xmm5, 0x40(%rsp)
movss  %xmm6, 0x60(%rsp)
movss  %xmm7, 0x80(%rsp)
movss  %xmm8, 0x70(%rsp)
movss  %xmm9, 0x90(%rsp)
movss  %xmm10, 0xa0(%rsp)
movss  %xmm11, 0xb0(%rsp)
movq   $0x6, 0x1e0(%rsp)
movq   $0x0, 0x1e8(%rsp)
callq  1253 <_calculateVerticesAndNormals+0xbd3>
movss  0x70(%rsp), %xmm8
movss  0xb0(%rsp), %xmm11
movss  0xa0(%rsp), %xmm10
movss  0x90(%rsp), %xmm9
movss  0x80(%rsp), %xmm7
movss  0x60(%rsp), %xmm6
movss  0x40(%rsp), %xmm5
movss  0x50(%rsp), %xmm4
movss  0x30(%rsp), %xmm3
movss  0x10(%rsp), %xmm2
movss  0x20(%rsp), %xmm1
movss  (%rsp), %xmm0
jmpq   cfd <_calculateVerticesAndNormals+0x67d>
# --- another assert-with-message-shaped call (r8d = 0x12 = 18) ---
mov    $0x12, %r8d
mov    $0x6, %edx
mov    $0x0, %ecx
mov    $0x9, %edi
mov    $0x0, %esi
movss  %xmm9, 0x90(%rsp)
movq   $0x6, 0x220(%rsp)
movq   $0x0, 0x228(%rsp)
movq   $0x9, 0x230(%rsp)
movq   $0x0, 0x238(%rsp)
callq  1308 <_calculateVerticesAndNormals+0xc88>
movss  0x90(%rsp), %xmm9
jmpq   7fa <_calculateVerticesAndNormals+0x17a>
# --- epilogue: release the frame, restore callee-saved GPRs, return ---
add    $0x268, %rsp
pop    %rbx
pop    %rbp
pop    %r12
pop    %r13
pop    %r14
pop    %r15
retq   
nopl   0x0(%rax)


Bye,
bearophile


More information about the Digitalmars-d mailing list