From a C++/JS benchmark
bearophile
bearophileHUGS at lycos.com
Thu Aug 4 10:29:13 PDT 2011
> Trass3r:
>> are you able and willing to show me the asm produced by gdc? There's a
>> problem there.
> [attach bla.rar]
In the bla.rar attachment there's the unstripped Linux binary, so to read the asm I have used the objdump disassembler. But are you willing and able to show me the asm before it gets assembled? (with gcc you do it with the -S switch). (I also suggest to use only the C standard library, with time() and printf() to produce a smaller asm output: http://codepad.org/12EUo16J ).
Using objdump I see it uses 16 xmm registers; this is the main routine. But what's the purpose of those callq instructions? Each one seems to call the immediately following asm instruction. The x86 asm of this routine contains jumps only and no "call".
The asm of this routine is also very long; I don't know why yet. I also see too many instructions like "movss 0x80(%rsp), %xmm7" — this looks like a problem (lots of spilling and reloading of xmm registers to and from the stack).
_calculateVerticesAndNormals:
push %r15
push %r14
push %r13
push %r12
push %rbp
push %rbx
sub $0x268, %rsp
mov 0x2a0(%rsp), %rax
mov %rdi, 0xe8(%rsp)
mov %rsi, 0xe0(%rsp)
mov %rcx, 0x128(%rsp)
mov %r8, 0x138(%rsp)
mov %rax, 0xf0(%rsp)
mov 0x2a8(%rsp), %rax
mov %rdi, 0x180(%rsp)
mov %rsi, 0x188(%rsp)
mov %rcx, 0x170(%rsp)
mov %rax, 0xf8(%rsp)
mov 0x2b0(%rsp), %rax
mov %r8, 0x178(%rsp)
mov %rax, 0x130(%rsp)
mov 0x2b8(%rsp), %rax
mov %rax, 0x140(%rsp)
mov %rcx, %rax
add %rax, %rax
cmp 0x130(%rsp), %rax
je 74d <_calculateVerticesAndNormals+0xcd>
mov $0x57, %edx
mov $0x6, %edi
mov $0x0, %esi
movq $0x6, 0x190(%rsp)
movq $0x0, 0x198(%rsp)
callq 74d <_calculateVerticesAndNormals+0xcd>
cmpq $0x0, 0x128(%rsp)
je 1317 <_calculateVerticesAndNormals+0xc97>
movq $0x1, 0x120(%rsp)
xor %r15d, %r15d
movq $0x0, 0x100(%rsp)
movslq %r15d, %r12
cmp %r12, 0xf0(%rsp)
movq $0x0, 0x108(%rsp)
jbe f1d <_calculateVerticesAndNormals+0x89d>
nopl 0x0(%rax)
lea (%r12, %r12, 2), %rax
shl $0x2, %rax
mov %rax, 0x148(%rsp)
mov 0xf8(%rsp), %rax
add 0x148(%rsp), %rax
movss 0x4(%rax), %xmm9
movzbl 0x8(%rax), %r13d
movslq (%rax), %rax
cmp 0xe8(%rsp), %rax
jae f50 <_calculateVerticesAndNormals+0x8d0>
lea (%rax, %rax, 2), %rax
shl $0x4, %rax
mov %rax, 0x110(%rsp)
mov 0xe0(%rsp), %rbx
add 0x110(%rsp), %rbx
je 12af <_calculateVerticesAndNormals+0xc2f>
movss (%rbx), %xmm7
test %r13b, %r13b
movss 0x4(%rbx), %xmm8
movss 0x8(%rbx), %xmm6
mulss %xmm9, %xmm7
movss 0xc(%rbx), %xmm11
mulss %xmm9, %xmm8
movss 0x10(%rbx), %xmm4
mulss %xmm9, %xmm6
movss 0x14(%rbx), %xmm5
mulss %xmm9, %xmm11
movss 0x18(%rbx), %xmm3
mulss %xmm9, %xmm4
movss 0x1c(%rbx), %xmm10
mulss %xmm9, %xmm5
movss 0x20(%rbx), %xmm1
mulss %xmm9, %xmm3
movss 0x24(%rbx), %xmm2
mulss %xmm9, %xmm10
movss 0x28(%rbx), %xmm0
mulss %xmm9, %xmm1
mulss %xmm9, %xmm2
mulss %xmm9, %xmm0
mulss 0x2c(%rbx), %xmm9
jne cdb <_calculateVerticesAndNormals+0x65b>
add $0x1, %r12
mov %r14, %rax
lea (%r12, %r12, 2), %r13
shl $0x2, %r13
jmpq 99e <_calculateVerticesAndNormals+0x31e>
nopl (%rax)
mov %r13, %rax
mov 0xf8(%rsp), %rdx
add %rax, %rdx
movss 0x4(%rdx), %xmm12
movzbl 0x8(%rdx), %r14d
movslq (%rdx), %rdx
cmp %rdx, 0xe8(%rsp)
jbe aa0 <_calculateVerticesAndNormals+0x420>
mov 0xe0(%rsp), %rbx
lea (%rdx, %rdx, 2), %rbp
shl $0x4, %rbp
add %rbp, %rbx
je baf <_calculateVerticesAndNormals+0x52f>
movss (%rbx), %xmm13
add $0x1, %r12
add $0xc, %r13
test %r14b, %r14b
mulss %xmm12, %xmm13
addss %xmm13, %xmm7
movss 0x4(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm8
movss 0x8(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm6
movss 0xc(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm11
movss 0x10(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm4
movss 0x14(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm5
movss 0x18(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm3
movss 0x1c(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm10
movss 0x20(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm1
movss 0x24(%rbx), %xmm13
mulss %xmm12, %xmm13
addss %xmm13, %xmm2
movss 0x28(%rbx), %xmm13
mulss %xmm12, %xmm13
mulss 0x2c(%rbx), %xmm12
addss %xmm13, %xmm0
addss %xmm12, %xmm9
jne cd8 <_calculateVerticesAndNormals+0x658>
add $0x1, %r15d
cmp %r12, 0xf0(%rsp)
ja 890 <_calculateVerticesAndNormals+0x210>
mov $0x63, %edx
mov $0x6, %edi
mov $0x0, %esi
mov %rax, 0xc8(%rsp)
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movss %xmm9, 0x90(%rsp)
movss %xmm10, 0xa0(%rsp)
movss %xmm11, 0xb0(%rsp)
movq $0x6, 0x1c0(%rsp)
movq $0x0, 0x1c8(%rsp)
callq a3b <_calculateVerticesAndNormals+0x3bb>
mov 0xc8(%rsp), %rax
movss (%rsp), %xmm0
movss 0x20(%rsp), %xmm1
movss 0x10(%rsp), %xmm2
movss 0x30(%rsp), %xmm3
movss 0x50(%rsp), %xmm4
movss 0x40(%rsp), %xmm5
movss 0x60(%rsp), %xmm6
movss 0x80(%rsp), %xmm7
movss 0x70(%rsp), %xmm8
movss 0x90(%rsp), %xmm9
movss 0xa0(%rsp), %xmm10
movss 0xb0(%rsp), %xmm11
jmpq 893 <_calculateVerticesAndNormals+0x213>
nop
mov $0x65, %edx
mov $0x6, %edi
mov $0x0, %esi
mov %rax, 0xc8(%rsp)
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movss %xmm9, 0x90(%rsp)
movss %xmm10, 0xa0(%rsp)
movss %xmm11, 0xb0(%rsp)
movss %xmm12, 0xd0(%rsp)
movq $0x6, 0x1d0(%rsp)
movq $0x0, 0x1d8(%rsp)
callq b35 <_calculateVerticesAndNormals+0x4b5>
mov 0xe0(%rsp), %rbx
movss 0xd0(%rsp), %xmm12
movss 0xb0(%rsp), %xmm11
movss 0xa0(%rsp), %xmm10
add %rbp, %rbx
movss 0x70(%rsp), %xmm8
movss 0x90(%rsp), %xmm9
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
mov 0xc8(%rsp), %rax
jne 8d3 <_calculateVerticesAndNormals+0x253>
mov $0x23, %r8d
mov $0x6, %edx
mov $0x0, %ecx
mov $0x9, %edi
mov $0x0, %esi
movss %xmm0, (%rsp)
mov %rax, 0xc8(%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movss %xmm9, 0x90(%rsp)
movss %xmm10, 0xa0(%rsp)
movss %xmm11, 0xb0(%rsp)
movss %xmm12, 0xd0(%rsp)
movq $0x6, 0x240(%rsp)
movq $0x0, 0x248(%rsp)
movq $0x9, 0x250(%rsp)
movq $0x0, 0x258(%rsp)
callq c67 <_calculateVerticesAndNormals+0x5e7>
movss 0x70(%rsp), %xmm8
movss 0xd0(%rsp), %xmm12
movss 0xb0(%rsp), %xmm11
movss 0xa0(%rsp), %xmm10
movss 0x90(%rsp), %xmm9
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
mov 0xc8(%rsp), %rax
jmpq 8d3 <_calculateVerticesAndNormals+0x253>
nopl (%rax)
mov %rax, %r14
mov 0x108(%rsp), %rax
cmp %rax, 0x128(%rsp)
jbe 11d0 <_calculateVerticesAndNormals+0xb50>
shl $0x5, %rax
mov %rax, 0x150(%rsp)
mov 0x100(%rsp), %rax
mov 0x138(%rsp), %rbx
add %rax, %rax
add 0x150(%rsp), %rbx
cmp %rax, 0x130(%rsp)
jbe 10e8 <_calculateVerticesAndNormals+0xa68>
mov 0x100(%rsp), %rax
shl $0x5, %rax
mov %rax, 0x158(%rsp)
movss 0x8(%rbx), %xmm12
movaps %xmm8, %xmm15
movss (%rbx), %xmm14
movss %xmm12, 0x11c(%rsp)
movss 0x4(%rbx), %xmm13
movaps %xmm7, %xmm12
mulss %xmm14, %xmm12
mov 0x140(%rsp), %rax
mulss %xmm13, %xmm15
add 0x158(%rsp), %rax
addss %xmm15, %xmm12
addss %xmm11, %xmm12
movl $0x0, 0xc(%rax)
movss 0x11c(%rsp), %xmm11
mulss %xmm6, %xmm11
addss %xmm11, %xmm12
movaps %xmm4, %xmm11
mulss %xmm14, %xmm11
mulss %xmm1, %xmm14
movss %xmm12, (%rax)
movaps %xmm5, %xmm12
mulss %xmm13, %xmm12
mulss %xmm2, %xmm13
addss %xmm12, %xmm11
addss %xmm13, %xmm14
addss %xmm10, %xmm11
movss 0x11c(%rsp), %xmm10
addss %xmm9, %xmm14
movss 0x11c(%rsp), %xmm9
mulss %xmm3, %xmm10
mulss %xmm0, %xmm9
addss %xmm10, %xmm11
addss %xmm9, %xmm14
movss %xmm11, 0x4(%rax)
movss %xmm14, 0x8(%rax)
mov 0x108(%rsp), %rax
cmp %rax, 0x128(%rsp)
jbe 1040 <_calculateVerticesAndNormals+0x9c0>
shl $0x5, %rax
mov %rax, 0x160(%rsp)
mov 0x138(%rsp), %rbx
mov 0x120(%rsp), %rax
add 0x160(%rsp), %rbx
cmp %rax, 0x130(%rsp)
jbe f98 <_calculateVerticesAndNormals+0x918>
shl $0x4, %rax
mov %rax, 0x168(%rsp)
movss 0x10(%rbx), %xmm10
add $0x1, %r15d
movss 0x14(%rbx), %xmm11
mulss %xmm10, %xmm7
mov 0x140(%rsp), %rax
mulss %xmm11, %xmm8
movss 0x18(%rbx), %xmm9
mulss %xmm11, %xmm5
mulss %xmm10, %xmm4
mulss %xmm11, %xmm2
add 0x168(%rsp), %rax
addq $0x1, 0x100(%rsp)
addss %xmm7, %xmm8
addq $0x2, 0x120(%rsp)
addss %xmm4, %xmm5
mulss %xmm10, %xmm1
mulss %xmm9, %xmm6
movl $0x0, 0xc(%rax)
mulss %xmm9, %xmm3
mulss %xmm9, %xmm0
addss %xmm1, %xmm2
addss %xmm6, %xmm8
addss %xmm3, %xmm5
addss %xmm0, %xmm2
movss %xmm8, (%rax)
movss %xmm5, 0x4(%rax)
movss %xmm2, 0x8(%rax)
mov 0x100(%rsp), %rax
cmp %rax, 0x128(%rsp)
je 1317 <_calculateVerticesAndNormals+0xc97>
movslq %r15d, %r12
mov %rax, 0x108(%rsp)
cmp %r12, 0xf0(%rsp)
ja 798 <_calculateVerticesAndNormals+0x118>
mov $0x5d, %edx
mov $0x6, %edi
mov $0x0, %esi
movq $0x6, 0x1a0(%rsp)
movq $0x0, 0x1a8(%rsp)
callq f49 <_calculateVerticesAndNormals+0x8c9>
jmpq 7a8 <_calculateVerticesAndNormals+0x128>
xchg %ax, %ax
mov $0x5f, %edx
mov $0x6, %edi
mov $0x0, %esi
movss %xmm9, 0x90(%rsp)
movq $0x6, 0x1b0(%rsp)
movq $0x0, 0x1b8(%rsp)
callq f86 <_calculateVerticesAndNormals+0x906>
movss 0x90(%rsp), %xmm9
jmpq 7e4 <_calculateVerticesAndNormals+0x164>
nopl (%rax)
mov $0x69, %edx
mov $0x6, %edi
mov $0x0, %esi
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movq $0x6, 0x210(%rsp)
movq $0x0, 0x218(%rsp)
callq ffd <_calculateVerticesAndNormals+0x97d>
movss 0x70(%rsp), %xmm8
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
jmpq e59 <_calculateVerticesAndNormals+0x7d9>
nopl 0x0(%rax, %rax, 1)
mov $0x69, %edx
mov $0x6, %edi
mov $0x0, %esi
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movq $0x6, 0x200(%rsp)
movq $0x0, 0x208(%rsp)
callq 10a5 <_calculateVerticesAndNormals+0xa25>
movss 0x70(%rsp), %xmm8
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
jmpq e27 <_calculateVerticesAndNormals+0x7a7>
nopl 0x0(%rax, %rax, 1)
mov $0x68, %edx
mov $0x6, %edi
mov $0x0, %esi
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movss %xmm9, 0x90(%rsp)
movss %xmm10, 0xa0(%rsp)
movss %xmm11, 0xb0(%rsp)
movq $0x6, 0x1f0(%rsp)
movq $0x0, 0x1f8(%rsp)
callq 116b <_calculateVerticesAndNormals+0xaeb>
movss 0x70(%rsp), %xmm8
movss 0xb0(%rsp), %xmm11
movss 0xa0(%rsp), %xmm10
movss 0x90(%rsp), %xmm9
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
jmpq d3a <_calculateVerticesAndNormals+0x6ba>
nopw 0x0(%rax, %rax, 1)
mov $0x68, %edx
mov $0x6, %edi
mov $0x0, %esi
movss %xmm0, (%rsp)
movss %xmm1, 0x20(%rsp)
movss %xmm2, 0x10(%rsp)
movss %xmm3, 0x30(%rsp)
movss %xmm4, 0x50(%rsp)
movss %xmm5, 0x40(%rsp)
movss %xmm6, 0x60(%rsp)
movss %xmm7, 0x80(%rsp)
movss %xmm8, 0x70(%rsp)
movss %xmm9, 0x90(%rsp)
movss %xmm10, 0xa0(%rsp)
movss %xmm11, 0xb0(%rsp)
movq $0x6, 0x1e0(%rsp)
movq $0x0, 0x1e8(%rsp)
callq 1253 <_calculateVerticesAndNormals+0xbd3>
movss 0x70(%rsp), %xmm8
movss 0xb0(%rsp), %xmm11
movss 0xa0(%rsp), %xmm10
movss 0x90(%rsp), %xmm9
movss 0x80(%rsp), %xmm7
movss 0x60(%rsp), %xmm6
movss 0x40(%rsp), %xmm5
movss 0x50(%rsp), %xmm4
movss 0x30(%rsp), %xmm3
movss 0x10(%rsp), %xmm2
movss 0x20(%rsp), %xmm1
movss (%rsp), %xmm0
jmpq cfd <_calculateVerticesAndNormals+0x67d>
mov $0x12, %r8d
mov $0x6, %edx
mov $0x0, %ecx
mov $0x9, %edi
mov $0x0, %esi
movss %xmm9, 0x90(%rsp)
movq $0x6, 0x220(%rsp)
movq $0x0, 0x228(%rsp)
movq $0x9, 0x230(%rsp)
movq $0x0, 0x238(%rsp)
callq 1308 <_calculateVerticesAndNormals+0xc88>
movss 0x90(%rsp), %xmm9
jmpq 7fa <_calculateVerticesAndNormals+0x17a>
add $0x268, %rsp
pop %rbx
pop %rbp
pop %r12
pop %r13
pop %r14
pop %r15
retq
nopl 0x0(%rax)
Bye,
bearophile
More information about the Digitalmars-d
mailing list