# ----------------------------------------------------------------------------
# cppver.cpp -- compiler output (see the .ident line at the end of the file).
#
# Syntax : GNU as, Intel operand order, no register prefixes.
# Target : x86-64 ELF.  Register roles below assume the System V AMD64
#          calling convention.
#
# Contents:
#   calculateVerticesAndNormals_x87(const BoneTransform*, int, const Vertex*,
#                                   const Influence*, CalVector4*)
#   main  -- fills test data on the stack, runs the same computation inline
#            for a fixed number of clock() ticks and prints a rate.
#
# Layouts as the code accesses them (type names come from the mangled symbol,
# field meanings are inferred from use -- confirm against the C++ source):
#   BoneTransform : 48 bytes, 12 floats m0..m11 at offsets 0..44
#   Influence     : 12 bytes, +0 dword bone index, +4 float weight,
#                   +8 dword "last influence of this vertex" flag (non-zero)
#   Vertex        : 32 bytes, floats read at +0,+4,+8 and +16,+20,+24
#   output        : 32 bytes per vertex, floats written at +0,+4,+8 and
#                   +16,+20,+24 (offsets +12 and +28 are never written)
# ----------------------------------------------------------------------------
        .file   "cppver.cpp"
        .intel_syntax noprefix
        .text
        .p2align 4,,15
        .globl  _Z31calculateVerticesAndNormals_x87PK13BoneTransformiPK6VertexPK9InfluenceP10CalVector4
        .type   _Z31calculateVerticesAndNormals_x87PK13BoneTransformiPK6VertexPK9InfluenceP10CalVector4, @function
# ----------------------------------------------------------------------------
# void calculateVerticesAndNormals_x87(const BoneTransform *bones, int count,
#                                      const Vertex *vertices,
#                                      const Influence *influences,
#                                      CalVector4 *out)
# In   : rdi = bones, esi = count, rdx = vertices, rcx = influences, r8 = out
# Out  : nothing in registers; 6 floats stored per vertex through r8
# Clobb: rax, rcx, rdx, rsi, r8, xmm0-xmm15, flags
# Stack: none (leaf function)
#
# Per vertex: a 3x4 matrix is built as the weighted sum of the bone matrices
# named by consecutive Influence records, up to and including the first
# record whose flag dword is non-zero.  Accumulator registers:
#   m0=xmm7  m1=xmm8  m2=xmm6  m3=xmm11
#   m4=xmm4  m5=xmm5  m6=xmm3  m7=xmm10
#   m8=xmm1  m9=xmm2  m10=xmm0 m11=xmm9
# Then  out[+0..+8]   = rows . (v[+0], v[+4], v[+8]) + (m3, m7, m11)
#       out[+16..+24] = rows . (v[+16], v[+20], v[+24])   (no m3/m7/m11 term)
# ----------------------------------------------------------------------------
_Z31calculateVerticesAndNormals_x87PK13BoneTransformiPK6VertexPK9InfluenceP10CalVector4:
.LFB1414:
        .cfi_startproc
        test    esi, esi                        # count == 0: nothing to do
        je      .L1
        dec     esi                             # 32-bit write zero-extends into rsi
        sal     rsi, 5                          # (count - 1) * 32
        lea     rsi, [rdx+rsi]                  # rsi = address of the last vertex
        .p2align 4,,10
        .p2align 3
.L5:
        # ---- first influence of this vertex: matrix = weight * bone ----
        movss   xmm9, DWORD PTR [rcx+4]         # weight
        mov     eax, DWORD PTR [rcx]            # bone index, zero-extended
        lea     rax, [rax+rax*2]
        sal     rax, 4                          # index * 48
        lea     rax, [rdi+rax]                  # rax = bones + index * 48
        movss   xmm7, DWORD PTR [rax]
        movss   xmm8, DWORD PTR [rax+4]
        movss   xmm6, DWORD PTR [rax+8]
        movss   xmm11, DWORD PTR [rax+12]
        movss   xmm4, DWORD PTR [rax+16]
        movss   xmm5, DWORD PTR [rax+20]
        movss   xmm3, DWORD PTR [rax+24]
        movss   xmm10, DWORD PTR [rax+28]
        movss   xmm1, DWORD PTR [rax+32]
        movss   xmm2, DWORD PTR [rax+36]
        movss   xmm0, DWORD PTR [rax+40]
        mulss   xmm7, xmm9
        mulss   xmm8, xmm9
        mulss   xmm6, xmm9
        mulss   xmm11, xmm9
        mulss   xmm4, xmm9
        mulss   xmm5, xmm9
        mulss   xmm3, xmm9
        mulss   xmm10, xmm9
        mulss   xmm1, xmm9
        mulss   xmm2, xmm9
        mulss   xmm0, xmm9
        mulss   xmm9, DWORD PTR [rax+44]        # weight register becomes m11
        mov     eax, DWORD PTR [rcx+8]          # flag of the record just consumed
        add     rcx, 12                         # advance to the next Influence
        test    eax, eax
        jne     .L3                             # flag set: matrix is complete
        .p2align 4,,10
        .p2align 3
.L6:
        # ---- further influences: matrix += weight * bone ----
        movss   xmm12, DWORD PTR [rcx+4]        # weight
        mov     eax, DWORD PTR [rcx]
        lea     rax, [rax+rax*2]
        sal     rax, 4
        lea     rax, [rdi+rax]                  # rax = bones + index * 48
        movss   xmm13, DWORD PTR [rax]
        mulss   xmm13, xmm12
        addss   xmm7, xmm13
        movss   xmm13, DWORD PTR [rax+4]
        mulss   xmm13, xmm12
        addss   xmm8, xmm13
        movss   xmm13, DWORD PTR [rax+8]
        mulss   xmm13, xmm12
        addss   xmm6, xmm13
        movss   xmm13, DWORD PTR [rax+12]
        mulss   xmm13, xmm12
        addss   xmm11, xmm13
        movss   xmm13, DWORD PTR [rax+16]
        mulss   xmm13, xmm12
        addss   xmm4, xmm13
        movss   xmm13, DWORD PTR [rax+20]
        mulss   xmm13, xmm12
        addss   xmm5, xmm13
        movss   xmm13, DWORD PTR [rax+24]
        mulss   xmm13, xmm12
        addss   xmm3, xmm13
        movss   xmm13, DWORD PTR [rax+28]
        mulss   xmm13, xmm12
        addss   xmm10, xmm13
        movss   xmm13, DWORD PTR [rax+32]
        mulss   xmm13, xmm12
        addss   xmm1, xmm13
        movss   xmm13, DWORD PTR [rax+36]
        mulss   xmm13, xmm12
        addss   xmm2, xmm13
        movss   xmm13, DWORD PTR [rax+40]
        mulss   xmm13, xmm12
        mulss   xmm12, DWORD PTR [rax+44]
        addss   xmm0, xmm13
        mov     eax, DWORD PTR [rcx+8]          # flag of the record just consumed
        addss   xmm9, xmm12
        add     rcx, 12
        test    eax, eax
        je      .L6                             # flag clear: keep accumulating
.L3:
        # ---- transform the vertex with the accumulated matrix ----
        movss   xmm14, DWORD PTR [rdx]          # v[+0]
        movss   xmm13, DWORD PTR [rdx+4]        # v[+4]
        movaps  xmm12, xmm7
        movaps  xmm15, xmm8
        mulss   xmm12, xmm14
        mulss   xmm15, xmm13
        cmp     rdx, rsi                        # last vertex?  Flags are consumed by
                                                # the je at the end of this block; only
                                                # SSE ops and stores lie in between.
        addss   xmm12, xmm15
        addss   xmm12, xmm11                    # + m3
        movss   xmm11, DWORD PTR [rdx+8]        # v[+8]
        mulss   xmm11, xmm6
        addss   xmm12, xmm11
        movaps  xmm11, xmm4
        movss   DWORD PTR [r8], xmm12           # out[+0]
        mulss   xmm11, xmm14
        movaps  xmm12, xmm5
        mulss   xmm14, xmm1
        mulss   xmm12, xmm13
        mulss   xmm7, DWORD PTR [rdx+16]        # m0 * v[+16]
        mulss   xmm13, xmm2
        addss   xmm11, xmm12
        addss   xmm14, xmm13
        addss   xmm11, xmm10                    # + m7
        addss   xmm14, xmm9                     # + m11
        # v[+8] is loaded again from memory here, after the store to out[+0]
        # (presumably because out is not known to be distinct from the input).
        movss   xmm10, DWORD PTR [rdx+8]
        movss   xmm9, DWORD PTR [rdx+8]
        mulss   xmm10, xmm3
        mulss   xmm9, xmm0
        addss   xmm11, xmm10
        addss   xmm14, xmm9
        movss   DWORD PTR [r8+4], xmm11         # out[+4]
        movss   DWORD PTR [r8+8], xmm14         # out[+8]
        movss   xmm10, DWORD PTR [rdx+20]       # v[+20]
        movss   xmm9, DWORD PTR [rdx+24]        # v[+24]
        mulss   xmm8, xmm10
        mulss   xmm6, xmm9
        addss   xmm8, xmm7
        mulss   xmm5, xmm10
        addss   xmm8, xmm6
        mulss   xmm3, xmm9
        movss   DWORD PTR [r8+16], xmm8         # out[+16]
        mulss   xmm0, xmm9
        movss   xmm6, DWORD PTR [rdx+16]        # v[+16], loaded again after the store
        mulss   xmm4, xmm6
        mulss   xmm1, xmm6
        addss   xmm5, xmm4
        addss   xmm5, xmm3
        movss   DWORD PTR [r8+20], xmm5         # out[+20]
        mulss   xmm2, DWORD PTR [rdx+20]
        addss   xmm2, xmm1
        addss   xmm2, xmm0
        movss   DWORD PTR [r8+24], xmm2         # out[+24]
        je      .L1                             # that was the last vertex
        add     rdx, 32                         # next input vertex
        add     r8, 32                          # next output slot
        jmp     .L5
.L1:
        rep ret
        .cfi_endproc
.LFE1414:
        .size   _Z31calculateVerticesAndNormals_x87PK13BoneTransformiPK6VertexPK9InfluenceP10CalVector4, .-_Z31calculateVerticesAndNormals_x87PK13BoneTransformiPK6VertexPK9InfluenceP10CalVector4
        .section        .rodata.str1.8,"aMS",@progbits,1
        .align 8
.LC3:
        .string "Skinned vertices per second: %d, blah=%f\n"
        .text
        .p2align 4,,15
        .globl  main
        .type   main, @function
# ----------------------------------------------------------------------------
# int main(void)
# Out  : eax = 0
# Stack: 6 callee-saved pushes + 760072 bytes of locals; rsp is 16-byte
#        aligned at every call (8 + 48 + 760072 = 760128 = 16 * 47508).
#
# Frame layout (offsets from rsp after the sub):
#   [rsp+8]                    first clock() result
#   [rsp+16     .. +320016)    output buffer, 10000 * 32 bytes
#   [rsp+320016 .. +640016)    input vertices, 10000 * 32 bytes
#   [rsp+640016 .. +760016)    influences, 10000 * 12 bytes
#   [rsp+760016 .. +760064)    one bone matrix, 48 bytes
#
# Registers live across calls:
#   rbx = bone matrix, rbp = output buffer (later the cursor of the final sum),
#   r12 = end of output buffer, r13 = last input vertex,
#   r14 = vertices processed so far, r15 = first clock() result + 999999
# ----------------------------------------------------------------------------
main:
.LFB1418:
        .cfi_startproc
        push    r15
        .cfi_def_cfa_offset 16
        xorps   xmm0, xmm0
        push    r14
        .cfi_def_cfa_offset 24
        push    r13
        .cfi_def_cfa_offset 32
        push    r12
        .cfi_def_cfa_offset 40
        push    rbp
        .cfi_def_cfa_offset 48
        push    rbx
        .cfi_def_cfa_offset 56
        sub     rsp, 760072
        .cfi_def_cfa_offset 760128
        lea     rax, [rsp+320016]               # first input vertex
        movaps  xmm1, XMMWORD PTR .LC0[rip]     # (0, 0, 0, 1.0f)
        mov     rdx, rax
        add     rdx, 320000                     # one past the last input vertex
        .p2align 4,,10
        .p2align 3
.L10:
        # Default-fill every vertex: first 16 bytes = .LC0, next 16 bytes = 0.
        movaps  XMMWORD PTR [rax], xmm1
        movaps  XMMWORD PTR [rax+16], xmm0
        add     rax, 32
        cmp     rdx, rax
        jne     .L10
        # Save-slot records for rbx, rbp, r12-r15.  None of those registers has
        # been modified up to this point, so the late placement is harmless.
        .cfi_offset 3, -56
        .cfi_offset 6, -48
        .cfi_offset 12, -40
        .cfi_offset 13, -32
        .cfi_offset 14, -24
        .cfi_offset 15, -16
        lea     rdx, [rsp+760016]               # one past the last influence
        lea     rax, [rsp+640016]               # first influence
        .p2align 4,,10
        .p2align 3
.L11:
        # Default-fill every influence: index = -1, weight = 0.0f, flag = 0.
        # (All of these records are overwritten again by the .L12 loop.)
        mov     DWORD PTR [rax], -1
        mov     DWORD PTR [rax+4], 0x00000000
        mov     DWORD PTR [rax+8], 0
        add     rax, 12
        cmp     rax, rdx
        jne     .L11
        lea     rax, [rsp+320032]               # vertex cursor, biased by +16
        lea     rdx, [rsp+640016]               # influence cursor
        lea     rcx, [rsp+640032]               # end value for the biased cursor
        .p2align 4,,10
        .p2align 3
.L12:
        # Test data: vertex +12 = 1.0f, +16 = 0, +20 = 0, +24 = 1.0f, +28 = 0;
        # influence = { index 0, weight 1.0f, flag 1 }.
        mov     DWORD PTR [rax-4], 0x3f800000
        mov     DWORD PTR [rax], 0x00000000
        mov     DWORD PTR [rax+4], 0x00000000
        mov     DWORD PTR [rax+8], 0x3f800000
        mov     DWORD PTR [rax+12], 0x00000000
        mov     DWORD PTR [rdx], 0
        mov     DWORD PTR [rdx+4], 0x3f800000
        mov     DWORD PTR [rdx+8], 1
        add     rax, 32
        add     rdx, 12
        cmp     rax, rcx
        jne     .L12
        xor     eax, eax
        lea     rbp, [rsp+16]                   # output buffer
        lea     rbx, [rsp+760016]               # bone matrix
        mov     ecx, 12
        mov     rdi, rbx
        mov     r12, rbp
        rep stosd                               # zero the 12 dwords of the bone matrix
        add     r12, 320000                     # r12 = end of output buffer
        mov     rax, rbp
        xorps   xmm0, xmm0
        .p2align 4,,10
        .p2align 3
.L13:
        # Zero the whole output buffer, 16 bytes at a time.
        movaps  XMMWORD PTR [rax], xmm0
        add     rax, 16
        cmp     rax, r12
        jne     .L13
        call    clock@PLT
        xor     r14d, r14d                      # vertices processed = 0
        mov     r15, rax
        mov     QWORD PTR [rsp+8], rax          # remember the start time
        add     r15, 999999                     # run while clock() is at most start + 999999
        call    clock@PLT
        lea     r13, [rsp+639984]               # address of the last input vertex
        cmp     rax, r15
        jg      .L18
.L29:
        # One pass over all vertices; same computation as the routine above,
        # expanded in place with rbx = bones, rdx = influences, rcx = vertices,
        # rsi = output.
        lea     rsi, [rsp+16]
        lea     rcx, [rsp+320016]
        lea     rdx, [rsp+640016]
        .p2align 4,,10
        .p2align 3
.L19:
        # ---- first influence of this vertex: matrix = weight * bone ----
        movss   xmm9, DWORD PTR [rdx+4]
        mov     eax, DWORD PTR [rdx]
        lea     rax, [rax+rax*2]
        sal     rax, 4
        lea     rax, [rbx+rax]                  # rax = bones + index * 48
        movss   xmm7, DWORD PTR [rax]
        movss   xmm8, DWORD PTR [rax+4]
        movss   xmm6, DWORD PTR [rax+8]
        movss   xmm11, DWORD PTR [rax+12]
        movss   xmm4, DWORD PTR [rax+16]
        movss   xmm5, DWORD PTR [rax+20]
        movss   xmm3, DWORD PTR [rax+24]
        movss   xmm10, DWORD PTR [rax+28]
        movss   xmm1, DWORD PTR [rax+32]
        movss   xmm2, DWORD PTR [rax+36]
        movss   xmm0, DWORD PTR [rax+40]
        mulss   xmm7, xmm9
        mulss   xmm8, xmm9
        mulss   xmm6, xmm9
        mulss   xmm11, xmm9
        mulss   xmm4, xmm9
        mulss   xmm5, xmm9
        mulss   xmm3, xmm9
        mulss   xmm10, xmm9
        mulss   xmm1, xmm9
        mulss   xmm2, xmm9
        mulss   xmm0, xmm9
        mulss   xmm9, DWORD PTR [rax+44]
        mov     eax, DWORD PTR [rdx+8]
        add     rdx, 12
        test    eax, eax
        jne     .L15                            # flag set: matrix is complete
        .p2align 4,,10
        .p2align 3
.L21:
        # ---- further influences: matrix += weight * bone ----
        movss   xmm12, DWORD PTR [rdx+4]
        mov     eax, DWORD PTR [rdx]
        lea     rax, [rax+rax*2]
        sal     rax, 4
        lea     rax, [rbx+rax]
        movss   xmm13, DWORD PTR [rax]
        mulss   xmm13, xmm12
        addss   xmm7, xmm13
        movss   xmm13, DWORD PTR [rax+4]
        mulss   xmm13, xmm12
        addss   xmm8, xmm13
        movss   xmm13, DWORD PTR [rax+8]
        mulss   xmm13, xmm12
        addss   xmm6, xmm13
        movss   xmm13, DWORD PTR [rax+12]
        mulss   xmm13, xmm12
        addss   xmm11, xmm13
        movss   xmm13, DWORD PTR [rax+16]
        mulss   xmm13, xmm12
        addss   xmm4, xmm13
        movss   xmm13, DWORD PTR [rax+20]
        mulss   xmm13, xmm12
        addss   xmm5, xmm13
        movss   xmm13, DWORD PTR [rax+24]
        mulss   xmm13, xmm12
        addss   xmm3, xmm13
        movss   xmm13, DWORD PTR [rax+28]
        mulss   xmm13, xmm12
        addss   xmm10, xmm13
        movss   xmm13, DWORD PTR [rax+32]
        mulss   xmm13, xmm12
        addss   xmm1, xmm13
        movss   xmm13, DWORD PTR [rax+36]
        mulss   xmm13, xmm12
        addss   xmm2, xmm13
        movss   xmm13, DWORD PTR [rax+40]
        mulss   xmm13, xmm12
        mulss   xmm12, DWORD PTR [rax+44]
        addss   xmm0, xmm13
        mov     eax, DWORD PTR [rdx+8]
        addss   xmm9, xmm12
        add     rdx, 12
        test    eax, eax
        je      .L21
.L15:
        # ---- transform the vertex with the accumulated matrix ----
        movss   xmm14, DWORD PTR [rcx]
        movss   xmm13, DWORD PTR [rcx+4]
        movaps  xmm12, xmm7
        movaps  xmm15, xmm8
        mulss   xmm12, xmm14
        mulss   xmm15, xmm13
        cmp     rcx, r13                        # last vertex?  Consumed by je .L28 below.
        addss   xmm12, xmm15
        addss   xmm12, xmm11
        movss   xmm11, DWORD PTR [rcx+8]
        mulss   xmm11, xmm6
        addss   xmm12, xmm11
        movaps  xmm11, xmm4
        movss   DWORD PTR [rsi], xmm12          # out[+0]
        mulss   xmm11, xmm14
        movaps  xmm12, xmm5
        mulss   xmm14, xmm1
        mulss   xmm12, xmm13
        mulss   xmm13, xmm2
        addss   xmm11, xmm12
        addss   xmm14, xmm13
        addss   xmm11, xmm10
        addss   xmm14, xmm9
        movss   xmm10, DWORD PTR [rcx+8]
        movss   xmm9, DWORD PTR [rcx+8]
        mulss   xmm10, xmm3
        mulss   xmm9, xmm0
        addss   xmm11, xmm10
        addss   xmm14, xmm9
        movss   xmm10, DWORD PTR [rcx+16]       # v[+16]
        movss   DWORD PTR [rsi+4], xmm11        # out[+4]
        movss   DWORD PTR [rsi+8], xmm14        # out[+8]
        movss   xmm11, DWORD PTR [rcx+20]       # v[+20]
        movss   xmm9, DWORD PTR [rcx+24]        # v[+24]
        mulss   xmm8, xmm11
        mulss   xmm7, xmm10
        mulss   xmm6, xmm9
        addss   xmm8, xmm7
        mulss   xmm5, xmm11
        addss   xmm8, xmm6
        mulss   xmm4, xmm10
        mulss   xmm3, xmm9
        addss   xmm5, xmm4
        mulss   xmm2, xmm11
        addss   xmm5, xmm3
        mulss   xmm1, xmm10
        mulss   xmm0, xmm9
        addss   xmm2, xmm1
        movss   DWORD PTR [rsi+16], xmm8        # out[+16]
        addss   xmm2, xmm0
        movss   DWORD PTR [rsi+20], xmm5        # out[+20]
        movss   DWORD PTR [rsi+24], xmm2        # out[+24]
        je      .L28                            # that was the last vertex
        add     rcx, 32
        add     rsi, 32
        jmp     .L19
.L28:
        add     r14, 10000                      # one full pass = 10000 vertices
        call    clock@PLT
        cmp     rax, r15
        jle     .L29                            # time budget not used up: another pass
.L18:
        call    clock@PLT
        xorps   xmm5, xmm5                      # four partial sums = 0
        mov     rcx, rax
        sub     rcx, QWORD PTR [rsp+8]          # rcx = elapsed clock() ticks
        .p2align 4,,10
        .p2align 3
.L20:
        # Add up every float in the output buffer, 64 bytes per iteration.
        # The shuffles regroup the 16 floats into four vectors that are then
        # added into xmm5.
        movaps  xmm3, XMMWORD PTR [rbp+16]
        movaps  xmm0, XMMWORD PTR [rbp+48]
        movaps  xmm2, XMMWORD PTR [rbp+0]
        movaps  xmm1, XMMWORD PTR [rbp+32]
        movaps  xmm4, xmm2
        add     rbp, 64
        shufps  xmm4, xmm3, 136
        shufps  xmm2, xmm3, 221
        movaps  xmm6, xmm4
        movaps  xmm3, xmm1
        cmp     r12, rbp                        # end of buffer?  Consumed by jne .L20.
        shufps  xmm3, xmm0, 136
        shufps  xmm1, xmm0, 221
        shufps  xmm6, xmm3, 136
        movaps  xmm0, xmm2
        shufps  xmm4, xmm3, 221
        shufps  xmm0, xmm1, 136
        shufps  xmm2, xmm1, 221
        addps   xmm0, xmm6
        addps   xmm0, xmm4
        addps   xmm0, xmm2
        addps   xmm5, xmm0
        jne     .L20
        imul    rax, r14, 1000000               # vertices * 1000000
        haddps  xmm5, xmm5
        mov     rdx, rax
        haddps  xmm5, xmm5                      # every lane = total of all outputs
        sar     rdx, 63                         # sign-extend rax into rdx for idiv
        movaps  xmm0, xmm5
        idiv    rcx                             # rax = vertices * 1000000 / elapsed
        cvtss2sd        xmm0, xmm0              # float promoted to double for "%f"
        mov     edx, eax                        # "%d" argument (low 32 bits of the rate)
        lea     rsi, .LC3[rip]                  # RIP-relative like .LC0; a 32-bit absolute
                                                # address does not link as a PIE
        mov     edi, 1                          # first argument of __printf_chk
        mov     eax, 1                          # al = 1 vector register used (variadic call)
        call    __printf_chk@PLT
        add     rsp, 760072
        .cfi_def_cfa_offset 56
        xor     eax, eax                        # return 0
        pop     rbx
        .cfi_def_cfa_offset 48
        pop     rbp
        .cfi_def_cfa_offset 40
        pop     r12
        .cfi_def_cfa_offset 32
        pop     r13
        .cfi_def_cfa_offset 24
        pop     r14
        .cfi_def_cfa_offset 16
        pop     r15
        .cfi_def_cfa_offset 8
        ret
        .cfi_endproc
.LFE1418:
        .size   main, .-main
        .section        .rodata.cst16,"aM",@progbits,16
        .align 16
# Four floats (0, 0, 0, 1.0f): default value of the first half of a vertex.
.LC0:
        .long   0
        .long   0
        .long   0
        .long   1065353216
        .ident  "GCC: (Ubuntu/Linaro 4.5.2-8ubuntu4) 4.5.2"
        .section        .note.GNU-stack,"",@progbits