[phobos] std.algorithm.sort slow as molasses
David Simcha
dsimcha at gmail.com
Fri Jul 2 14:27:41 PDT 2010
Sorry, the list wouldn't post my accidental double paste anyhow because it
was too big. Here's the disassembly of partition, showing only back() is
getting called and pred seems to be getting inlined, contrary to what I had
believed before. So changing enforce() in back() to assert() gets rid of
about half the overhead, but I have no idea where the other half is coming
from.
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
PROC NEAR
; COMDEF
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
sub esp, 36 ; 0000 _ 83. EC, 24
mov ecx, dword ptr [esp+2CH] ; 0003 _ 8B. 4C 24,
2C
push ebx ; 0007 _ 53
mov ebx, dword ptr [esp+2CH] ; 0008 _ 8B. 5C 24,
2C
test ebx, ebx ; 000C _ 85. DB
push ebp ; 000E _ 55
push esi ; 000F _ 56
mov esi, eax ; 0010 _ 89. C6
push edi ; 0012 _ 57
jnz ?_020 ; 0013 _ 75, 0E
pop edi ; 0015 _ 5F
mov eax, ebx ; 0016 _ 8B. C3
mov edx, ecx ; 0018 _ 8B. D1
pop esi ; 001A _ 5E
pop ebp ; 001B _ 5D
pop ebx ; 001C _ 5B
add esp, 36 ; 001D _ 83. C4, 24
ret 8 ; 0020 _ C2, 0008
?_020: mov dword ptr [esp+3CH], ecx ; 0023 _ 89. 4C 24,
3C
mov edx, dword ptr [esp+3CH] ; 0027 _ 8B. 54 24,
3C
mov dword ptr [esp+38H], ebx ; 002B _ 89. 5C 24,
38
mov ebx, dword ptr [esp+38H] ; 002F _ 8B. 5C 24,
38
mov dword ptr [esp+14H], ebx ; 0033 _ 89. 5C 24,
14
mov dword ptr [esp+18H], edx ; 0037 _ 89. 54 24,
18
cmp dword ptr [esp+38H], 0 ; 003B _ 83. 7C 24,
38, 00
je ?_023 ; 0040 _ 0F 84,
00000088
?_021: mov edx, dword ptr [esp+3CH] ; 0046 _ 8B. 54 24,
3C
mov eax, dword ptr [esp+38H] ; 004A _ 8B. 44 24,
38
mov ebx, dword ptr [edx] ; 004E _ 8B. 1A
push dword ptr [esi+0CH] ; 0050 _ FF. 76, 0C
push dword ptr [esi+8H] ; 0053 _ FF. 76, 08
mov dword ptr [esp+18H], edx ; 0056 _ 89. 54 24,
18
call _D3std5array12__T4backTAiZ4backFNcAiZi ; 005A _ E8,
00000000(rel)
cmp dword ptr [eax], ebx ; 005F _ 39. 18
jle ?_022 ; 0061 _ 7E, 31
mov edi, dword ptr [esp+38H] ; 0063 _ 8B. 7C 24,
38
mov ecx, dword ptr [esp+10H] ; 0067 _ 8B. 4C 24,
10
dec edi ; 006B _ 4F
lea edx, [ecx+4H] ; 006C _ 8D. 51, 04
mov ebx, dword ptr [esp+14H] ; 006F _ 8B. 5C 24,
14
dec ebx ; 0073 _ 4B
mov dword ptr [esp+3CH], edx ; 0074 _ 89. 54 24,
3C
mov edx, dword ptr [esp+18H] ; 0078 _ 8B. 54 24,
18
mov eax, dword ptr [esp+14H] ; 007C _ 8B. 44 24,
14
mov dword ptr [esp+38H], edi ; 0080 _ 89. 7C 24,
38
add edx, 4 ; 0084 _ 83. C2, 04
mov dword ptr [esp+14H], ebx ; 0087 _ 89. 5C 24,
14
mov dword ptr [esp+18H], edx ; 008B _ 89. 54 24,
18
jmp ?_025 ; 008F _ E9,
000000AA
?_022: push dword ptr [esp+3CH] ; 0094 _ FF. 74 24,
3C
push dword ptr [esp+3CH] ; 0098 _ FF. 74 24,
3C
call _D3std5array12__T4backTAiZ4backFNcAiZi ; 009C _ E8,
00000000(rel)
mov ebx, dword ptr [eax] ; 00A1 _ 8B. 18
push dword ptr [esi+0CH] ; 00A3 _ FF. 76, 0C
push dword ptr [esi+8H] ; 00A6 _ FF. 76, 08
call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00A9 _ E8,
00000000(rel)
cmp dword ptr [eax], ebx ; 00AE _ 39. 18
jg ?_024 ; 00B0 _ 7F, 2E
mov edi, dword ptr [esp+38H] ; 00B2 _ 8B. 7C 24,
38
dec edi ; 00B6 _ 4F
mov eax, dword ptr [esp+38H] ; 00B7 _ 8B. 44 24,
38
mov dword ptr [esp+38H], edi ; 00BB _ 89. 7C 24,
38
mov edx, dword ptr [esp+3CH] ; 00BF _ 8B. 54 24,
3C
mov dword ptr [esp+3CH], edx ; 00C3 _ 89. 54 24,
3C
cmp dword ptr [esp+38H], 0 ; 00C7 _ 83. 7C 24,
38, 00
jnz ?_022 ; 00CC _ 75, C6
?_023: mov edx, dword ptr [esp+18H] ; 00CE _ 8B. 54 24,
18
mov eax, dword ptr [esp+14H] ; 00D2 _ 8B. 44 24,
14
pop edi ; 00D6 _ 5F
pop esi ; 00D7 _ 5E
pop ebp ; 00D8 _ 5D
pop ebx ; 00D9 _ 5B
add esp, 36 ; 00DA _ 83. C4, 24
ret 8 ; 00DD _ C2, 0008
?_024: push dword ptr [esp+3CH] ; 00E0 _ FF. 74 24,
3C
push dword ptr [esp+3CH] ; 00E4 _ FF. 74 24,
3C
call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00E8 _ E8,
00000000(rel)
mov edi, eax ; 00ED _ 89. C7
mov edx, dword ptr [esp+3CH] ; 00EF _ 8B. 54 24,
3C
mov ebp, edx ; 00F3 _ 89. D5
mov eax, dword ptr [esp+38H] ; 00F5 _ 8B. 44 24,
38
; Note: Zero displacement could be omitted
mov ecx, dword ptr [edx] ; 00F9 _ 8B. 4A, 00
mov ebx, dword ptr [edi] ; 00FC _ 8B. 1F
lea edx, [edx+4H] ; 00FE _ 8D. 52, 04
mov dword ptr [ebp], ebx ; 0101 _ 89. 5D, 00
mov eax, dword ptr [esp+38H] ; 0104 _ 8B. 44 24,
38
dec eax ; 0108 _ 48
mov dword ptr [edi], ecx ; 0109 _ 89. 0F
mov ebx, dword ptr [esp+14H] ; 010B _ 8B. 5C 24,
14
mov ecx, dword ptr [esp+18H] ; 010F _ 8B. 4C 24,
18
mov dword ptr [esp+38H], eax ; 0113 _ 89. 44 24,
38
dec ebx ; 0117 _ 4B
mov eax, dword ptr [esp+14H] ; 0118 _ 8B. 44 24,
14
mov dword ptr [esp+14H], ebx ; 011C _ 89. 5C 24,
14
add ecx, 4 ; 0120 _ 83. C1, 04
mov ebx, dword ptr [esp+38H] ; 0123 _ 8B. 5C 24,
38
mov dword ptr [esp+18H], ecx ; 0127 _ 89. 4C 24,
18
dec ebx ; 012B _ 4B
mov eax, dword ptr [esp+38H] ; 012C _ 8B. 44 24,
38
mov dword ptr [esp+3CH], edx ; 0130 _ 89. 54 24,
3C
mov ecx, edx ; 0134 _ 8B. CA
mov dword ptr [esp+38H], ebx ; 0136 _ 89. 5C 24,
38
mov dword ptr [esp+3CH], ecx ; 013A _ 89. 4C 24,
3C
?_025: cmp dword ptr [esp+38H], 0 ; 013E _ 83. 7C 24,
38, 00
jne ?_021 ; 0143 _ 0F 85,
FFFFFEFD
; Note: Immediate operand could be made smaller by sign extension
jmp ?_023 ; 0149 _ E9,
FFFFFF80
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
ENDP
On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <dsimcha at gmail.com> wrote:
> Sorry for the double paste. Please ignore everything before the second
> time you see
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.puremagic.com/pipermail/phobos/attachments/20100702/11298817/attachment-0001.html>
More information about the phobos
mailing list