[phobos] std.algorithm.sort slow as molasses
Andrei Alexandrescu
andrei at erdani.com
Fri Jul 2 14:34:09 PDT 2010
Couldn't some of the overhead come from the overhead of calling back()?
Andrei
David Simcha wrote:
> Sorry, the list wouldn't post my accidental double paste anyhow because
> it was too big. Here's the disassembly of partition, showing only
> back() is getting called and pred seems to be getting inlined, contrary
> to what I had believed before. So changing enforce() in back() to
> assert() gets rid of about half the overhead, but I have no idea where
> the other half is coming from.
>
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
> PROC NEAR
> ; COMDEF
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
> sub esp, 36 ; 0000 _ 83. EC, 24
> mov ecx, dword ptr [esp+2CH] ; 0003 _ 8B. 4C
> 24, 2C
> push ebx ; 0007 _ 53
> mov ebx, dword ptr [esp+2CH] ; 0008 _ 8B. 5C
> 24, 2C
> test ebx, ebx ; 000C _ 85. DB
> push ebp ; 000E _ 55
> push esi ; 000F _ 56
> mov esi, eax ; 0010 _ 89. C6
> push edi ; 0012 _ 57
> jnz ?_020 ; 0013 _ 75, 0E
> pop edi ; 0015 _ 5F
> mov eax, ebx ; 0016 _ 8B. C3
> mov edx, ecx ; 0018 _ 8B. D1
> pop esi ; 001A _ 5E
> pop ebp ; 001B _ 5D
> pop ebx ; 001C _ 5B
> add esp, 36 ; 001D _ 83. C4, 24
> ret 8 ; 0020 _ C2, 0008
>
> ?_020: mov dword ptr [esp+3CH], ecx ; 0023 _ 89. 4C
> 24, 3C
> mov edx, dword ptr [esp+3CH] ; 0027 _ 8B. 54
> 24, 3C
> mov dword ptr [esp+38H], ebx ; 002B _ 89. 5C
> 24, 38
> mov ebx, dword ptr [esp+38H] ; 002F _ 8B. 5C
> 24, 38
> mov dword ptr [esp+14H], ebx ; 0033 _ 89. 5C
> 24, 14
> mov dword ptr [esp+18H], edx ; 0037 _ 89. 54
> 24, 18
> cmp dword ptr [esp+38H], 0 ; 003B _ 83. 7C
> 24, 38, 00
> je ?_023 ; 0040 _ 0F 84,
> 00000088
> ?_021: mov edx, dword ptr [esp+3CH] ; 0046 _ 8B. 54
> 24, 3C
> mov eax, dword ptr [esp+38H] ; 004A _ 8B. 44
> 24, 38
> mov ebx, dword ptr [edx] ; 004E _ 8B. 1A
> push dword ptr [esi+0CH] ; 0050 _ FF. 76, 0C
> push dword ptr [esi+8H] ; 0053 _ FF. 76, 08
> mov dword ptr [esp+18H], edx ; 0056 _ 89. 54
> 24, 18
> call _D3std5array12__T4backTAiZ4backFNcAiZi ; 005A _ E8,
> 00000000(rel)
> cmp dword ptr [eax], ebx ; 005F _ 39. 18
> jle ?_022 ; 0061 _ 7E, 31
> mov edi, dword ptr [esp+38H] ; 0063 _ 8B. 7C
> 24, 38
> mov ecx, dword ptr [esp+10H] ; 0067 _ 8B. 4C
> 24, 10
> dec edi ; 006B _ 4F
> lea edx, [ecx+4H] ; 006C _ 8D. 51, 04
> mov ebx, dword ptr [esp+14H] ; 006F _ 8B. 5C
> 24, 14
> dec ebx ; 0073 _ 4B
> mov dword ptr [esp+3CH], edx ; 0074 _ 89. 54
> 24, 3C
> mov edx, dword ptr [esp+18H] ; 0078 _ 8B. 54
> 24, 18
> mov eax, dword ptr [esp+14H] ; 007C _ 8B. 44
> 24, 14
> mov dword ptr [esp+38H], edi ; 0080 _ 89. 7C
> 24, 38
> add edx, 4 ; 0084 _ 83. C2, 04
> mov dword ptr [esp+14H], ebx ; 0087 _ 89. 5C
> 24, 14
> mov dword ptr [esp+18H], edx ; 008B _ 89. 54
> 24, 18
> jmp ?_025 ; 008F _ E9,
> 000000AA
>
> ?_022: push dword ptr [esp+3CH] ; 0094 _ FF. 74
> 24, 3C
> push dword ptr [esp+3CH] ; 0098 _ FF. 74
> 24, 3C
> call _D3std5array12__T4backTAiZ4backFNcAiZi ; 009C _ E8,
> 00000000(rel)
> mov ebx, dword ptr [eax] ; 00A1 _ 8B. 18
> push dword ptr [esi+0CH] ; 00A3 _ FF. 76, 0C
> push dword ptr [esi+8H] ; 00A6 _ FF. 76, 08
> call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00A9 _ E8,
> 00000000(rel)
> cmp dword ptr [eax], ebx ; 00AE _ 39. 18
> jg ?_024 ; 00B0 _ 7F, 2E
> mov edi, dword ptr [esp+38H] ; 00B2 _ 8B. 7C
> 24, 38
> dec edi ; 00B6 _ 4F
> mov eax, dword ptr [esp+38H] ; 00B7 _ 8B. 44
> 24, 38
> mov dword ptr [esp+38H], edi ; 00BB _ 89. 7C
> 24, 38
> mov edx, dword ptr [esp+3CH] ; 00BF _ 8B. 54
> 24, 3C
> mov dword ptr [esp+3CH], edx ; 00C3 _ 89. 54
> 24, 3C
> cmp dword ptr [esp+38H], 0 ; 00C7 _ 83. 7C
> 24, 38, 00
> jnz ?_022 ; 00CC _ 75, C6
> ?_023: mov edx, dword ptr [esp+18H] ; 00CE _ 8B. 54
> 24, 18
> mov eax, dword ptr [esp+14H] ; 00D2 _ 8B. 44
> 24, 14
> pop edi ; 00D6 _ 5F
> pop esi ; 00D7 _ 5E
> pop ebp ; 00D8 _ 5D
> pop ebx ; 00D9 _ 5B
> add esp, 36 ; 00DA _ 83. C4, 24
> ret 8 ; 00DD _ C2, 0008
>
> ?_024: push dword ptr [esp+3CH] ; 00E0 _ FF. 74
> 24, 3C
> push dword ptr [esp+3CH] ; 00E4 _ FF. 74
> 24, 3C
> call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00E8 _ E8,
> 00000000(rel)
> mov edi, eax ; 00ED _ 89. C7
> mov edx, dword ptr [esp+3CH] ; 00EF _ 8B. 54
> 24, 3C
> mov ebp, edx ; 00F3 _ 89. D5
> mov eax, dword ptr [esp+38H] ; 00F5 _ 8B. 44
> 24, 38
> ; Note: Zero displacement could be omitted
> mov ecx, dword ptr [edx] ; 00F9 _ 8B. 4A, 00
> mov ebx, dword ptr [edi] ; 00FC _ 8B. 1F
> lea edx, [edx+4H] ; 00FE _ 8D. 52, 04
> mov dword ptr [ebp], ebx ; 0101 _ 89. 5D, 00
> mov eax, dword ptr [esp+38H] ; 0104 _ 8B. 44
> 24, 38
> dec eax ; 0108 _ 48
> mov dword ptr [edi], ecx ; 0109 _ 89. 0F
> mov ebx, dword ptr [esp+14H] ; 010B _ 8B. 5C
> 24, 14
> mov ecx, dword ptr [esp+18H] ; 010F _ 8B. 4C
> 24, 18
> mov dword ptr [esp+38H], eax ; 0113 _ 89. 44
> 24, 38
> dec ebx ; 0117 _ 4B
> mov eax, dword ptr [esp+14H] ; 0118 _ 8B. 44
> 24, 14
> mov dword ptr [esp+14H], ebx ; 011C _ 89. 5C
> 24, 14
> add ecx, 4 ; 0120 _ 83. C1, 04
> mov ebx, dword ptr [esp+38H] ; 0123 _ 8B. 5C
> 24, 38
> mov dword ptr [esp+18H], ecx ; 0127 _ 89. 4C
> 24, 18
> dec ebx ; 012B _ 4B
> mov eax, dword ptr [esp+38H] ; 012C _ 8B. 44
> 24, 38
> mov dword ptr [esp+3CH], edx ; 0130 _ 89. 54
> 24, 3C
> mov ecx, edx ; 0134 _ 8B. CA
> mov dword ptr [esp+38H], ebx ; 0136 _ 89. 5C
> 24, 38
> mov dword ptr [esp+3CH], ecx ; 013A _ 89. 4C
> 24, 3C
> ?_025: cmp dword ptr [esp+38H], 0 ; 013E _ 83. 7C
> 24, 38, 00
> jne ?_021 ; 0143 _ 0F 85,
> FFFFFEFD
> ; Note: Immediate operand could be made smaller by sign extension
> jmp ?_023 ; 0149 _ E9,
> FFFFFF80
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
> ENDP
>
> On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <dsimcha at gmail.com
> <mailto:dsimcha at gmail.com>> wrote:
>
> Sorry for the double paste. Please ignore everything before the
> second time you see
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...
>
>
>
>
> ------------------------------------------------------------------------
>
> _______________________________________________
> phobos mailing list
> phobos at puremagic.com
> http://lists.puremagic.com/mailman/listinfo/phobos
More information about the phobos
mailing list