[phobos] std.algorithm.sort slow as molasses

Andrei Alexandrescu andrei at erdani.com
Fri Jul 2 14:34:09 PDT 2010


Couldn't some of the overhead come from the overhead of calling back()?

Andrei

David Simcha wrote:
> Sorry, the list wouldn't post my accidental double paste anyhow because 
> it was too big.  Here's the disassembly of partition, showing only 
> back() is getting called and pred seems to be getting inlined, contrary 
> to what I had believed before.  So changing enforce() in back() to 
> assert() gets rid of about half the overhead, but I have no idea where 
> the other half is coming from.
> 
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100 
> PROC NEAR
> ;  COMDEF 
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
>         sub     esp, 36                                 ; 0000 _ 83. EC, 24
>         mov     ecx, dword ptr [esp+2CH]                ; 0003 _ 8B. 4C 
> 24, 2C
>         push    ebx                                     ; 0007 _ 53
>         mov     ebx, dword ptr [esp+2CH]                ; 0008 _ 8B. 5C 
> 24, 2C
>         test    ebx, ebx                                ; 000C _ 85. DB
>         push    ebp                                     ; 000E _ 55
>         push    esi                                     ; 000F _ 56
>         mov     esi, eax                                ; 0010 _ 89. C6
>         push    edi                                     ; 0012 _ 57
>         jnz     ?_020                                   ; 0013 _ 75, 0E
>         pop     edi                                     ; 0015 _ 5F
>         mov     eax, ebx                                ; 0016 _ 8B. C3
>         mov     edx, ecx                                ; 0018 _ 8B. D1
>         pop     esi                                     ; 001A _ 5E
>         pop     ebp                                     ; 001B _ 5D
>         pop     ebx                                     ; 001C _ 5B
>         add     esp, 36                                 ; 001D _ 83. C4, 24
>         ret     8                                       ; 0020 _ C2, 0008
> 
> ?_020:  mov     dword ptr [esp+3CH], ecx                ; 0023 _ 89. 4C 
> 24, 3C
>         mov     edx, dword ptr [esp+3CH]                ; 0027 _ 8B. 54 
> 24, 3C
>         mov     dword ptr [esp+38H], ebx                ; 002B _ 89. 5C 
> 24, 38
>         mov     ebx, dword ptr [esp+38H]                ; 002F _ 8B. 5C 
> 24, 38
>         mov     dword ptr [esp+14H], ebx                ; 0033 _ 89. 5C 
> 24, 14
>         mov     dword ptr [esp+18H], edx                ; 0037 _ 89. 54 
> 24, 18
>         cmp     dword ptr [esp+38H], 0                  ; 003B _ 83. 7C 
> 24, 38, 00
>         je      ?_023                                   ; 0040 _ 0F 84, 
> 00000088
> ?_021:  mov     edx, dword ptr [esp+3CH]                ; 0046 _ 8B. 54 
> 24, 3C
>         mov     eax, dword ptr [esp+38H]                ; 004A _ 8B. 44 
> 24, 38
>         mov     ebx, dword ptr [edx]                    ; 004E _ 8B. 1A
>         push    dword ptr [esi+0CH]                     ; 0050 _ FF. 76, 0C
>         push    dword ptr [esi+8H]                      ; 0053 _ FF. 76, 08
>         mov     dword ptr [esp+18H], edx                ; 0056 _ 89. 54 
> 24, 18
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 005A _ E8, 
> 00000000(rel)
>         cmp     dword ptr [eax], ebx                    ; 005F _ 39. 18
>         jle     ?_022                                   ; 0061 _ 7E, 31
>         mov     edi, dword ptr [esp+38H]                ; 0063 _ 8B. 7C 
> 24, 38
>         mov     ecx, dword ptr [esp+10H]                ; 0067 _ 8B. 4C 
> 24, 10
>         dec     edi                                     ; 006B _ 4F
>         lea     edx, [ecx+4H]                           ; 006C _ 8D. 51, 04
>         mov     ebx, dword ptr [esp+14H]                ; 006F _ 8B. 5C 
> 24, 14
>         dec     ebx                                     ; 0073 _ 4B
>         mov     dword ptr [esp+3CH], edx                ; 0074 _ 89. 54 
> 24, 3C
>         mov     edx, dword ptr [esp+18H]                ; 0078 _ 8B. 54 
> 24, 18
>         mov     eax, dword ptr [esp+14H]                ; 007C _ 8B. 44 
> 24, 14
>         mov     dword ptr [esp+38H], edi                ; 0080 _ 89. 7C 
> 24, 38
>         add     edx, 4                                  ; 0084 _ 83. C2, 04
>         mov     dword ptr [esp+14H], ebx                ; 0087 _ 89. 5C 
> 24, 14
>         mov     dword ptr [esp+18H], edx                ; 008B _ 89. 54 
> 24, 18
>         jmp     ?_025                                   ; 008F _ E9, 
> 000000AA
> 
> ?_022:  push    dword ptr [esp+3CH]                     ; 0094 _ FF. 74 
> 24, 3C
>         push    dword ptr [esp+3CH]                     ; 0098 _ FF. 74 
> 24, 3C
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 009C _ E8, 
> 00000000(rel)
>         mov     ebx, dword ptr [eax]                    ; 00A1 _ 8B. 18
>         push    dword ptr [esi+0CH]                     ; 00A3 _ FF. 76, 0C
>         push    dword ptr [esi+8H]                      ; 00A6 _ FF. 76, 08
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00A9 _ E8, 
> 00000000(rel)
>         cmp     dword ptr [eax], ebx                    ; 00AE _ 39. 18
>         jg      ?_024                                   ; 00B0 _ 7F, 2E
>         mov     edi, dword ptr [esp+38H]                ; 00B2 _ 8B. 7C 
> 24, 38
>         dec     edi                                     ; 00B6 _ 4F
>         mov     eax, dword ptr [esp+38H]                ; 00B7 _ 8B. 44 
> 24, 38
>         mov     dword ptr [esp+38H], edi                ; 00BB _ 89. 7C 
> 24, 38
>         mov     edx, dword ptr [esp+3CH]                ; 00BF _ 8B. 54 
> 24, 3C
>         mov     dword ptr [esp+3CH], edx                ; 00C3 _ 89. 54 
> 24, 3C
>         cmp     dword ptr [esp+38H], 0                  ; 00C7 _ 83. 7C 
> 24, 38, 00
>         jnz     ?_022                                   ; 00CC _ 75, C6
> ?_023:  mov     edx, dword ptr [esp+18H]                ; 00CE _ 8B. 54 
> 24, 18
>         mov     eax, dword ptr [esp+14H]                ; 00D2 _ 8B. 44 
> 24, 14
>         pop     edi                                     ; 00D6 _ 5F
>         pop     esi                                     ; 00D7 _ 5E
>         pop     ebp                                     ; 00D8 _ 5D
>         pop     ebx                                     ; 00D9 _ 5B
>         add     esp, 36                                 ; 00DA _ 83. C4, 24
>         ret     8                                       ; 00DD _ C2, 0008
> 
> ?_024:  push    dword ptr [esp+3CH]                     ; 00E0 _ FF. 74 
> 24, 3C
>         push    dword ptr [esp+3CH]                     ; 00E4 _ FF. 74 
> 24, 3C
>         call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00E8 _ E8, 
> 00000000(rel)
>         mov     edi, eax                                ; 00ED _ 89. C7
>         mov     edx, dword ptr [esp+3CH]                ; 00EF _ 8B. 54 
> 24, 3C
>         mov     ebp, edx                                ; 00F3 _ 89. D5
>         mov     eax, dword ptr [esp+38H]                ; 00F5 _ 8B. 44 
> 24, 38
> ; Note: Zero displacement could be omitted
>         mov     ecx, dword ptr [edx]                    ; 00F9 _ 8B. 4A, 00
>         mov     ebx, dword ptr [edi]                    ; 00FC _ 8B. 1F
>         lea     edx, [edx+4H]                           ; 00FE _ 8D. 52, 04
>         mov     dword ptr [ebp], ebx                    ; 0101 _ 89. 5D, 00
>         mov     eax, dword ptr [esp+38H]                ; 0104 _ 8B. 44 
> 24, 38
>         dec     eax                                     ; 0108 _ 48
>         mov     dword ptr [edi], ecx                    ; 0109 _ 89. 0F
>         mov     ebx, dword ptr [esp+14H]                ; 010B _ 8B. 5C 
> 24, 14
>         mov     ecx, dword ptr [esp+18H]                ; 010F _ 8B. 4C 
> 24, 18
>         mov     dword ptr [esp+38H], eax                ; 0113 _ 89. 44 
> 24, 38
>         dec     ebx                                     ; 0117 _ 4B
>         mov     eax, dword ptr [esp+14H]                ; 0118 _ 8B. 44 
> 24, 14
>         mov     dword ptr [esp+14H], ebx                ; 011C _ 89. 5C 
> 24, 14
>         add     ecx, 4                                  ; 0120 _ 83. C1, 04
>         mov     ebx, dword ptr [esp+38H]                ; 0123 _ 8B. 5C 
> 24, 38
>         mov     dword ptr [esp+18H], ecx                ; 0127 _ 89. 4C 
> 24, 18
>         dec     ebx                                     ; 012B _ 4B
>         mov     eax, dword ptr [esp+38H]                ; 012C _ 8B. 44 
> 24, 38
>         mov     dword ptr [esp+3CH], edx                ; 0130 _ 89. 54 
> 24, 3C
>         mov     ecx, edx                                ; 0134 _ 8B. CA
>         mov     dword ptr [esp+38H], ebx                ; 0136 _ 89. 5C 
> 24, 38
>         mov     dword ptr [esp+3CH], ecx                ; 013A _ 89. 4C 
> 24, 3C
> ?_025:  cmp     dword ptr [esp+38H], 0                  ; 013E _ 83. 7C 
> 24, 38, 00
>         jne     ?_021                                   ; 0143 _ 0F 85, 
> FFFFFEFD
> ; Note: Immediate operand could be made smaller by sign extension
>         jmp     ?_023                                   ; 0149 _ E9, 
> FFFFFF80
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100 
> ENDP
> 
> On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <dsimcha at gmail.com 
> <mailto:dsimcha at gmail.com>> wrote:
> 
>     Sorry for the double paste.  Please ignore everything before the
>     second time you see
>     _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...
> 
> 
> 
> 
> ------------------------------------------------------------------------
> 
> _______________________________________________
> phobos mailing list
> phobos at puremagic.com
> http://lists.puremagic.com/mailman/listinfo/phobos


More information about the phobos mailing list