[phobos] std.algorithm.sort slow as molasses

David Simcha dsimcha at gmail.com
Fri Jul 2 14:27:41 PDT 2010


Sorry, the list wouldn't post my accidental double paste anyhow because it
was too big.  Here's the disassembly of partition, showing only back() is
getting called and pred seems to be getting inlined, contrary to what I had
believed before.  So changing enforce() in back() to assert() gets rid of
about half the overhead, but I have no idea where the other half is coming
from.

_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
PROC NEAR
;  COMDEF
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
        sub     esp, 36                                 ; 0000 _ 83. EC, 24
        mov     ecx, dword ptr [esp+2CH]                ; 0003 _ 8B. 4C 24,
2C
        push    ebx                                     ; 0007 _ 53
        mov     ebx, dword ptr [esp+2CH]                ; 0008 _ 8B. 5C 24,
2C
        test    ebx, ebx                                ; 000C _ 85. DB
        push    ebp                                     ; 000E _ 55
        push    esi                                     ; 000F _ 56
        mov     esi, eax                                ; 0010 _ 89. C6
        push    edi                                     ; 0012 _ 57
        jnz     ?_020                                   ; 0013 _ 75, 0E
        pop     edi                                     ; 0015 _ 5F
        mov     eax, ebx                                ; 0016 _ 8B. C3
        mov     edx, ecx                                ; 0018 _ 8B. D1
        pop     esi                                     ; 001A _ 5E
        pop     ebp                                     ; 001B _ 5D
        pop     ebx                                     ; 001C _ 5B
        add     esp, 36                                 ; 001D _ 83. C4, 24
        ret     8                                       ; 0020 _ C2, 0008

?_020:  mov     dword ptr [esp+3CH], ecx                ; 0023 _ 89. 4C 24,
3C
        mov     edx, dword ptr [esp+3CH]                ; 0027 _ 8B. 54 24,
3C
        mov     dword ptr [esp+38H], ebx                ; 002B _ 89. 5C 24,
38
        mov     ebx, dword ptr [esp+38H]                ; 002F _ 8B. 5C 24,
38
        mov     dword ptr [esp+14H], ebx                ; 0033 _ 89. 5C 24,
14
        mov     dword ptr [esp+18H], edx                ; 0037 _ 89. 54 24,
18
        cmp     dword ptr [esp+38H], 0                  ; 003B _ 83. 7C 24,
38, 00
        je      ?_023                                   ; 0040 _ 0F 84,
00000088
?_021:  mov     edx, dword ptr [esp+3CH]                ; 0046 _ 8B. 54 24,
3C
        mov     eax, dword ptr [esp+38H]                ; 004A _ 8B. 44 24,
38
        mov     ebx, dword ptr [edx]                    ; 004E _ 8B. 1A
        push    dword ptr [esi+0CH]                     ; 0050 _ FF. 76, 0C
        push    dword ptr [esi+8H]                      ; 0053 _ FF. 76, 08
        mov     dword ptr [esp+18H], edx                ; 0056 _ 89. 54 24,
18
        call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 005A _ E8,
00000000(rel)
        cmp     dword ptr [eax], ebx                    ; 005F _ 39. 18
        jle     ?_022                                   ; 0061 _ 7E, 31
        mov     edi, dword ptr [esp+38H]                ; 0063 _ 8B. 7C 24,
38
        mov     ecx, dword ptr [esp+10H]                ; 0067 _ 8B. 4C 24,
10
        dec     edi                                     ; 006B _ 4F
        lea     edx, [ecx+4H]                           ; 006C _ 8D. 51, 04
        mov     ebx, dword ptr [esp+14H]                ; 006F _ 8B. 5C 24,
14
        dec     ebx                                     ; 0073 _ 4B
        mov     dword ptr [esp+3CH], edx                ; 0074 _ 89. 54 24,
3C
        mov     edx, dword ptr [esp+18H]                ; 0078 _ 8B. 54 24,
18
        mov     eax, dword ptr [esp+14H]                ; 007C _ 8B. 44 24,
14
        mov     dword ptr [esp+38H], edi                ; 0080 _ 89. 7C 24,
38
        add     edx, 4                                  ; 0084 _ 83. C2, 04
        mov     dword ptr [esp+14H], ebx                ; 0087 _ 89. 5C 24,
14
        mov     dword ptr [esp+18H], edx                ; 008B _ 89. 54 24,
18
        jmp     ?_025                                   ; 008F _ E9,
000000AA

?_022:  push    dword ptr [esp+3CH]                     ; 0094 _ FF. 74 24,
3C
        push    dword ptr [esp+3CH]                     ; 0098 _ FF. 74 24,
3C
        call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 009C _ E8,
00000000(rel)
        mov     ebx, dword ptr [eax]                    ; 00A1 _ 8B. 18
        push    dword ptr [esi+0CH]                     ; 00A3 _ FF. 76, 0C
        push    dword ptr [esi+8H]                      ; 00A6 _ FF. 76, 08
        call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00A9 _ E8,
00000000(rel)
        cmp     dword ptr [eax], ebx                    ; 00AE _ 39. 18
        jg      ?_024                                   ; 00B0 _ 7F, 2E
        mov     edi, dword ptr [esp+38H]                ; 00B2 _ 8B. 7C 24,
38
        dec     edi                                     ; 00B6 _ 4F
        mov     eax, dword ptr [esp+38H]                ; 00B7 _ 8B. 44 24,
38
        mov     dword ptr [esp+38H], edi                ; 00BB _ 89. 7C 24,
38
        mov     edx, dword ptr [esp+3CH]                ; 00BF _ 8B. 54 24,
3C
        mov     dword ptr [esp+3CH], edx                ; 00C3 _ 89. 54 24,
3C
        cmp     dword ptr [esp+38H], 0                  ; 00C7 _ 83. 7C 24,
38, 00
        jnz     ?_022                                   ; 00CC _ 75, C6
?_023:  mov     edx, dword ptr [esp+18H]                ; 00CE _ 8B. 54 24,
18
        mov     eax, dword ptr [esp+14H]                ; 00D2 _ 8B. 44 24,
14
        pop     edi                                     ; 00D6 _ 5F
        pop     esi                                     ; 00D7 _ 5E
        pop     ebp                                     ; 00D8 _ 5D
        pop     ebx                                     ; 00D9 _ 5B
        add     esp, 36                                 ; 00DA _ 83. C4, 24
        ret     8                                       ; 00DD _ C2, 0008

?_024:  push    dword ptr [esp+3CH]                     ; 00E0 _ FF. 74 24,
3C
        push    dword ptr [esp+3CH]                     ; 00E4 _ FF. 74 24,
3C
        call    _D3std5array12__T4backTAiZ4backFNcAiZi  ; 00E8 _ E8,
00000000(rel)
        mov     edi, eax                                ; 00ED _ 89. C7
        mov     edx, dword ptr [esp+3CH]                ; 00EF _ 8B. 54 24,
3C
        mov     ebp, edx                                ; 00F3 _ 89. D5
        mov     eax, dword ptr [esp+38H]                ; 00F5 _ 8B. 44 24,
38
; Note: Zero displacement could be omitted
        mov     ecx, dword ptr [edx]                    ; 00F9 _ 8B. 4A, 00
        mov     ebx, dword ptr [edi]                    ; 00FC _ 8B. 1F
        lea     edx, [edx+4H]                           ; 00FE _ 8D. 52, 04
        mov     dword ptr [ebp], ebx                    ; 0101 _ 89. 5D, 00
        mov     eax, dword ptr [esp+38H]                ; 0104 _ 8B. 44 24,
38
        dec     eax                                     ; 0108 _ 48
        mov     dword ptr [edi], ecx                    ; 0109 _ 89. 0F
        mov     ebx, dword ptr [esp+14H]                ; 010B _ 8B. 5C 24,
14
        mov     ecx, dword ptr [esp+18H]                ; 010F _ 8B. 4C 24,
18
        mov     dword ptr [esp+38H], eax                ; 0113 _ 89. 44 24,
38
        dec     ebx                                     ; 0117 _ 4B
        mov     eax, dword ptr [esp+14H]                ; 0118 _ 8B. 44 24,
14
        mov     dword ptr [esp+14H], ebx                ; 011C _ 89. 5C 24,
14
        add     ecx, 4                                  ; 0120 _ 83. C1, 04
        mov     ebx, dword ptr [esp+38H]                ; 0123 _ 8B. 5C 24,
38
        mov     dword ptr [esp+18H], ecx                ; 0127 _ 89. 4C 24,
18
        dec     ebx                                     ; 012B _ 4B
        mov     eax, dword ptr [esp+38H]                ; 012C _ 8B. 44 24,
38
        mov     dword ptr [esp+3CH], edx                ; 0130 _ 89. 54 24,
3C
        mov     ecx, edx                                ; 0134 _ 8B. CA
        mov     dword ptr [esp+38H], ebx                ; 0136 _ 89. 5C 24,
38
        mov     dword ptr [esp+3CH], ecx                ; 013A _ 89. 4C 24,
3C
?_025:  cmp     dword ptr [esp+38H], 0                  ; 013E _ 83. 7C 24,
38, 00
        jne     ?_021                                   ; 0143 _ 0F 85,
FFFFFEFD
; Note: Immediate operand could be made smaller by sign extension
        jmp     ?_023                                   ; 0149 _ E9,
FFFFFF80
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
ENDP

On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <dsimcha at gmail.com> wrote:

> Sorry for the double paste.  Please ignore everything before the second
> time you see
> _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.puremagic.com/pipermail/phobos/attachments/20100702/11298817/attachment-0001.html>


More information about the phobos mailing list