[Issue 9629] toUpperInPlace doesn't work properly with unicode characters

d-bugmail at puremagic.com d-bugmail at puremagic.com
Mon Mar 4 20:56:00 PST 2013


http://d.puremagic.com/issues/show_bug.cgi?id=9629



--- Comment #2 from Andrej Mitrovic <andrej.mitrovich at gmail.com> 2013-03-04 20:55:59 PST ---
This seems like a codegen bug:

import std.ascii;
import std.conv;
import std.stdio;
import std.utf;

// Upper-cases the contents of `s` and prints the result.
// `s` is taken by ref: ASCII letters are overwritten element by element,
// while the non-ASCII branch rebinds `s` itself to a newly built array.
// C is the element type of the array being processed.
void upper(C)(ref C[] s)
{
    // No increment clause: every branch below advances i on its own.
    for (size_t i = 0; i < s.length; )
    {
        immutable c = s[i];
        if ('a' <= c && c <= 'z')
        {
            // ASCII lowercase: shift by the 'a'-'A' distance, store in place,
            // then step to the next element.
            s[i++] = cast(C) (c - (cast(C)'a' - 'A'));
        }
        else if (!std.ascii.isASCII(c))
        {
            // j starts as a copy of i and is handed to decode(); afterwards it
            // is used as the start index of the not-yet-processed tail.
            size_t j = i;
            dchar dc = decode(s, j);
            // Upper-case the decoded code point and convert it to a C[].
            auto toAdd = to!(C[])(std.uni.toUpper(dc));
            // Rebind s to: processed prefix ~ converted character ~ tail.
            s = s[0 .. i] ~ toAdd  ~ s[j .. $];
            // Skip over the elements that were just spliced in.
            i += toAdd.length;
        }
        else
        {
            // Any other ASCII element is left as it is.
            ++i;
        }
    }

    // Shows the array as seen through the ref parameter, for comparison with
    // what the caller prints.
    writefln("Inside: %s", s);
}

// Reproduction driver: builds a mutable wide string, runs upper() on a slice
// of it, and prints what the caller sees afterwards.
void main()
{
    // .dup yields a mutable copy of the w-suffixed string literal.
    wchar[] s1 = "þ abcdef"w.dup;
    // The argument is spelled as the slice expression s1[]; upper() takes its
    // parameter by ref.
    upper(s1[]);
    writefln("Outside: %s\n", s1);  // Þ ABCDEF
}

If you change the call "upper(s1[]);" to "upper(s1[0..$]);" you get "Þ abcdef".

The assembly differs significantly between the two calls (with all
writefln calls removed first).

The [] version:

; Disassembly of main() for the `upper(s1[])` call (32-bit x86, Intel syntax).
; The trailing column on each line is the code offset and encoding bytes.
__Dmain:; Function begin, communal
        push    ebp                                     ; 0000 _ 55
        mov     ebp, esp                                ; 0001 _ 8B. EC
        ; Reserve 8 bytes of locals: the dwords at [ebp-8H] and [ebp-4H].
        sub     esp, 8                                  ; 0003 _ 83. EC, 08
        ; Three dwords are pushed before the __adDupT call: the contents of
        ; ?_0003 and ?_0002, then the TypeInfo address loaded into eax.
        push    dword [?_0003]                          ; 0006 _ FF. 35,
0000001C(segrel)
        push    dword [?_0002]                          ; 000C _ FF. 35,
00000018(segrel)
        mov     eax, FLAT:_D12TypeInfo_Ayu6__initZ      ; 0012 _ B8,
00000000(segrel)
        push    eax                                     ; 0017 _ 50
        call    __adDupT                                ; 0018 _ E8,
00000000(rel)
        ; Store the eax/edx pair left by __adDupT into the 8-byte local.
        mov     dword [ebp-8H], eax                     ; 001D _ 89. 45, F8
        mov     dword [ebp-4H], edx                     ; 0020 _ 89. 55, FC
        ; eax = address of that same local at the point of the call, so the
        ; callee is given the slot where the __adDupT result was stored.
        lea     eax, [ebp-8H]                           ; 0023 _ 8D. 45, F8
        call    _D5upper12__T5upperTuZ5upperFKAuZv      ; 0026 _ E8,
00000000(rel)
        ; eax = 0 on return; the add drops the 12 bytes pushed above.
        xor     eax, eax                                ; 002B _ 31. C0
        add     esp, 12                                 ; 002D _ 83. C4, 0C
        leave                                           ; 0030 _ C9
        ret                                             ; 0031 _ C3
; __Dmain End of function


And the [0..$] version:


; Disassembly of main() for the `upper(s1[0..$])` call (32-bit x86, Intel
; syntax). The trailing column on each line is the code offset and encoding.
__Dmain:; Function begin, communal
        push    ebp                                     ; 0000 _ 55
        mov     ebp, esp                                ; 0001 _ 8B. EC
        ; 24 bytes of locals this time; slots from [ebp-18H] up to [ebp-4H]
        ; are used below.
        sub     esp, 24                                 ; 0003 _ 83. EC, 18
        ; ebx is used as a temporary further down; saved here, restored at 0054.
        push    ebx                                     ; 0006 _ 53
        ; Three dwords are pushed before the __adDupT call: the contents of
        ; ?_0003 and ?_0002, then the TypeInfo address loaded into eax.
        push    dword [?_0003]                          ; 0007 _ FF. 35,
0000001C(segrel)
        push    dword [?_0002]                          ; 000D _ FF. 35,
00000018(segrel)
        mov     eax, FLAT:_D12TypeInfo_Ayu6__initZ      ; 0013 _ B8,
00000000(segrel)
        push    eax                                     ; 0018 _ 50
        call    __adDupT                                ; 0019 _ E8,
00000000(rel)
        ; Store the eax/edx pair left by __adDupT at [ebp-18H]/[ebp-14H].
        mov     dword [ebp-18H], eax                    ; 001E _ 89. 45, E8
        mov     dword [ebp-14H], edx                    ; 0021 _ 89. 55, EC
        ; Copy the dword at [ebp-18H] into [ebp-10H] via ecx.
        mov     ecx, dword [ebp-18H]                    ; 0024 _ 8B. 4D, E8
        mov     dword [ebp-10H], ecx                    ; 0027 _ 89. 4D, F0
        ; ecx is compared with itself, so the jbe is always taken and the
        ; __array call below is never reached.
        cmp     ecx, ecx                                ; 002A _ 39. C9
        jbe     ?_0417                                  ; 002C _ 76, 0A
        ; NOTE(review): 37 is presumably a source line number passed to the
        ; array-bounds failure routine -- not confirmed by this listing.
        mov     eax, 37                                 ; 002E _ B8, 00000025
        call    _D5upper7__arrayZ                       ; 0033 _ E8,
00000000(rel)
        ; Assemble a second pair at [ebp-8H]/[ebp-4H] from [ebp-10H] and
        ; [ebp-14H]. The eax load at 003E is overwritten by the lea at 0047.
?_0417: mov     ebx, dword [ebp-10H]                    ; 0038 _ 8B. 5D, F0
        mov     edx, dword [ebp-14H]                    ; 003B _ 8B. 55, EC
        mov     eax, dword [ebp-18H]                    ; 003E _ 8B. 45, E8
        mov     dword [ebp-8H], ebx                     ; 0041 _ 89. 5D, F8
        mov     dword [ebp-4H], edx                     ; 0044 _ 89. 55, FC
        ; eax = address of the second pair at [ebp-8H], not of [ebp-18H] where
        ; the __adDupT result was stored; nothing after the call reads
        ; [ebp-8H] back.
        lea     eax, [ebp-8H]                           ; 0047 _ 8D. 45, F8
        call    _D5upper12__T5upperTuZ5upperFKAuZv      ; 004A _ E8,
00000000(rel)
        ; eax = 0 on return; the add drops the 12 bytes pushed for __adDupT.
        xor     eax, eax                                ; 004F _ 31. C0
        add     esp, 12                                 ; 0051 _ 83. C4, 0C
        pop     ebx                                     ; 0054 _ 5B
        leave                                           ; 0055 _ C9
        ret                                             ; 0056 _ C3
; __Dmain End of function

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------


More information about the Digitalmars-d-bugs mailing list