[Issue 9629] toUpperInPlace doesn't work properly with unicode characters
d-bugmail at puremagic.com
d-bugmail at puremagic.com
Mon Mar 4 20:56:00 PST 2013
http://d.puremagic.com/issues/show_bug.cgi?id=9629
--- Comment #2 from Andrej Mitrovic <andrej.mitrovich at gmail.com> 2013-03-04 20:55:59 PST ---
This seems like a codegen bug:
import std.ascii;
import std.conv;
import std.stdio;
import std.utf;
/**
 * Upper-cases the contents of `s`, which is received by reference, and then
 * prints the result prefixed with "Inside: ".
 *
 * The array is walked one element at a time:
 *   - elements in the range 'a'..'z' are overwritten in place;
 *   - non-ASCII elements are decoded to a `dchar`, upper-cased, and spliced
 *     back in by assigning a freshly concatenated array to `s`;
 *   - any other element is skipped.
 *
 * Params:
 *   s = array of element type `C`; because it is `ref`, the reassignment in
 *       the non-ASCII branch rebinds whatever the caller passed.
 *
 * NOTE(review): `std.uni` is used by fully qualified name, but the imports
 * shown with this snippet are std.ascii, std.conv, std.stdio and std.utf only
 * - confirm std.uni is reachable through one of them.
 */
void upper(C)(ref C[] s)
{
// No increment clause: each branch below advances `i` by its own amount.
for (size_t i = 0; i < s.length; )
{
immutable c = s[i];
// ASCII lower-case letter: subtract the 'a'-'A' offset and store in place.
if ('a' <= c && c <= 'z')
{
s[i++] = cast(C) (c - (cast(C)'a' - 'A'));
}
// Non-ASCII element: handle the whole code point starting at index i.
else if (!std.ascii.isASCII(c))
{
// `j` starts at `i` and is handed to decode(); afterwards s[j .. $] is used
// as the remainder, so decode() is presumably expected to advance `j` past
// the decoded sequence - verify against the std.utf.decode documentation.
size_t j = i;
dchar dc = decode(s, j);
auto toAdd = to!(C[])(std.uni.toUpper(dc));
// Rebuild the array as prefix ~ replacement ~ remainder and assign it to the
// ref parameter; from here on `s` is the result of the concatenation.
s = s[0 .. i] ~ toAdd ~ s[j .. $];
// Continue just after the inserted replacement.
i += toAdd.length;
}
else
{
// Any other ASCII element is left unchanged.
++i;
}
}
writefln("Inside: %s", s);
}
/**
 * Reproduction driver: duplicates a wide-string literal into a mutable
 * `wchar[]`, passes the slice expression `s1[]` to `upper`, and prints `s1`
 * afterwards prefixed with "Outside: ".
 */
void main()
{
wchar[] s1 = "þ abcdef"w.dup;
// The argument is written as `s1[]`; `upper` takes its parameter by `ref`.
upper(s1[]);
writefln("Outside: %s\n", s1); // Þ ABCDEF
}
If you change the call "upper(s1[]);" to "upper(s1[0..$]);", you get "Þ abcdef" instead.
The generated assembly differs significantly between the two calls (with all
writefln calls removed first).
The [] version:
; Disassembly of D main() for the `upper(s1[])` call (32-bit x86, Intel syntax).
; The bare "xxxxxxxx(segrel)" / "00000000(rel)" lines appear to be the wrapped
; tail of the byte-listing comment on the line above them.
__Dmain:; Function begin, communal
; Standard frame setup; 8 bytes of locals are reserved at [ebp-8H]..[ebp-1H].
push ebp ; 0000 _ 55
mov ebp, esp ; 0001 _ 8B. EC
sub esp, 8 ; 0003 _ 83. EC, 08
; Three dwords are pushed before the call to __adDupT: two values loaded from
; ?_0003 and ?_0002, then the address of the TypeInfo initializer.
; NOTE(review): presumably this is the `.dup` of the string literal - confirm.
push dword [?_0003] ; 0006 _ FF. 35,
0000001C(segrel)
push dword [?_0002] ; 000C _ FF. 35,
00000018(segrel)
mov eax, FLAT:_D12TypeInfo_Ayu6__initZ ; 0012 _ B8,
00000000(segrel)
push eax ; 0017 _ 50
call __adDupT ; 0018 _ E8,
00000000(rel)
; The eax:edx pair left by the call is stored into the 8-byte local.
mov dword [ebp-8H], eax ; 001D _ 89. 45, F8
mov dword [ebp-4H], edx ; 0020 _ 89. 55, FC
; eax = address of that same local; it is the only value set up for the call
; to upper, so the callee is handed the location the pair was just stored to.
lea eax, [ebp-8H] ; 0023 _ 8D. 45, F8
call _D5upper12__T5upperTuZ5upperFKAuZv ; 0026 _ E8,
00000000(rel)
; eax = 0, then drop the 12 bytes (three dwords) pushed above.
xor eax, eax ; 002B _ 31. C0
add esp, 12 ; 002D _ 83. C4, 0C
leave ; 0030 _ C9
ret ; 0031 _ C3
; __Dmain End of function
And the [0..$] version:
; Disassembly of D main() for the `upper(s1[0..$])` call (32-bit x86, Intel
; syntax). The bare "xxxxxxxx(segrel)" / "00000000(rel)" lines appear to be the
; wrapped tail of the byte-listing comment on the line above them.
__Dmain:; Function begin, communal
; Frame setup; 24 bytes of locals are reserved and ebx is saved.
push ebp ; 0000 _ 55
mov ebp, esp ; 0001 _ 8B. EC
sub esp, 24 ; 0003 _ 83. EC, 18
push ebx ; 0006 _ 53
; Three dwords are pushed before the call to __adDupT: two values loaded from
; ?_0003 and ?_0002, then the address of the TypeInfo initializer.
; NOTE(review): presumably this is the `.dup` of the string literal - confirm.
push dword [?_0003] ; 0007 _ FF. 35,
0000001C(segrel)
push dword [?_0002] ; 000D _ FF. 35,
00000018(segrel)
mov eax, FLAT:_D12TypeInfo_Ayu6__initZ ; 0013 _ B8,
00000000(segrel)
push eax ; 0018 _ 50
call __adDupT ; 0019 _ E8,
00000000(rel)
; The eax:edx pair left by the call is stored at [ebp-18H] / [ebp-14H].
mov dword [ebp-18H], eax ; 001E _ 89. 45, E8
mov dword [ebp-14H], edx ; 0021 _ 89. 55, EC
; The first dword of the pair is reloaded into ecx and copied to [ebp-10H].
mov ecx, dword [ebp-18H] ; 0024 _ 8B. 4D, E8
mov dword [ebp-10H], ecx ; 0027 _ 89. 4D, F0
; ecx is compared with itself, so the result is always "equal" and jbe is
; always taken; the call to _D5upper7__arrayZ (with eax = 37) is skipped.
; NOTE(review): presumably the bounds check for the `0..$` slice, with 37
; being a source line number - confirm.
cmp ecx, ecx ; 002A _ 39. C9
jbe ?_0417 ; 002C _ 76, 0A
mov eax, 37 ; 002E _ B8, 00000025
call _D5upper7__arrayZ ; 0033 _ E8,
00000000(rel)
; Build a second 8-byte pair at [ebp-8H] / [ebp-4H] from the saved values.
; The eax loaded from [ebp-18H] here is overwritten by the lea below before
; it is used.
?_0417: mov ebx, dword [ebp-10H] ; 0038 _ 8B. 5D, F0
mov edx, dword [ebp-14H] ; 003B _ 8B. 55, EC
mov eax, dword [ebp-18H] ; 003E _ 8B. 45, E8
mov dword [ebp-8H], ebx ; 0041 _ 89. 5D, F8
mov dword [ebp-4H], edx ; 0044 _ 89. 55, FC
; eax = address of the copy at [ebp-8H], not of the original pair at
; [ebp-18H]; this is the value set up for the call to upper.
lea eax, [ebp-8H] ; 0047 _ 8D. 45, F8
call _D5upper12__T5upperTuZ5upperFKAuZv ; 004A _ E8,
00000000(rel)
; eax = 0, drop the 12 bytes (three dwords) pushed above, restore ebx.
xor eax, eax ; 004F _ 31. C0
add esp, 12 ; 0051 _ 83. C4, 0C
pop ebx ; 0054 _ 5B
leave ; 0055 _ C9
ret ; 0056 _ C3
; __Dmain End of function
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
More information about the Digitalmars-d-bugs
mailing list