Reducing the cost of autodecoding

Stefan Koch via Digitalmars-d digitalmars-d at puremagic.com
Fri Oct 14 17:50:08 PDT 2016


On Friday, 14 October 2016 at 20:47:39 UTC, Stefan Koch wrote:
> On Thursday, 13 October 2016 at 21:49:22 UTC, safety0ff wrote:
>>> Bad benchmark! Bad! -- Andrei
>>
>> Also, I suspect a benchmark with a larger loop body might not 
>> benefit as significantly from branch hints as this one.
>
> I disagree in longer loops code compactness is as important as 
> in small ones.
>
> This is about the smallest inline version of decode I could 
> come up with :
>
> __gshared static immutable ubyte[] charWidthTab = [
>             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
>             2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
>             3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
>             4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 1, 1
> ];
>
> dchar myFront(ref char[] str) pure nothrow
> {
>     dchar c = cast(dchar) str[0];
>     if ((c & 128))
>     {
>         if (c & 64)
>         	final switch(charWidthTab[c - 192])
>         {
>             case 2 :
>                 c |= ((str[1] & 0x80) >> 5);
>             break;
>             case 3 :
>                c |= ((str[1] & 0x80) >> 4);
>                c |= ((str[2] & 0x80) >> 10);
>             break;
>             case 4 :
>                c |= ((str[1] & 0x80) >> 3);
>                c |= ((str[2] & 0x80) >> 9);
>                c |= ((str[3] & 0x80) >> 15);
>             break;
>             case 5,6,1 :
>               goto Linvalid;
>         }
>         else
>         Linvalid :
>         	c = dchar.init;
>
>     }
> 	return c;
> }

Disregard all that code.
It is horribly wrong!

This is more correct : (Tough for some reason it does not pass 
the unittests)

dchar myFront(ref char[] str) pure
{
     dchar c = cast(dchar) str.ptr[0];
     if (c & 128)
     {
         if (c & 64)
         {
             auto l = charWidthTab.ptr[c - 192];
             if (str.length < l)
                 goto Linvalid;

             final switch (l)
             {
             case 2:
                 c = ((c & ~(64 | 128)) << 6);
                 c |= (str.ptr[1] & ~0x80);
                 break;
             case 3:
                 c = ((c & ~(32 | 64 | 128)) << 12);
                 c |= ((str.ptr[1] & ~0x80) << 6);
                 c |= ((str.ptr[2] & ~0x80));
                 break;
             case 4:
                 c = ((c & ~(16 | 32 | 64 | 128)) << 18);
                 c |= ((str.ptr[1] & ~0x80) << 12);
                 c |= ((str.ptr[2] & ~0x80) << 6);
                 c |= ((str.ptr[3] & ~0x80));
                 break;
             case 5, 6, 1:
                 goto Linvalid;
             }
         }
         else
     Linvalid : throw new Exception("yadayada");

     }
     return c;
}


More information about the Digitalmars-d mailing list