System programming in D (Was: The God Language)

Thu Dec 29 15:47:07 PST 2011

On 12/29/2011 12:19 PM, Vladimir Panteleev wrote:
> Before you disagree with any of the above, first
> (for starters) I'd like to invite you to translate Daniel Vik's C memcpy
> implementation to D:
> http://www.danielvik.com/2010/02/fast-memcpy-in-c.html . It doesn't even
> use inline assembler or compiler intrinsics.

Ok, I have performed a direct translation (with all the preprocessor 
stuff replaced by string mixins). However, I think I could do a lot 
better starting from scratch in D. I have performed some basic testing 
with all the configuration options, and it seems to work correctly.

// File: memcpy.d direct translation of memcpy.c

/********************************************************************
  ** File:     memcpy.c
  **
  ** Copyright (C) 1999-2010 Daniel Vik
  **
  ** This software is provided 'as-is', without any express or implied
  ** warranty. In no event will the authors be held liable for any
  ** damages arising from the use of this software.
  ** Permission is granted to anyone to use this software for any
  ** purpose, including commercial applications, and to alter it and
  ** redistribute it freely, subject to the following restrictions:
  **
  ** 1. The origin of this software must not be misrepresented; you
  **    must not claim that you wrote the original software. If you
  **    use this software in a product, an acknowledgment in the
  **    product documentation would be appreciated but is not
  **    required.
  **
  ** 2. Altered source versions must be plainly marked as such, and
  **    must not be misrepresented as being the original software.
  **
  ** 3. This notice may not be removed or altered from any source
  **    distribution.
  **
  **
  ** Description: Implementation of the standard library function memcpy.
  **             This implementation of memcpy() is ANSI-C89 compatible.
  **
  **             The following configuration options can be set:
  **
  **           LITTLE_ENDIAN   - Uses processor with little endian
  **                             addressing. Default is big endian.
  **
  **           PRE_INC_PTRS    - Use pre increment of pointers.
  **                             Default is post increment of
  **                             pointers.
  **
  **           INDEXED_COPY    - Copying data using array indexing.
  **                             Using this option, disables the
  **                             PRE_INC_PTRS option.
  **
  **           MEMCPY_64BIT    - Compiles memcpy for 64 bit
  **                             architectures
  **
  **
  ** Best Settings:
  **
  ** Intel x86:  LITTLE_ENDIAN and INDEXED_COPY
  **
  *******************************************************************/

/********************************************************************
  ** Configuration definitions.
  *******************************************************************/

// Build configuration: the two options listed under "Best Settings" for
// Intel x86. They are declared here, ahead of the version(...) blocks
// further down that test them.
version = LITTLE_ENDIAN;   // tested where SHL/SHR are defined
version = INDEXED_COPY;    // tested where CP/CP_SH/INC_INDEX are chosen

/********************************************************************
  ** Includes for size_t definition
  *******************************************************************/

/********************************************************************
  ** Typedefs
  *******************************************************************/

// Sanity check for builds that explicitly request the 64 bit variant.
// D predefines D_LP64 but has no D_LP32 identifier, so a non-64-bit target
// has to be detected as "D_LP64 is not set".
version(MEMCPY_64BIT){
     version(D_LP64){} else
         static assert(0, "not a 64 bit compile");
}

// UIntN is the word type used for the bulk copy loops and TYPE_WIDTH is its
// size in bytes; both follow the pointer width of the target.
version(D_LP64){
     alias ulong              UIntN;
     enum TYPE_WIDTH =        8;
}else{
     alias uint               UIntN;
     enum TYPE_WIDTH =        4;
}

/********************************************************************
  ** Remove definitions when INDEXED_COPY is defined.
  *******************************************************************/

// INDEXED_COPY and PRE_INC_PTRS cannot be combined: stop the build when
// both version identifiers are set.
version(PRE_INC_PTRS) version(INDEXED_COPY)
     static assert(0, "cannot use INDEXED_COPY together with \nPRE_INC_PTRS!");

/********************************************************************
  ** The X template
  *******************************************************************/

/**
 * Turns template text into the source of a D string expression.
 *
 * The result is a double-quoted string literal in which every `@(expr)`
 * occurrence is replaced by `"~(expr)~"`, so that mixing the result in
 * concatenates the literal text with the value of each `expr`. Double
 * quotes and backslashes in the literal text are escaped with a backslash.
 * Parentheses inside `expr` may nest; the text of `expr` is copied as is.
 *
 * Params:
 *     x = template text, e.g. the contents of a q{...} token string
 * Returns:
 *     source text of a string expression (including the outer quotes)
 */
string Ximpl(string x){
     import utf = std.utf;
     string r = `"`;
     size_t i = 0;
     while(i < x.length){
         // The length check keeps a trailing '@' from indexing past the end.
         if(x[i]=='@' && i+1 < x.length && x[i+1]=='('){
             immutable start = i + 1;   // index of the opening '('
             size_t j = start;
             int nest = 1;
             while(nest){
                 j += utf.stride(x, j);
                 if(j >= x.length)
                     assert(0, "unterminated @( in X template text");
                 if(x[j]=='(') nest++;
                 else if(x[j]==')') nest--;
             }
             j++;                       // step past the matching ')'
             r ~= `"~` ~ x[start..j] ~ `~"`;
             i = j;
             // Restart the scan here so that an interpolation directly
             // following this one is recognised as well.
             continue;
         }
         if(x[i]=='"' || x[i]=='\\') r ~= "\\";
         // Copy one whole code point.
         immutable n = utf.stride(x, i);
         r ~= x[i..i+n];
         i += n;
     }
     return r ~ `"`;
}

/// Compile-time front end for Ximpl: X!q{...} is the string expression
/// source that Ximpl produces for the given template text, meant to be
/// handed to mixin().
template X(string x){
     enum string X = Ximpl(x);
}

/********************************************************************
  ** Definitions for pre and post increment of pointers.
  *******************************************************************/

// uses *(*&x)++ and similar to work around a bug in the parser

// Code-fragment generators for stepping pointers. Each function returns D
// source text that the copy routines mix in; `x`, `p` and `o` are the source
// text of the pointer or offset expression to operate on.
version(PRE_INC_PTRS){
     // Statement that moves the pointer back one element before first use.
     string START_VAL(string x)           {return "(*&" ~ x ~ ")--;";}
     // Lvalue expression: advance the pointer, then dereference it.
     string INC_VAL(string x)             {return "*++(*&" ~ x ~ ")";}
     // Byte pointer expression for p plus offset o plus one word.
     string CAST_TO_U8(string p, string o){
         return "(cast(ubyte*)" ~ p ~ " + " ~ o ~ " + TYPE_WIDTH)";
     }
     enum WHILE_DEST_BREAK  =                     (TYPE_WIDTH - 1);
     enum PRE_LOOP_ADJUST   =                     "- (TYPE_WIDTH - 1)";
     enum PRE_SWITCH_ADJUST =                     "+ 1";
}else{
     // Post increment needs no start adjustment: nothing is generated.
     string START_VAL(string x)           {return "";}
     // Lvalue expression: dereference the pointer, then advance it.
     string INC_VAL(string x)             {return "*(*&" ~ x ~ ")++";}
     // Byte pointer expression for p plus offset o.
     string CAST_TO_U8(string p, string o){
         return "(cast(ubyte*)" ~ p ~ " + " ~ o ~ ")";
     }
     enum WHILE_DEST_BREAK  =                     0;
     enum PRE_LOOP_ADJUST   =                     "";
     enum PRE_SWITCH_ADJUST =                     "";
}

/********************************************************************
  ** Definitions for endians
  *******************************************************************/

// Source text of the two shift operators used when merging misaligned
// words. With LITTLE_ENDIAN set the two directions are swapped.
version(LITTLE_ENDIAN){
     enum string SHL = ">>";
     enum string SHR = "<<";
}else{
     enum string SHL = "<<";
     enum string SHR = ">>";
}

/********************************************************************
  ** Macros for copying words of different alignment.
  ** Uses incrementing pointers.
  *******************************************************************/

/// Generates one statement that copies a single word from srcN to dstN,
/// stepping both pointers with INC_VAL.
string CP_INCR() {
     string target = INC_VAL("dstN");
     string source = INC_VAL("srcN");
     return "\n         " ~ target ~ " = " ~ source ~ ";\n     ";
}

/// Generates the statements that build one destination word from two
/// neighbouring source words and store it through dstN, stepping srcN and
/// dstN with INC_VAL.
///
/// `shl` and `shr` are the source text of the bit counts applied with the
/// SHL and SHR operators respectively. The generated code uses the locals
/// srcWord and dstWord, which the surrounding mixed-in code must declare.
string CP_INCR_SH(string shl, string shr) {
     string code = "\n";
     code ~= "         dstWord   = srcWord " ~ SHL ~ " " ~ shl ~ ";\n";
     code ~= "         srcWord   = " ~ INC_VAL("srcN") ~ ";\n";
     code ~= "         dstWord  |= srcWord " ~ SHR ~ " " ~ shr ~ ";\n";
     code ~= "         " ~ INC_VAL("dstN") ~ " = dstWord;\n";
     code ~= "     ";
     return code;
}

/********************************************************************
  ** Macros for copying words of different alignment.
  ** Uses array indexes.
  *******************************************************************/

/// Generates one statement that copies the word at index `idx` (source
/// text of the index expression) from srcN to dstN.
string CP_INDEX(string idx) {
     string target = "dstN[" ~ idx ~ "]";
     string source = "srcN[" ~ idx ~ "]";
     return "\n         " ~ target ~ " = " ~ source ~ ";\n     ";
}

/// Generates the statements that build one destination word from two
/// neighbouring source words and store it at dstN[x].
///
/// `x` is the source text of the index; `shl` and `shr` are the source text
/// of the bit counts applied with the SHL and SHR operators. The generated
/// code uses the locals srcWord and dstWord, which the surrounding mixed-in
/// code must declare.
string CP_INDEX_SH(string x, string shl, string shr) {
     string code = "\n";
     code ~= "         dstWord   = srcWord " ~ SHL ~ " " ~ shl ~ ";\n";
     code ~= "         srcWord   = srcN[" ~ x ~ "];\n";
     code ~= "         dstWord  |= srcWord " ~ SHR ~ " " ~ shr ~ ";\n";
     code ~= "         dstN[" ~ x ~ "]= dstWord;\n";
     code ~= "     ";
     return code;
}

/********************************************************************
  ** Macros for copying words of different alignment.
  ** Uses incrementing pointers or array indexes depending on
  ** configuration.
  *******************************************************************/

// Selects the word-copy generators used inside the unrolled loops.
//
//   CP(idx)             - statement(s) copying one word
//   CP_SH(idx,shl,shr)  - statement(s) copying one word assembled from two
//                         shifted source words
//   INC_INDEX(p, o)     - statement advancing pointer p by o words after an
//                         unrolled group (empty when pointers already move)
//
// Every generator returns complete statements, i.e. text that already ends
// in ';' (or is empty), so callers need not add a terminator.
version(INDEXED_COPY){
     alias CP_INDEX CP;
     alias CP_INDEX_SH CP_SH;
     string INC_INDEX(string p, string o){
         return mixin(X!q{
             ((@(p)) += (@(o)));
         });
     }
}else{
     // The index is ignored: the pointers themselves are stepped.
     string CP(string idx) {return CP_INCR();}
     // CP_INCR_SH already terminates its last statement; appending another
     // ';' here would generate an empty statement.
     string CP_SH(string idx, string shl, string shr){
         return CP_INCR_SH(shl, shr);
     }
     string INC_INDEX(string p, string o){return q{};}
}

/// Generates code that copies the trailing bytes one at a time through the
/// byte pointers dst8 and src8.
///
/// `count` is the source text of an expression giving the number of bytes
/// left. Values 1..7 copy that many bytes by entering the switch at the
/// matching case and continuing through the cases below it; 0 and any other
/// value copy nothing.
///
/// Each case hands over to the next with an explicit `goto case;` because D
/// does not accept implicit fall-through out of a non-empty case. START_VAL
/// yields either nothing or a complete statement, so no ';' follows it.
string COPY_REMAINING(string count) {
     return mixin(X!q{
         @(START_VAL(q{dst8}))
         @(START_VAL(q{src8}))

         switch (@(count)) {
         case 7: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 6: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 5: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 4: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 3: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 2: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 1: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8})); goto case;
         case 0:
         default: break;
         }
     });
}

/// Generates the copy path for the case where source and destination share
/// the same word alignment.
///
/// The generated code expects the locals dst8, src8, count and dest in the
/// scope it is mixed into. It copies count / TYPE_WIDTH whole words: first
/// the (length & 7) odd words one by one, then the rest in groups of eight.
/// It then copies the remaining count & (TYPE_WIDTH - 1) bytes and returns
/// dest.
///
/// CP_INCR, CP, INC_INDEX and COPY_REMAINING expand to complete statements
/// (or to nothing), so they are not followed by ';' here; an extra ';'
/// would generate an empty statement.
string COPY_NO_SHIFT() {
     return mixin(X!q{
         UIntN* dstN = cast(UIntN*)(dst8 @(PRE_LOOP_ADJUST));
         UIntN* srcN = cast(UIntN*)(src8 @(PRE_LOOP_ADJUST));
         size_t length = count / TYPE_WIDTH;

         while (length & 7) {
             @(CP_INCR())
             length--;
         }

         length /= 8;

         while (length--) {
             @(CP(q{0}))
             @(CP(q{1}))
             @(CP(q{2}))
             @(CP(q{3}))
             @(CP(q{4}))
             @(CP(q{5}))
             @(CP(q{6}))
             @(CP(q{7}))

             @(INC_INDEX(q{dstN}, q{8}))
             @(INC_INDEX(q{srcN}, q{8}))
         }

         src8 = @(CAST_TO_U8(q{srcN}, q{0}));
         dst8 = @(CAST_TO_U8(q{dstN}, q{0}));

         @(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}))

         return dest;
     });
}

/// Generates the copy path for a source that is `shift` bytes off the word
/// alignment of the destination.
///
/// `shift` is the source text of the byte offset. The generated code
/// expects the locals dst8, src8, count and dest in the scope it is mixed
/// into. It rounds both pointers down to a word boundary, preloads one
/// source word and then writes count / TYPE_WIDTH destination words, each
/// assembled from two neighbouring source words: first the (length & 7) odd
/// words one by one, then the rest in groups of eight. It then copies the
/// remaining count & (TYPE_WIDTH - 1) bytes and returns dest.
///
/// CP_INCR_SH, CP_SH, INC_INDEX and COPY_REMAINING expand to complete
/// statements (or to nothing), so they are not followed by ';' here; an
/// extra ';' would generate an empty statement.
string COPY_SHIFT(string shift) {
     // Source text of the two bit counts used for every merged word, and of
     // the byte offset that turns srcN back into the unaligned src8.
     string shlBits    = "8 * " ~ shift;
     string shrBits    = "8 * (TYPE_WIDTH - " ~ shift ~ ")";
     string tailOffset = "(" ~ shift ~ " - TYPE_WIDTH)";

     return mixin(X!q{
         UIntN* dstN  = cast(UIntN*)(((cast(UIntN)dst8) @(PRE_LOOP_ADJUST)) &
                                     ~(TYPE_WIDTH - 1));
         UIntN* srcN  = cast(UIntN*)(((cast(UIntN)src8) @(PRE_LOOP_ADJUST)) &
                                     ~(TYPE_WIDTH - 1));
         size_t length  = count / TYPE_WIDTH;
         UIntN srcWord = @(INC_VAL(q{srcN}));
         UIntN dstWord;

         while (length & 7) {
             @(CP_INCR_SH(shlBits, shrBits))
             length--;
         }

         length /= 8;

         while (length--) {
             @(CP_SH(q{0}, shlBits, shrBits))
             @(CP_SH(q{1}, shlBits, shrBits))
             @(CP_SH(q{2}, shlBits, shrBits))
             @(CP_SH(q{3}, shlBits, shrBits))
             @(CP_SH(q{4}, shlBits, shrBits))
             @(CP_SH(q{5}, shlBits, shrBits))
             @(CP_SH(q{6}, shlBits, shrBits))
             @(CP_SH(q{7}, shlBits, shrBits))

             @(INC_INDEX(q{dstN}, q{8}))
             @(INC_INDEX(q{srcN}, q{8}))
         }

         src8 = @(CAST_TO_U8(q{srcN}, tailOffset));
         dst8 = @(CAST_TO_U8(q{dstN}, q{0}));

         @(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}))

         return dest;
     });
}

/********************************************************************
  **
  ** void *memcpy(void *dest, const void *src, size_t count)
  **
  ** Args:     dest        - pointer to destination buffer
  **           src         - pointer to source buffer
  **           count       - number of bytes to copy
  **
  ** Return:   A pointer to destination buffer
  **
  ** Purpose:  Copies count bytes from src to dest.
  **           No overlap check is performed.
  **
  *******************************************************************/

void *memcpy(void *dest, const void *src, size_t count)
{
     // Byte views of both buffers. The mixed-in code below refers to these
     // locals, and to count and dest, by name.
     ubyte* dst8 = cast(ubyte*)dest;
     ubyte* src8 = cast(ubyte*)src;
     // Fewer than 8 bytes: copy them one at a time and finish.
     if (count < 8) {
         mixin(COPY_REMAINING(q{count}));
         return dest;
     }

     // Start adjustment of the pointers (generates code only with
     // PRE_INC_PTRS).
     mixin(START_VAL(q{dst8}));
     mixin(START_VAL(q{src8}));

     // Copy single bytes until the destination address, taken modulo
     // TYPE_WIDTH, equals WHILE_DEST_BREAK.
     while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
         mixin(INC_VAL(q{dst8})) = mixin(INC_VAL(q{src8}));
         count--;
     }
     // Dispatch on the offset of the source address within a word. Offset 0
     // copies whole words directly; every other offset uses the shifting
     // copy for that offset. Each mixed-in case body ends in `return dest;`.
     switch ((mixin(`(cast(UIntN)src8)`~ PRE_SWITCH_ADJUST)) & 
(TYPE_WIDTH - 1)) {
     // { } required to work around DMD bug
     case 0: {mixin(COPY_NO_SHIFT());} break;
     case 1: {mixin(COPY_SHIFT(q{1}));}   break;
     case 2: {mixin(COPY_SHIFT(q{2}));}   break;
     case 3: {mixin(COPY_SHIFT(q{3}));}   break;
     // Offsets 4..7 can only occur when a word is wider than 4 bytes.
static if(TYPE_WIDTH > 4){ // was TYPE_WIDTH >= 4. bug in original code.
     case 4: {mixin(COPY_SHIFT(q{4}));}   break;
     case 5: {mixin(COPY_SHIFT(q{5}));}   break;
     case 6: {mixin(COPY_SHIFT(q{6}));}   break;
     case 7: {mixin(COPY_SHIFT(q{7}));}   break;
}
     // The switch value is masked with TYPE_WIDTH - 1, so no other value
     // is expected here.
     default: assert(0);
     }
}

/// Small demonstration: copies an array of 13 ints with memcpy and prints
/// the destination array.
void main(){
     import std.stdio : writeln;

     int[13] source = [1,2,3,4,5,6,7,8,9,0,1,2,3];
     int[13] target;

     memcpy(target.ptr, source.ptr, source.sizeof);

     writeln(target);
}