System programming in D (Was: The God Language)
Timon Gehr
timon.gehr at gmx.ch
Thu Dec 29 15:47:07 PST 2011
On 12/29/2011 12:19 PM, Vladimir Panteleev wrote:
> Before you disagree with any of the above, first
> (for starters) I'd like to invite you to translate Daniel Vik's C memcpy
> implementation to D:
> http://www.danielvik.com/2010/02/fast-memcpy-in-c.html . It doesn't even
> use inline assembler or compiler intrinsics.
Ok, I have performed a direct translation (with all the preprocessor
stuff replaced by string mixins). However, I think I could do a lot
better starting from scratch in D. I have performed some basic testing
with all the configuration options, and it seems to work correctly.
// File: memcpy.d direct translation of memcpy.c
/********************************************************************
** File: memcpy.c
**
** Copyright (C) 1999-2010 Daniel Vik
**
** This software is provided 'as-is', without any express or implied
** warranty. In no event will the authors be held liable for any
** damages arising from the use of this software.
** Permission is granted to anyone to use this software for any
** purpose, including commercial applications, and to alter it and
** redistribute it freely, subject to the following restrictions:
**
** 1. The origin of this software must not be misrepresented; you
** must not claim that you wrote the original software. If you
** use this software in a product, an acknowledgment in the
** use this software in a product, an acknowledgment in the
** product documentation would be appreciated but is not
** required.
**
** 2. Altered source versions must be plainly marked as such, and
** must not be misrepresented as being the original software.
**
** 3. This notice may not be removed or altered from any source
** distribution.
**
**
** Description: Implementation of the standard library function memcpy.
** This implementation of memcpy() is ANSI-C89 compatible.
**
** The following configuration options can be set:
**
** LITTLE_ENDIAN - Uses processor with little endian
** addressing. Default is big endian.
**
** PRE_INC_PTRS - Use pre increment of pointers.
** Default is post increment of
** pointers.
**
** INDEXED_COPY - Copying data using array indexing.
** Using this option, disables the
** PRE_INC_PTRS option.
**
** MEMCPY_64BIT - Compiles memcpy for 64 bit
** architectures
**
**
** Best Settings:
**
** Intel x86: LITTLE_ENDIAN and INDEXED_COPY
**
*******************************************************************/
/********************************************************************
** Configuration definitions.
*******************************************************************/
version = LITTLE_ENDIAN;
version = INDEXED_COPY;
/********************************************************************
** Includes for size_t definition
*******************************************************************/
/********************************************************************
** Typedefs
*******************************************************************/
version(MEMCPY_64BIT) version(D_LP32) static assert(0, "not a 64 bit
compile");
version(D_LP64){
alias ulong UIntN;
enum TYPE_WIDTH = 8;
}else{
alias uint UIntN;
enum TYPE_WIDTH = 4;
}
/********************************************************************
** Remove definitions when INDEXED_COPY is defined.
*******************************************************************/
version(INDEXED_COPY){
version(PRE_INC_PTRS)
static assert(0, "cannot use INDEXED_COPY together with
PRE_INC_PTRS!");
}
/********************************************************************
** The X template
*******************************************************************/
string Ximpl(string x){
import utf = std.utf;
string r=`"`;
for(typeof(x.length)
i=0;i<x.length;r~=x[i..i+utf.stride(x,i)],i+=utf.stride(x,i)){
if(x[i]=='@'&&x[i+1]=='('){
auto start = ++i; int nest=1;
while(nest){
i+=utf.stride(x,i);
if(x[i]=='(') nest++;
else if(x[i]==')') nest--;
}
i++;
r~=`"~`~x[start..i]~`~"`;
if(i==x.length) break;
}
if(x[i]=='"'||x[i]=='\\'){r~="\\"; continue;}
}
return r~`"`;
}
template X(string x){
enum X = Ximpl(x);
}
/********************************************************************
** Definitions for pre and post increment of pointers.
*******************************************************************/
// uses *(*&x)++ and similar to work around a bug in the parser
version(PRE_INC_PTRS){
string START_VAL(string x) {return mixin(X!q{(*&@(x))--;});}
string INC_VAL(string x) {return mixin(X!q{*++(*&@(x))});}
string CAST_TO_U8(string p, string o){
return mixin(X!q{(cast(ubyte*)@(p) + @(o) + TYPE_WIDTH)});
}
enum WHILE_DEST_BREAK = (TYPE_WIDTH - 1);
enum PRE_LOOP_ADJUST = q{- (TYPE_WIDTH - 1)};
enum PRE_SWITCH_ADJUST = q{+ 1};
}else{
string START_VAL(string x) {return q{};}
string INC_VAL(string x) {return mixin(X!q{*(*&@(x))++});}
string CAST_TO_U8(string p, string o){
return mixin(X!q{(cast(ubyte*)@(p) + @(o))});
}
enum WHILE_DEST_BREAK = 0;
enum PRE_LOOP_ADJUST = q{};
enum PRE_SWITCH_ADJUST = q{};
}
/********************************************************************
** Definitions for endians
*******************************************************************/
version(LITTLE_ENDIAN){
enum SHL = q{>>};
enum SHR = q{<<};
}else{
enum SHL = q{<<};
enum SHR = q{>>};
}
/********************************************************************
** Macros for copying words of different alignment.
** Uses incremening pointers.
*******************************************************************/
string CP_INCR() {
return mixin(X!q{
@(INC_VAL(q{dstN})) = @(INC_VAL(q{srcN}));
});
}
string CP_INCR_SH(string shl, string shr) {
return mixin(X!q{
dstWord = srcWord @(SHL) @(shl);
srcWord = @(INC_VAL(q{srcN}));
dstWord |= srcWord @(SHR) @(shr);
@(INC_VAL(q{dstN})) = dstWord;
});
}
/********************************************************************
** Macros for copying words of different alignment.
** Uses array indexes.
*******************************************************************/
string CP_INDEX(string idx) {
return mixin(X!q{
dstN[@(idx)] = srcN[@(idx)];
});
}
string CP_INDEX_SH(string x, string shl, string shr) {
return mixin(X!q{
dstWord = srcWord @(SHL) @(shl);
srcWord = srcN[@(x)];
dstWord |= srcWord @(SHR) @(shr);
dstN[@(x)]= dstWord;
});
}
/********************************************************************
** Macros for copying words of different alignment.
** Uses incremening pointers or array indexes depending on
** configuration.
*******************************************************************/
version(INDEXED_COPY){
alias CP_INDEX CP;
alias CP_INDEX_SH CP_SH;
string INC_INDEX(string p, string o){
return mixin(X!q{
((@(p)) += (@(o)));
});
}
}else{
string CP(string idx) {return mixin(X!q{@(CP_INCR())});}
string CP_SH(string idx, string shl, string shr){
return mixin(X!q{
@(CP_INCR_SH(mixin(X!q{@(shl)}), mixin(X!q{@(shr)})));
});
}
string INC_INDEX(string p, string o){return q{};}
}
string COPY_REMAINING(string count) {
return mixin(X!q{
@(START_VAL(q{dst8}));
@(START_VAL(q{src8}));
switch (@(count)) {
case 7: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 6: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 5: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 4: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 3: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 2: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 1: @(INC_VAL(q{dst8})) = @(INC_VAL(q{src8}));
case 0:
default: break;
}
});
}
string COPY_NO_SHIFT() {
return mixin(X!q{
UIntN* dstN = cast(UIntN*)(dst8 @(PRE_LOOP_ADJUST));
UIntN* srcN = cast(UIntN*)(src8 @(PRE_LOOP_ADJUST));
size_t length = count / TYPE_WIDTH;
while (length & 7) {
@(CP_INCR());
length--;
}
length /= 8;
while (length--) {
@(CP(q{0}));
@(CP(q{1}));
@(CP(q{2}));
@(CP(q{3}));
@(CP(q{4}));
@(CP(q{5}));
@(CP(q{6}));
@(CP(q{7}));
@(INC_INDEX(q{dstN}, q{8}));
@(INC_INDEX(q{srcN}, q{8}));
}
src8 = @(CAST_TO_U8(q{srcN}, q{0}));
dst8 = @(CAST_TO_U8(q{dstN}, q{0}));
@(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}));
return dest;
});
}
string COPY_SHIFT(string shift) {
return mixin(X!q{
UIntN* dstN = cast(UIntN*)(((cast(UIntN)dst8)
@(PRE_LOOP_ADJUST)) &
~(TYPE_WIDTH - 1));
UIntN* srcN = cast(UIntN*)(((cast(UIntN)src8)
@(PRE_LOOP_ADJUST)) &
~(TYPE_WIDTH - 1));
size_t length = count / TYPE_WIDTH;
UIntN srcWord = @(INC_VAL(q{srcN}));
UIntN dstWord;
while (length & 7) {
@(CP_INCR_SH(mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
length--;
}
length /= 8;
while (length--) {
@(CP_SH(q{0}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{1}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{2}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{3}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{4}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{5}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{6}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(CP_SH(q{7}, mixin(X!q{8 * @(shift)}), mixin(X!q{8 *
(TYPE_WIDTH - @(shift))})));
@(INC_INDEX(q{dstN}, q{8}));
@(INC_INDEX(q{srcN}, q{8}));
}
src8 = @(CAST_TO_U8(q{srcN}, mixin(X!q{(@(shift) -
TYPE_WIDTH)})));
dst8 = @(CAST_TO_U8(q{dstN}, q{0}));
@(COPY_REMAINING(q{count & (TYPE_WIDTH - 1)}));
return dest;
});
}
/********************************************************************
**
** void *memcpy(void *dest, const void *src, size_t count)
**
** Args: dest - pointer to destination buffer
** src - pointer to source buffer
** count - number of bytes to copy
**
** Return: A pointer to destination buffer
**
** Purpose: Copies count bytes from src to dest.
** No overlap check is performed.
**
*******************************************************************/
void *memcpy(void *dest, const void *src, size_t count)
{
ubyte* dst8 = cast(ubyte*)dest;
ubyte* src8 = cast(ubyte*)src;
if (count < 8) {
mixin(COPY_REMAINING(q{count}));
return dest;
}
mixin(START_VAL(q{dst8}));
mixin(START_VAL(q{src8}));
while ((cast(UIntN)dst8 & (TYPE_WIDTH - 1)) != WHILE_DEST_BREAK) {
mixin(INC_VAL(q{dst8})) = mixin(INC_VAL(q{src8}));
count--;
}
switch ((mixin(`(cast(UIntN)src8)`~ PRE_SWITCH_ADJUST)) &
(TYPE_WIDTH - 1)) {
// { } required to work around DMD bug
case 0: {mixin(COPY_NO_SHIFT());} break;
case 1: {mixin(COPY_SHIFT(q{1}));} break;
case 2: {mixin(COPY_SHIFT(q{2}));} break;
case 3: {mixin(COPY_SHIFT(q{3}));} break;
static if(TYPE_WIDTH > 4){ // was TYPE_WIDTH >= 4. bug in original code.
case 4: {mixin(COPY_SHIFT(q{4}));} break;
case 5: {mixin(COPY_SHIFT(q{5}));} break;
case 6: {mixin(COPY_SHIFT(q{6}));} break;
case 7: {mixin(COPY_SHIFT(q{7}));} break;
}
default: assert(0);
}
}
void main(){
int[13] x = [1,2,3,4,5,6,7,8,9,0,1,2,3];
int[13] y;
memcpy(y.ptr, x.ptr, x.sizeof);
import std.stdio; writeln(y);
}
More information about the Digitalmars-d
mailing list