fun with mixins
Bill Baxter
dnewsgroup at billbaxter.com
Fri Jan 26 18:27:55 PST 2007
I confess I'm not sure what all is going on in your code there. At a
glance it looks like there's a lot of hard coded 8/16/24/32's in there.
*Seems* like you should be able to make something even more general
than that and perhaps in the process make it even leaner and meaner. :-)
I'm looking forward to the day when someone cranks out something like
GIL using D.
http://opensource.adobe.com/gil/presentation/index.htm
And a better AGG using D uber-templates would be nice too.
http://www.antigrain.com/
--bb
Chad J wrote:
> Here's the gist of the attached source:
>
> // Mixin template illustrating the trick: the assignment to `value` is
> // written as the initializer of a throwaway variable so that it can live
> // in a template body. `value`, `some` and `expression` are not declared
> // here; they must exist in the scope where the template is mixed in.
> template doSomething()
> {
> auto dummy = value = some + expression;
> }
>
> // Demonstrates the mixin trick: mixing in doSomething assigns
> // some + expression to the local `value`, which is then returned.
> uint func( uint some, uint expression )
> {
> uint value = 52; // initial value, overwritten by the mixed-in initializer
> mixin doSomething!();
> return value;
> }
>
> It seems pretty hackish to me, yet useful.
>
> Attached is a really long-winded alpha blending routine. The advantage
> is that it's perhaps the most generalized alpha blending routine I've
> ever written that is still decently fast (yeah, could be a lot better
> with simd, gpu usage, or <insert common optimization that doesn't work
> in general on my pda>). It could soon do things totally unrelated to
> alpha blending. It seems kinda like something that the C preprocessor
> would be used for, though the thought of using C kinda scares me, and I
> have hope that D templates/mixins are up to the job. Maybe someday when
> I have a lot of time on my hands I can figure out how to make the
> templates generate runtime for-loops, complete with custom-tailored
> innerloop code, which would make it a lot easier to optimize edge cases
> like sourcePixel[i+1] where there may or may not be an i+1'th pixel and
> I don't want to afford an 'if'.
>
> I have to wonder, has someone done this stuff already (the mixin trick,
> or some sort of graphics routine framework in D)?
>
>
> ------------------------------------------------------------------------
>
> /+ Alpha blended blitting routine. +/
>
> import std.stdio;
>
> version = SDL;
> version( SDL )
> {
> import derelict.sdl.sdl;
> }
>
> // TODO: RGB32?
> /// Pixel format identifiers, used as template arguments by the blitting
> /// templates to select the code mixed in for each format.
> enum : uint
> {
> INVALID = 0,
> RGBA32 = 1,
> RGB24 = 2,
> RGB16_555 = 3,
> RGB16_565 = 4,
> RGBA8_I32 = 5, // indexed to 32 bit values
> A8 = 6,
> }
>
> // Mixin template: loads one source pixel at index `si` into `srgb` and/or
> // `alpha`, with the branch chosen at compile time from the format RGBA.
> // Each statement is written as the initializer of a dummy uint.
> // Expects `source`, `si`, `srgb`, `alpha` and `sourceAMask` to be visible
> // where it is mixed in. The RGBA8_I32 branch also needs `rgbaTable`, which
> // is not declared anywhere in the visible code.
> private template readSource( uint RGBA )
> {
> static if ( RGBA == RGBA32 )
> {
> // Load the word, then split it into alpha bits and colour bits using
> // the source surface's alpha mask.
> uint readS_dummy1 = srgb = source[si];
> uint readS_dummy2 = alpha = srgb & sourceAMask;
> uint readS_dummy3 = srgb = srgb & ~sourceAMask;
> }
> else static if ( RGBA == RGB24 )
> {
> // There is no such thing as an array with 24-bit elements, so we have
> // to use pointers.
> // This loads a whole uint starting at the pixel's first byte; `alpha`
> // is left untouched.
> uint readS_dummy1 = srgb = *(cast(uint*)(source + si));
> }
> else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
> {
> // cast(uint) is not necessary in all cases, only if dest is 32 bpp
> // `alpha` is left untouched for these formats.
> uint readS_dummy1 = srgb = cast(uint)source[si];
> }
> else static if ( RGBA == RGBA8_I32 )
> {
> // Look the index up in the table, then split alpha from colour as in
> // the RGBA32 branch.
> uint readS_dummy1 = srgb = rgbaTable[source[si]];
> uint readS_dummy2 = alpha = srgb & sourceAMask;
> uint readS_dummy3 = srgb = srgb & ~sourceAMask;
> }
> else static if ( RGBA == A8 )
> {
> // The source supplies only alpha; `srgb` is not assigned here.
> uint readS_dummy1 = alpha = cast(uint)source[si];
> }
> else
> {
> // Unknown format: fail the build with a readable message.
> pragma(msg,"Invalid source RGBA format for reading.");
> static assert(0);
> }
> }
>
> // Mixin template: loads one destination pixel at index `di` into `drgb`,
> // with the branch chosen at compile time from the format RGBA.
> //
> // Parameters:
> //   RGBA      - destination pixel format identifier.
> //   half16bpp - for 16bpp destinations, LOW_ADDRESS_HALF or
> //               HIGH_ADDRESS_HALF when only one of the two pixels in the
> //               word is to be written; NOT_APPLICABLE otherwise.
> //
> // Where a write will touch bits that must not change, the word as read is
> // also saved in `drgbOriginal` so the `write` template can restore them.
> // Expects `dest`, `di` and `drgb` to be visible where it is mixed in. The
> // RGBA8_I32 branch also needs `rgbaTable`, which is not declared anywhere
> // in the visible code.
> private template readDestination( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
> {
> static if ( RGBA == RGBA32 )
> {
> uint readD_dummy1 = drgb = dest[di];
> }
> else static if ( RGBA == RGB24 )
> {
> // There is no such thing as an array with 24-bit elements, so we have
> // to use pointers.
> uint readD_dummy1 = drgb = *(cast(uint*)(dest + di));
>
> // Since we can't write 24 bits, we can either write 3 bytes (slow),
> // or we can overwrite 8 bits of the next pixel. The latter is
> // faster and can be done safely if we overwrite those 8 bits with
> // their previous contents.
> uint drgbOriginal = drgb;
> }
> else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
> {
> uint readD_dummy1 = drgb = dest[di];
>
> static if ( half16bpp == LOW_ADDRESS_HALF ||
> half16bpp == HIGH_ADDRESS_HALF )
> {
> // Store the original values of both pixels being read.
> // When reading and writing 2 pixels at a time, it is impossible
> // to prevent overwriting a pixel that we don't want to. At
> // least not without some rather complicated code. So instead,
> // we just make sure that the pixel we don't want to overwrite
> // is overwritten with its original value. The original value
> // is stored here.
> // Taken from drgb, the word just read above, exactly as the RGB24
> // branch does; the previously used name destReadResult is not
> // declared anywhere.
> uint drgbOriginal = drgb;
> }
> }
> else static if ( RGBA == RGBA8_I32 )
> {
> uint readD_dummy1 = drgb = rgbaTable[dest[di]];
> }
> else
> {
> // Unknown format: fail the build with a readable message.
> pragma(msg,"Invalid destination RGBA format for reading.");
> static assert(0);
> }
> }
>
> // Mixin template: reads one source pixel and one destination pixel by
> // mixing in readSource followed by readDestination. half16bpp is passed
> // through to readDestination only.
> private template read( uint sourceRGBA, uint destRGBA,
> ubyte half16bpp = NOT_APPLICABLE )
> {
> mixin readSource!( sourceRGBA );
> mixin readDestination!( destRGBA, half16bpp );
> }
>
> // Mixin template: rewrites the source pixel held in `srgb` from the source
> // format's channel layout to the destination format's layout, in place.
> //
> // Parameters:
> //   sourceRGBA - format the pixel in `srgb` currently has.
> //   destRGBA   - format the pixel must have before blending.
> //
> // Supported pairs: 32/24-bit or indexed source to RGBA32, RGB24 or
> // RGB16_565; RGB16_565 source to RGBA32, RGB24 or RGB16_565; A8 source to
> // anything (nothing to convert). Every other pair fails a static assert.
> // Expects `srgb` to be visible where it is mixed in.
> private template convert( uint sourceRGBA, uint destRGBA )
> {
> static if ( sourceRGBA == RGBA32 || sourceRGBA == RGB24 || sourceRGBA == RGBA8_I32 )
> {
> static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
> {
> // Same channel layout: srgb is already usable as-is, do nothing.
> }
> // This must be chained with `else`; as a separate static if, its
> // trailing static assert(0) rejected the 32/24-bit destinations
> // accepted just above.
> else static if ( destRGBA == RGB16_565 )
> {
> // Here we must shrink a 32 bit pixel from the source into a
> // 16 bit pixel.
> // in this situation we write the 16 bit resultant pixels one at
> // a time so the extra 16 bits will be safely discarded.
> // Keeps the top 5/6/5 bits of the three 8-bit channels.
> uint convert_dummy1 =
> srgb = ((0xf800 & (srgb >> 8 )) +
> (0x07e0 & (srgb >> 5 )) +
> (0x001f & (srgb >> 3 )));
> }
> else static assert(0);
> }
> else static if ( sourceRGBA == RGB16_565 )
> {
> static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
> {
> // Here we must expand a 16 bit pixel from the source into a
> // 32 bit pixel.
> // In this situation we read the 16 bit pixels one at a time
> // so the extra 16 bits can be safely discarded.
> // Moves the 5/6/5-bit channels to the top of three 8-bit channels;
> // the low bits of each channel stay zero.
> uint convert_dummy1 =
> srgb = (((srgb & 0xf800) << 8 ) +
> ((srgb & 0x07e0) << 5 ) +
> ((srgb & 0x001f) << 3 ));
> }
> else static if ( destRGBA == RGB16_565 )
> {
> // Same layout: do nothing.
> }
> else static assert(0);
> }
> else static if ( sourceRGBA == A8 )
> {
> // The source carries alpha only; srgb needs no conversion here.
> }
> else static assert(0);
> }
>
> // Mixin template: blends the source pixel `srgb` into the destination
> // pixel `drgb` by `alpha` and leaves the result in `drgb`.
> // RGBA is the destination format. Besides `srgb`, `drgb` and `alpha`, the
> // body also reads `destbpp`, `sourceRGBA` and `sourceAMask`, which must be
> // visible where it is mixed in.
> private template blend( uint RGBA )
> {
> // Note that this will get it right regardless of which color is in which
> // channel. Of course, the channels' placements must be correct.
> // It also preserves the destination's alpha channel, if present.
> static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 ||
> RGBA == RGB16_565 || RGBA == RGB16_555 )
> {
> // evenMask selects every other channel, so each selected channel has
> // spare bits above it for the product with alpha; `shift` is the
> // number of alpha bits.
> static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 )
> {
> const shift = 8;
> const evenMask = 0x00ff00ff;
> }
> else
> {
> // For 16bpp formats:
> // alpha must be a 5 bit value (the 3 hi bits MUST be clear)
> // this does 2 16bit pixels at a time in one 32 bit word.
> // endianness doesn't matter on 565 formats due to symmetry
> // TODO: take into account endianness on 555 formats
> // (probably only noticeable on big endian machines)
> const shift = 5;
> const evenMask = 0x07e0f81f;
> }
> const oddMask = ~evenMask;
>
> // Remember the destination's alpha bits so they can be restored after
> // blending. Note that the source surface's alpha mask is used here.
> static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 )
> uint originalDestAlpha = drgb & sourceAMask;
>
> // Reduce alpha from 8 to 5 bits for 16bpp destinations.
> // NOTE(review): blit also does `alpha >>= 3` once before its loop when
> // destbpp == 16. For source formats whose readSource branch does not
> // reassign alpha, this shift is then applied again on every pixel.
> // Confirm which of the two is intended.
> static if ( destbpp == 16 )
> uint blend_dummy1 = alpha = alpha >> 3;
>
>
> static if ( destbpp == 16 && sourceRGBA == A8 )
> {
> // Extract the middle channel and shift it into the high 16 bits, giving
> // at least 5 bits above it to hold the multiplication overflow, and at
> // least 5 bits below it to hold the high channel's multiplication overflow.
> uint sourceChannels = ((srgb << 16) | srgb) & evenMask;
> uint destChannels = ((drgb << 16) | drgb) & evenMask;
>
> // do the blending
> uint blend_temp =
> (((sourceChannels - destChannels) * alpha) >> shift) + destChannels;
>
> // Now we move the middle channel from the high 16 bits, back into its
> // rightful place in the middle.
> uint blend_dummy2 =
> drgb = (blend_temp & (evenMask & 0x0000ffff)) |
> ((blend_temp & (evenMask & 0xffff0000)) >> 16 );
> }
> else
> {
> // dest + (source - dest) * alpha, computed separately for the even and
> // the odd channel groups. The even group multiplies and then shifts;
> // the odd group shifts first and then multiplies.
> uint blend_dummy2 =
> drgb =
> ((((((srgb & evenMask)-(drgb & evenMask)) * alpha) >> shift) + drgb) & evenMask) |
> ((((((srgb & oddMask )-(drgb & oddMask )) >> shift) * alpha) + drgb) & oddMask);
> }
>
> static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 ) // preserve alpha
> uint blend_dummy3 = drgb = (drgb & ~sourceAMask) | originalDestAlpha;
> }
> else
> {
> // Unknown format: fail the build with a readable message.
> pragma(msg,"Invalid RGBA format for alpha blending.");
> static assert(0);
> }
> }
>
>
> // Mixin template: stores the blended pixel `drgb` to the destination at
> // index `di`, then advances `si` and `di` by `sourceIncrement` and
> // `destIncrement`.
> //
> // Parameters:
> //   RGBA      - destination pixel format identifier.
> //   half16bpp - for 16bpp destinations, which of the two pixels in the
> //               word is replaced (LOW_ADDRESS_HALF or HIGH_ADDRESS_HALF);
> //               the other half is restored from `drgbOriginal`.
> //               NOT_APPLICABLE stores the whole word.
> //
> // Expects `dest`, `di`, `si`, `drgb`, `sourceIncrement`, `destIncrement`
> // and, where bits are restored, `drgbOriginal` to be visible where it is
> // mixed in.
> private template write( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
> {
>
> static if ( RGBA == RGBA32 )
> {
> uint write_dummy1 = dest[di] = drgb;
> }
> else static if ( RGBA == RGB24 )
> {
> uint* address = cast(uint*)(dest + di);
>
> // A whole uint is stored; the byte belonging to the next pixel is
> // put back from the value originally read.
> version ( BigEndian )
> uint write_dummy1 = *address = (drgb & 0xffffff00) | (drgbOriginal & 0x000000ff);
> else
> uint write_dummy1 = *address = (drgb & 0x00ffffff) | (drgbOriginal & 0xff000000);
> }
> else static if ( RGBA == RGB16_565 || RGBA == RGB16_555 )
> {
> // for selecting the lowest or highest pixel in terms of
> // address in memory rather than place in the word/register
> version ( BigEndian )
> const writeMask = 0x0000ffff;
> else
> const writeMask = 0xffff0000;
>
> static if ( half16bpp == HIGH_ADDRESS_HALF )
> uint write_dummy1 = dest[di] = (drgb & writeMask) | (drgbOriginal & ~writeMask);
> else static if ( half16bpp == LOW_ADDRESS_HALF )
> uint write_dummy1 = dest[di] = (drgb & ~writeMask) | (drgbOriginal & writeMask);
> else
> uint write_dummy1 = dest[di] = drgb;
> }
> // TODO: writing RGBA8_I32. needs an algo to reverse a 32 bpp value into
> // an 8 bit indexed value.
> else
> {
> // The message names the writing stage so that a failed build points
> // at this template rather than at blend.
> pragma(msg,"Invalid destination RGBA format for writing.");
> static assert(0);
> }
>
> // Step both indices on to the next pixel.
> uint write_dummy2 = si = si + sourceIncrement;
> uint write_dummy3 = di = di + destIncrement;
> }
>
> /// Values for the half16bpp template argument: which of the two 16-bit
> /// pixels held in one 32-bit word an operation applies to.
> private enum : ubyte
> {
> NOT_APPLICABLE = 0,
> LOW_ADDRESS_HALF = 1,
> HIGH_ADDRESS_HALF = 2,
> }
>
> // Mixin template: the complete per-pixel step. It mixes in, in this order,
> // read, convert, blend and write for the given source/destination formats.
> // half16bpp is forwarded to read and write and is rejected at compile time
> // unless the destination format is one of the 16bpp formats.
> private template innerLoop( uint sourceRGBA, uint destRGBA,
> ubyte half16bpp = NOT_APPLICABLE )
> {
> // half16bpp > 0 means LOW_ADDRESS_HALF or HIGH_ADDRESS_HALF.
> static if ( !(destRGBA == RGB16_565 || destRGBA == RGB16_555) && half16bpp > 0 )
> {
> pragma(msg,"The half16bpp argument is only to be used when the "
> "destination format is 16 bits per pixel.");
> static assert(0);
> }
>
> mixin read!(sourceRGBA,destRGBA,half16bpp);
> mixin convert!(sourceRGBA,destRGBA);
> mixin blend!(destRGBA);
> mixin write!(destRGBA,half16bpp);
> }
>
> // Mixin template: declares the scanline padding and the typed view of the
> // pixel memory for one of the two surfaces. With isSource == true it works
> // on srcSurface and exposes the results as `spadding` and `source`;
> // otherwise it works on dstSurface and exposes `dpadding` and `dest`.
> // Expects `srcSurface`, `dstSurface`, `sourcebpp` and `destbpp` to be
> // visible where it is mixed in.
> private template calculatePaddingAndArrays( bool isSource )
> {
>
> // Pick the surface being described, its depth and the other one's depth.
> static if ( isSource )
> {
> alias srcSurface surface;
> alias sourcebpp bpp;
> alias destbpp otherbpp;
> }
> else
> {
> alias dstSurface surface;
> alias destbpp bpp;
> alias sourcebpp otherbpp;
> }
>
> // Padding is the amount of extra data at the end of a scanline used to
> // ensure that the end of the scanline lines up on a 32 bit boundary.
> // spadding = source padding
> // dpadding = dest padding
> // The units in which padding is measured change depending on the
> // source and destination format.
> // The amount of data that is handled in each iteration also changes,
> // and is reflected by the different types of arrays.
>
> static if ( bpp == 32 )
> {
> // One uint per pixel; padding is taken to be zero.
> auto padding = 0;
> uint[] pixelData = cast(uint[])surface.pixels;
> }
> else static if ( bpp == 24 )
> {
> // padding measured in bytes
> auto padding = surface.pitch - (surface.width * 3);
> ubyte* pixelData = surface.pixels.ptr;
> }
> else static if ( bpp == 16 )
> {
> static if ( otherbpp != 16 /+otherbpp == 32 || otherbpp == 24 || otherbpp == 8+/ )
> {
> // padding measured in shorts
> auto padding = (surface.pitch >> 1) - surface.width;
> ushort[] pixelData = cast(ushort[])surface.pixels;
> }
> else
> {
> // Both surfaces are 16bpp: view the pixels as uints, i.e. two
> // pixels per element; padding is taken to be zero.
> auto padding = 0;
> uint[] pixelData = cast(uint[])surface.pixels;
> }
> }
> else static if ( bpp == 8 )
> {
> auto padding = surface.pitch - surface.width; // padding measured in bytes
> ubyte[] pixelData = surface.pixels;
> }
> else
> static assert(0);
>
> // Publish the results under the names the pixel templates use.
> static if ( isSource )
> {
> alias padding spadding;
> alias pixelData source;
> }
> else
> {
> alias padding dpadding;
> alias pixelData dest;
> }
> }
>
> // Alpha-blends a width x height rectangle of srcSurface onto dstSurface.
> // This function shall do no clipping.
> //
> // Template parameters:
> //   sourceRGBA, destRGBA - pixel format identifiers (RGBA32, RGB24, ...)
> //     that select the read/convert/blend/write code mixed into the loop.
> // Runtime parameters:
> //   sourceX, sourceY - top-left corner of the rectangle in the source.
> //   destX, destY     - top-left corner of the rectangle in the destination.
> //   width, height    - size of the rectangle.
> //   srcSurface       - surface that is read.
> //   dstSurface       - surface that is blended into.
> //   srcColor         - used as the source pixel when sourceRGBA is A8.
> //   alpha            - blend factor; replaced per pixel for source formats
> //                      whose readSource branch extracts an alpha value.
> //
> // With version SDL, any surface backed by an SDL_Surface is locked for the
> // whole duration of the call and unlocked again when the function exits.
> // Throws: Exception (via lock) if SDL fails to lock a surface.
>
> void blit( uint sourceRGBA, uint destRGBA )
> ( short sourceX, short sourceY,
> short destX, short destY, short width, short height,
> inout Surface srcSurface, inout Surface dstSurface,
> uint srcColor, uint alpha )
> {
> // this stuff just determines the bits per pixel of the source and
> // destination surfaces
> static if ( sourceRGBA == RGBA32 )
> const sourcebpp = 32;
> else static if ( sourceRGBA == RGB24 )
> const sourcebpp = 24;
> else static if ( sourceRGBA == RGB16_565 || sourceRGBA == RGB16_555 )
> const sourcebpp = 16;
> else
> const sourcebpp = 8;
>
> static if ( destRGBA == RGBA32 )
> const destbpp = 32;
> else static if ( destRGBA == RGB24 )
> const destbpp = 24;
> else static if ( destRGBA == RGB16_565 || destRGBA == RGB16_555 )
> const destbpp = 16;
> else
> const destbpp = 8;
> //
>
> static if ( (sourcebpp == 32 || sourcebpp == 24) && destbpp == 16 )
> const convert32to16 = true;
> else
> const convert32to16 = false;
>
> static if ( sourcebpp == 16 && (destbpp == 32 || destbpp == 24) )
> const convert16to32 = true;
> else
> const convert16to32 = false;
>
> // Duplicate the colour into both halves of the word for 16bpp output.
> // NOTE(review): srcColor is only consumed below when sourceRGBA == A8,
> // yet this duplication is compiled only when sourceRGBA != A8. Confirm
> // the condition.
> static if ( (destRGBA == RGB16_565 || destRGBA == RGB16_555) && sourceRGBA != A8 )
> srcColor |= (srcColor << 16);
>
> // NOTE(review): the blend template shifts alpha right by 3 again on
> // every pixel when destbpp == 16. Confirm which shift is intended.
> static if ( destbpp == 16 )
> alpha >>= 3;
>
> // note that the padding quantities are necessarily zero if
> // unitWidth = width / 2;
> // that's important because they have different units of measurement!
>
> // Declares spadding/source and dpadding/dest.
> mixin calculatePaddingAndArrays!( true );
> mixin calculatePaddingAndArrays!( false );
>
> // Widths below are in "units": the element size that si/di step over
> // for the formats involved.
> static if ( destbpp == 24 )
> {
> uint lineWidth = width * 3;
>
> static if ( sourcebpp == 24 )
> {
> // same as: unitSrcSurfaceWidth = srcSurface.width * 3;
> uint unitSrcSurfaceWidth = srcSurface.pitch - spadding;
> uint unitSrcWidth = lineWidth;
> }
> else
> {
> uint unitSrcSurfaceWidth = srcSurface.width;
> uint unitSrcWidth = width;
> }
>
> uint unitDstSurfaceWidth = dstSurface.pitch - dpadding;
>
> uint unitDstWidth = lineWidth;
> }
> else static if ( sourcebpp == 16 && destbpp == 16 )
> {
> uint lineWidth = width / 2; // because we do 2 pixels at a time
>
> // The +(width & 1) part is used to make the division round up.
> uint unitSrcSurfaceWidth = (srcSurface.width / 2) + (srcSurface.width & 1);
> uint unitDstSurfaceWidth = (dstSurface.width / 2) + (dstSurface.width & 1);
>
> // The lineWidth variable rounds down on division, so it may be
> // missing a pixel. That is desirable since we don't want alphablend
> // onto the pixel next to the missing pixel. Of course, we will
> // handle the missing pixel individually, but it is still useful to
> // have access to a rounded-up version of the blit's width.
> uint unitSrcWidth = lineWidth + (width & 1);
> uint unitDstWidth = unitSrcWidth;
> }
> else
> {
> uint lineWidth = width;
> uint unitSrcWidth = width;
> uint unitDstWidth = width;
> uint unitSrcSurfaceWidth = srcSurface.width;
> uint unitDstSurfaceWidth = dstSurface.width;
> }
>
> uint sourceAMask = srcSurface.alphaMask;
>
> version( SDL )
> {
> // The scope(exit) guards are registered directly in the function body
> // (a version block opens no scope of its own), so the surfaces stay
> // locked until blit returns. Registering them inside the `if` blocks
> // would unlock each surface as soon as that block closed, before any
> // pixel was touched.
> auto sourceSdlSurface = srcSurface.sdl_surface;
> bool srcLocked = false;
> if ( sourceSdlSurface !is null )
> srcLocked = lock( sourceSdlSurface );
> scope(exit)
> {
> if ( srcLocked )
> SDL_UnlockSurface( sourceSdlSurface );
> }
>
> // The destination lock must be taken on dstSurface, not on srcSurface.
> auto destSdlSurface = dstSurface.sdl_surface;
> bool dstLocked = false;
> if ( destSdlSurface !is null )
> dstLocked = lock( destSdlSurface );
> scope(exit)
> {
> if ( dstLocked )
> SDL_UnlockSurface( destSdlSurface );
> }
> }
>
> static if ( sourcebpp == 24 )
> uint sourceIncrement = 3;
> else
> uint sourceIncrement = 1;
>
> static if ( destbpp == 24 )
> uint destIncrement = 3;
> else
> uint destIncrement = 1;
>
> // Since we are not necessarily blitting across the entire width of the
> // destination surface or source surface, we have to skip some of the
> // pixels on the end of the current scanline and on the beginning of
> // the next scanline.
> // Add that to the padding (which is explained above), and the result
> // is these source/dest LineExtra variables.
> int sLineExtra = unitSrcSurfaceWidth + spadding - unitSrcWidth;
> int dLineExtra = unitDstSurfaceWidth + dpadding - unitDstWidth;
>
> // initialize the index variables
> // si = source index
> // di = destination index
> int si = (sourceX * sourceIncrement) + (unitSrcSurfaceWidth * sourceY);
> int di = (destX * destIncrement) + (unitDstSurfaceWidth * destY);
>
> // nextLine is always ahead of di by the amount of pixels left in one line
> // of the blit.
> int nextLine;
>
> // endi is the index to stop at.
> //int endi = destX + unitWidth + (unitDstSurfaceWidth + dpadding) * (destY + height);
> int endi = di + (unitDstSurfaceWidth * height);
>
> assert( lineWidth + dLineExtra == unitDstSurfaceWidth + dpadding );
>
> // TODO: remove this
> void writeHex ( char[] name, uint number )
> {
> writef( "(",name,std.string.toString( cast(ulong)number, cast(uint)16 ),")|" );
> }
> //
>
> // Working registers shared by the mixed-in pixel templates.
> uint srgb;
> uint drgb;
>
> // An A8 source supplies alpha only, so the colour is fixed up front.
> static if ( sourceRGBA == A8 )
> srgb = srcColor;
>
> // One iteration of the outer loop per scanline of the rectangle.
> while( di < endi )
> {
> nextLine = di + lineWidth;
>
> // NOTE(review): innerLoop rejects a non-zero half16bpp unless the
> // destination is 16bpp, so the convert16to32 case (32/24bpp
> // destination) cannot compile with these two half-word steps.
> // Confirm the intended condition.
> static if ( convert32to16 || convert16to32 )
> mixin innerLoop!(sourceRGBA,destRGBA,HIGH_ADDRESS_HALF);
>
> while( di < nextLine )
> {
> mixin innerLoop!(sourceRGBA,destRGBA);
> }
>
> static if ( convert32to16 || convert16to32 )
> mixin innerLoop!(sourceRGBA,destRGBA,LOW_ADDRESS_HALF);
>
> // Skip to the start of the rectangle on the next scanline.
> si += sLineExtra;
> di += dLineExtra;
> }
> }
>
> version( SDL )
> {
>
> // Locks `surface` if SDL requires it and it is not already locked.
> // Returns true only when this call took the lock, i.e. when the caller is
> // responsible for unlocking it again.
> private bool lock( SDL_Surface* surface )
> {
> // Nothing to do: no lock needed, or somebody already holds it.
> if ( !SDL_MUSTLOCK( surface ) || surface.locked )
> return false;
>
> safe_SDL_LockSurface( surface );
> return true;
> }
>
> // Wrapper around SDL_LockSurface that turns a failure return into an
> // Exception carrying the text reported by SDL_GetError.
> private void safe_SDL_LockSurface( SDL_Surface* surface )
> {
> if ( SDL_LockSurface( surface ) == 0 )
> return; // locked successfully
>
> // View SDL's zero-terminated message as a D array before appending it.
> char* rawMessage = SDL_GetError();
> char[] message = rawMessage[0..std.c.string.strlen(rawMessage)];
> throw new Exception( "SDL_LockSurface failed to lock a surface: "~message );
> }
> }
>
> /// A block of pixel memory together with the layout information the
> /// blitting code needs.
> struct Surface
> {
> ubyte[] pixels;
> uint alphaMask = 0;
> ushort width = 0xffff;
> ushort height = 0xffff;
> ushort pitch = 0xffff; /// width of a scanline in bytes.
> ushort RGBAformat = INVALID;
>
> /// Builds a Surface over caller-supplied pixel memory.
> /// width and height are in pixels.
> static Surface opCall( ubyte[] pixels, uint alphaMask,
> ushort width, ushort height, ushort pitch,
> ushort RGBAformat )
> {
> assert( pixels !is null );
>
> Surface surf;
> surf.pixels = pixels;
> surf.alphaMask = alphaMask;
> surf.RGBAformat = RGBAformat;
> surf.pitch = pitch;
> surf.width = width;
> surf.height = height;
> return surf;
> }
>
> version ( SDL )
> {
> SDL_Surface* sdl_surface = null;
>
> /// Builds a Surface that views the pixel memory of an SDL_Surface and
> /// remembers the SDL_Surface it came from.
> static Surface opCall( SDL_Surface* surface, ushort RGBAformat )
> {
> Surface surf;
>
> // Geometry and pixel memory, taken from the SDL surface.
> surf.pitch = surface.pitch;
> surf.width = surface.w;
> surf.height = surface.h;
> surf.pixels = cast(ubyte[])surface.pixels[0.. surface.pitch * surface.h];
>
> assert ( RGBAformat != INVALID );
>
> surf.sdl_surface = surface;
> surf.alphaMask = surface.format.Amask;
> surf.RGBAformat = RGBAformat;
> return surf;
> }
> }
> }
More information about the Digitalmars-d
mailing list