fun with mixins

Bill Baxter dnewsgroup at
Fri Jan 26 18:27:55 PST 2007

I confess I'm not sure what all is going on in your code there.  At a 
glance it looks like there's a lot of hard coded 8/16/24/32's in there. 
  *Seems* like you should be able to make something even more general 
than that and perhaps in the process make it even leaner and meaner.  :-)

I'm looking forward to the day when someone cranks out something like 
GIL using D.

And a better AGG using D uber-templates would be nice too.


Chad J wrote:
> Here's the gist of the attached source:
> // Mixin template demonstrating the trick: when mixed into a scope it
> //   declares `dummy`, and the initializer of that declaration performs the
> //   assignment `value = some + expression`.  The names `value`, `some` and
> //   `expression` are not declared here; they must exist where it is mixed in.
> template doSomething()
> {
>   auto dummy = value = some + expression;
> }
> // Example caller for the trick: declares `value`, mixes in doSomething
> //   (whose declaration initializer assigns `some + expression` to `value`),
> //   then returns `value`.
> uint func( uint some, uint expression )
> {
>   uint value = 52; // initial value; reassigned by the mixed-in initializer
>   mixin doSomething!();
>   return value;
> }
> It seems pretty hackish to me, yet useful.
> Attached is a really long-winded alpha blending routine.  The advantage 
> is that it's perhaps the most generalized alpha blending routine I've 
> ever written that is still decently fast (yeah, could be a lot better 
> with simd, gpu usage, or <insert common optimization that doesn't work 
> in general on my pda>).  It could soon do things totally unrelated to 
> alpha blending.  It seems kinda like something that the C preprocessor 
> would be used for, though the thought of using C kinda scares me, and I 
> have hope that D templates/mixins are up to the job.  Maybe someday when 
> I have a lot of time on my hands I can figure out how to make the 
> templates generate runtime for-loops, complete with custom-tailored 
> innerloop code, which would make it a lot easier to optimize edge cases 
> like sourcePixel[i+1] where there may or may not be an i+1'th pixel and 
> I don't want to afford an 'if'.
> I have to wonder, has someone done this stuff already (the mixin trick, 
> or some sort of graphics routine framework in D)?
> ------------------------------------------------------------------------
> /+ Alpha blended blitting routine. +/
> import std.stdio;
> version = SDL;
> version( SDL )
> {
> 	import derelict.sdl.sdl;
> }
> // TODO: RGB32?
> // Pixel format identifiers.  They are passed as compile-time template
> //   arguments (sourceRGBA / destRGBA / RGBA) to select code paths with
> //   static if.  blit() treats RGBA32 as 32 bpp, RGB24 as 24 bpp, the two
> //   RGB16 formats as 16 bpp, and everything else as 8 bpp.
> enum : uint
> {
> 	INVALID = 0, // default value of Surface.RGBAformat
> 	RGBA32,
> 	RGB24,
> 	RGB16_555,
> 	RGB16_565,
> 	RGBA8_I32, // indexed to 32 bit values
> 	A8,
> }
> // Mixin template that reads one source element at index `si`.
> // It declares no useful variables of its own: each `readS_dummyN`
> //   declaration exists only so that its initializer can assign to `srgb`
> //   and/or `alpha`, which must be declared in the scope it is mixed into
> //   (along with `source`, `si`, `sourceAMask`, and `rgbaTable` for the
> //   indexed format).
> // Params:
> //   RGBA = pixel format of the source; any unsupported value stops
> //          compilation via static assert.
> private template readSource( uint RGBA )
> {
> 	static if ( RGBA == RGBA32 )
> 	{
> 		// Split the pixel into its alpha bits and its remaining colour bits.
> 		uint readS_dummy1 = srgb = source[si];
> 		uint readS_dummy2 = alpha = srgb & sourceAMask;
> 		uint readS_dummy3 = srgb = srgb & ~sourceAMask;
> 	}
> 	else static if ( RGBA == RGB24 )
> 	{
> 		// There is no such thing as an array with 24-bit elements, so we have
> 		//   to use pointers.
> 		// This reads a full uint starting at source + si, i.e. it also picks
> 		//   up one byte beyond the 3-byte pixel.
> 		uint readS_dummy1 = srgb = *(cast(uint*)(source + si));
> 	}
> 	else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
> 	{
> 		// cast(uint) is not necessary in all cases, only if dest is 32 bpp
> 		uint readS_dummy1 = srgb = cast(uint)source[si];
> 	}
> 	else static if ( RGBA == RGBA8_I32 )
> 	{
> 		// Look the index up in rgbaTable, then split alpha from colour as
> 		//   in the RGBA32 case.
> 		// NOTE(review): rgbaTable is not declared anywhere in this file --
> 		//   confirm where it is expected to come from.
> 		uint readS_dummy1 = srgb = rgbaTable[source[si]];
> 		uint readS_dummy2 = alpha = srgb & sourceAMask;
> 		uint readS_dummy3 = srgb = srgb & ~sourceAMask;
> 	}
> 	else static if ( RGBA == A8 )
> 	{
> 		// Alpha-only source: only `alpha` is updated, `srgb` is left alone.
> 		uint readS_dummy1 = alpha = cast(uint)source[si];
> 	}
> 	else
> 	{
> 		pragma(msg,"Invalid source RGBA format for reading.");
> 		static assert(0);
> 	}
> }
> // Mixin template that reads one destination element at index `di` into
> //   `drgb`.  As with readSource, the `readD_dummyN` declarations exist only
> //   for the side effect of their initializers; `drgb`, `dest` and `di` must
> //   be declared in the scope it is mixed into.
> // For some formats it additionally declares `drgbOriginal`, which the
> //   write template uses to restore bits that must not change.
> // Params:
> //   RGBA      = pixel format of the destination; unsupported values stop
> //               compilation via static assert.
> //   half16bpp = for 16 bpp destinations, which half of the word is being
> //               processed (LOW_ADDRESS_HALF / HIGH_ADDRESS_HALF), or
> //               NOT_APPLICABLE.
> private template readDestination( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
> {
> 	static if ( RGBA == RGBA32 )
> 	{
> 		uint readD_dummy1 = drgb = dest[di];
> 	}
> 	else static if ( RGBA == RGB24 )
> 	{
> 		// There is no such thing as an array with 24-bit elements, so we have
> 		//   to use pointers.
> 		uint readD_dummy1 = drgb = *(cast(uint*)(dest + di));
> 		// Since we can't write 24 bits, we can either write 3 bytes (slow),
> 		//   or we can overwrite 8 bits of the next pixel.  The latter is
> 		//   faster and can be done safely if we overwrite those 8 bits with
> 		//   their previous contents.
> 		uint drgbOriginal = drgb;
> 	}
> 	else static if ( RGBA == RGB16_555 || RGBA == RGB16_565 )
> 	{
> 		uint readD_dummy1 = drgb = dest[di];
> 		static if ( half16bpp == LOW_ADDRESS_HALF || 
> 				    half16bpp == HIGH_ADDRESS_HALF  )
> 		{
> 			// Store the original values of both pixels being read.
> 			// When reading and writing 2 pixels at a time, it is impossible
> 			//   to prevent overwriting a pixel that we don't want to.  At
> 			//   least not without some rather complicated code.  So instead,
> 			//   we just make sure that the pixel we don't want to overwrite
> 			//   is overwritten with its original value.  The original value
> 			//   is stored here.
> 			// NOTE(review): destReadResult is not declared anywhere in this
> 			//   file; the value just read is in drgb -- confirm which name
> 			//   is intended.
> 			uint drgbOriginal = destReadResult;
> 		}
> 	}
> 	else static if ( RGBA == RGBA8_I32 )
> 	{
> 		// NOTE(review): rgbaTable is not declared anywhere in this file.
> 		uint readD_dummy1 = drgb = rgbaTable[dest[di]];
> 	}
> 	else
> 	{
> 		pragma(msg,"Invalid destination RGBA format for reading.");
> 		static assert(0);
> 	}
> }
> // Mixin template that performs the read step for one element: it mixes in
> //   readSource for the source format, then readDestination for the
> //   destination format (forwarding half16bpp to the latter).
> // Params:
> //   sourceRGBA = pixel format of the source
> //   destRGBA   = pixel format of the destination
> //   half16bpp  = forwarded unchanged to readDestination
> private template read( uint sourceRGBA, uint destRGBA, 
>                        ubyte half16bpp = NOT_APPLICABLE )
> {
> 	mixin readSource!( sourceRGBA );
> 	mixin readDestination!( destRGBA, half16bpp );
> }
> // Mixin template that converts the source pixel held in `srgb` into the
> //   bit layout of the destination format, so that blend can combine the
> //   two directly.  Combinations that are not handled stop compilation via
> //   static assert(0).
> // Handled combinations:
> //   32/24 bpp or indexed source -> 32/24 bpp dest : no conversion
> //   32/24 bpp or indexed source -> RGB16_565 dest : shrink to 5-6-5
> //   RGB16_565 source            -> 32/24 bpp dest : expand to 8-8-8
> //   RGB16_565 source            -> RGB16_565 dest : no conversion
> //   A8 source                   -> any dest       : no conversion
> // Params:
> //   sourceRGBA = pixel format of the source
> //   destRGBA   = pixel format of the destination
> // NOTE(review): sourceReadResult is not declared anywhere in this file; the
> //   value read by readSource is in `srgb` -- confirm which name is intended.
> private template convert( uint sourceRGBA, uint destRGBA )
> {
> 	static if ( sourceRGBA == RGBA32 || sourceRGBA == RGB24 || sourceRGBA == RGBA8_I32 )
> 	{
> 		static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
> 		{
> 			alias sourceReadResult srgb; // do nothing
> 		}
> 		// This must be an `else static if`: as an independent `static if`
> 		//   its trailing `else static assert(0)` also fired for the 32/24 bpp
> 		//   destinations handled just above, rejecting every such blit.
> 		else static if ( destRGBA == RGB16_565 )
> 		{
> 			// Here we must shrink a 32 bit pixel from the source into a
> 			//   16 bit pixel.
> 			// in this situation we write the 16 bit resultant pixels one at
> 			//   a time so the extra 16 bits will be safely discarded.
> 			uint convert_dummy1 = 
> 			srgb = ((0xf800 & (sourceReadResult >> 8 )) +
> 			        (0x07e0 & (sourceReadResult >> 5 )) +
> 			        (0x001f & (sourceReadResult >> 3 )));
> 		}
> 		else static assert(0);
> 	}
> 	else static if ( sourceRGBA == RGB16_565 )
> 	{
> 		static if ( destRGBA == RGBA32 || destRGBA == RGB24 )
> 		{
> 			// Here we must expand a 16 bit pixel from the source into a
> 			//   32 bit pixel.
> 			// In this situation we read the 16 bit pixels one at a time
> 			//   so the extra 16 bits can be safely discarded.
> 			uint convert_dummy1 = 
> 			srgb = (((sourceReadResult & 0xf800) << 8 ) +
> 			        ((sourceReadResult & 0x07e0) << 5 ) +
> 			        ((sourceReadResult & 0x001f) << 3 ));
> 		}
> 		else static if ( destRGBA == RGB16_565 )
> 		{
> 			//alias sourceReadResult srgb; // do nothing
> 		}
> 		else static assert(0);
> 	}
> 	else static if ( sourceRGBA == A8 )
> 	{
> 		// Alpha-only source: srgb was preset by the caller, nothing to do.
> 		//alias srcColor srgb;
> 	}
> 	else static assert(0);
> }
> // Mixin template that alpha-blends the source pixel `srgb` onto the
> //   destination pixel `drgb`, leaving the result in `drgb`.
> // The channels are processed in two interleaved groups selected by
> //   evenMask / oddMask so that several channels are multiplied at once
> //   inside a single 32 bit word.
> // Besides `srgb`, `drgb` and `alpha`, the scope it is mixed into must also
> //   provide `sourceAMask`, `destbpp` and `sourceRGBA`; they are referenced
> //   here but are not template parameters.
> // Params:
> //   RGBA = pixel format of the destination; unsupported values stop
> //          compilation via static assert.
> private template blend( uint RGBA )
> {
> 	// Note that this will get it right regardless of which color is in which 
> 	//   channel.  Of course, the channels' placements must be correct.  
> 	// It also preserves the destination's alpha channel, if present.  
> 	static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 || 
> 	                 RGBA == RGB16_565 || RGBA == RGB16_555 )
> 	{
> 		static if ( RGBA == RGBA32 || RGBA == RGB24 || RGBA == RGBA8_I32 )
> 		{
> 			// 8 bit channels: alpha is an 8 bit factor.
> 			const shift = 8;
> 			const evenMask = 0x00ff00ff;
> 		}
> 		else
> 		{
> 			// For 16bpp formats:
> 			// alpha must be a 5 bit value (the 3 hi bits MUST be clear)
> 			// this does 2 16bit pixels at a time in one 32 bit word.  
> 			// endianness doesn't matter on 565 formats due to symmetry
> 			// TODO: take into account endianness on 555 formats
> 			//        (probably only noticeable on big endian machines)
> 			const shift = 5;
> 			const evenMask = 0x07e0f81f;
> 		}
> 		const oddMask = ~evenMask;
> 		// Remember the destination's alpha bits so they can be restored
> 		//   after blending.
> 		static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 )
> 			uint originalDestAlpha = drgb & sourceAMask;
> 		// NOTE(review): blit() already does `alpha >>= 3` once for 16 bpp
> 		//   destinations, and this shift runs every time the template is
> 		//   mixed-in code executes -- confirm alpha is not reduced twice
> 		//   (or repeatedly) when the source format does not reload it.
> 		static if ( destbpp == 16 )
> 			uint blend_dummy1 = alpha = alpha >> 3;
> 		static if ( destbpp == 16 && sourceRGBA == A8 )
> 		{
> 			// Extract the middle channel and shift it into the high 16 bits, giving
> 			//   at least 5 bits above it to hold the multiplication overflow, and at
> 			//   least 5 bits below it to hold the high channel's multiplication overflow.
> 			uint sourceChannels = ((srgb << 16) | srgb) & evenMask;
> 			uint destChannels =   ((drgb << 16) | drgb) & evenMask;
> 			// do the blending
> 			uint blend_temp =
> 				(((sourceChannels - destChannels) * alpha) >> shift) + destChannels;
> 			// Now we move the middle channel from the high 16 bits, back into its 
> 			//   rightful place in the middle.  
> 			uint blend_dummy2 = 
> 			drgb = (blend_temp & (evenMask & 0x0000ffff)) | 
> 				  ((blend_temp & (evenMask & 0xffff0000)) >> 16 );
> 		}
> 		else
> 		{
> 			// Even group: multiply first, then shift the product down.
> 			// Odd group: shift the difference down first, then multiply, so
> 			//   the product does not run off the top of the word.
> 			// NOTE(review): the odd-group difference is shifted right as an
> 			//   unsigned value; confirm the result is as intended when a
> 			//   destination channel is larger than the source channel.
> 			uint blend_dummy2 =
> 			drgb = 
> 				((((((srgb & evenMask)-(drgb & evenMask))  * alpha) >> shift) + drgb) & evenMask) |
> 				((((((srgb & oddMask )-(drgb & oddMask )) >> shift)  * alpha) + drgb) & oddMask);
> 		}
> 		static if ( RGBA == RGBA32 || RGBA == RGBA8_I32 ) // preserve alpha
> 			uint blend_dummy3 = drgb = (drgb & ~sourceAMask) | originalDestAlpha;
> 	}
> 	else
> 	{
> 		pragma(msg,"Invalid RGBA format for alpha blending.");
> 		static assert(0);
> 	}
> }
> // Mixin template that stores the blended pixel `drgb` into `dest[di]` and
> //   then advances both indices (`si` by sourceIncrement, `di` by
> //   destIncrement).  `dest`, `di`, `si`, `drgb`, the two increments and,
> //   where used, `drgbOriginal` must exist in the scope it is mixed into.
> // Params:
> //   RGBA      = pixel format of the destination; unsupported values stop
> //               compilation via static assert.
> //   half16bpp = for 16 bpp destinations, selects which half of the 32 bit
> //               word receives the new pixel; the other half is rewritten
> //               with its original contents from drgbOriginal.
> private template write( uint RGBA, ubyte half16bpp = NOT_APPLICABLE )
> {
> 	static if ( RGBA == RGBA32 )
> 	{
> 		uint write_dummy1 = dest[di] = drgb;
> 	}
> 	else static if ( RGBA == RGB24 )
> 	{
> 		// A whole uint is written; the one byte that belongs to the next
> 		//   pixel is refilled from drgbOriginal so it does not change.
> 		uint* address = cast(uint*)(dest + di);
> 		version ( BigEndian )
> 			uint write_dummy1 = *address = (drgb & 0xffffff00) | (drgbOriginal & 0x000000ff);
> 		else
> 			uint write_dummy1 = *address = (drgb & 0x00ffffff) | (drgbOriginal & 0xff000000);
> 	}
> 	else static if ( RGBA == RGB16_565 || RGBA == RGB16_555 )
> 	{
> 		// for selecting the lowest or highest pixel in terms of 
> 		//   address in memory rather than place in the word/register
> 		version ( BigEndian )
> 			const writeMask = 0x0000ffff;
> 		else
> 			const writeMask = 0xffff0000;
> 		static if ( half16bpp == HIGH_ADDRESS_HALF )
> 			uint write_dummy1 = dest[di] = (drgb & writeMask) | (drgbOriginal & ~writeMask);
> 		else static if ( half16bpp == LOW_ADDRESS_HALF )
> 			uint write_dummy1 = dest[di] = (drgb & ~writeMask) | (drgbOriginal & writeMask);
> 		else
> 			uint write_dummy1 = dest[di] = drgb;
> 	}
> 	// TODO:  writing RGBA8_I32.  needs an algo to reverse a 32 bpp value into
> 	//          an 8 bit indexed value.  
> 	else
> 	{
> 		// The message used to repeat blend's "for alpha blending" text,
> 		//   which pointed at the wrong template when this assert fired.
> 		pragma(msg,"Invalid destination RGBA format for writing.");
> 		static assert(0);
> 	}
> 	uint write_dummy2 = si = si + sourceIncrement;
> 	uint write_dummy3 = di = di + destIncrement;
> }
> // Values for the `half16bpp` template parameter.  The enum body was empty
> //   although these three names are used as defaults and in static if
> //   tests throughout this file.
> // NOT_APPLICABLE must be 0 because innerLoop detects misuse with
> //   `half16bpp > 0`; the other two only need to be distinct and non-zero.
> private enum : ubyte
> {
> 	NOT_APPLICABLE = 0, // not processing half of a 32 bit word
> 	HIGH_ADDRESS_HALF,  // the 16 bit pixel at the higher memory address
> 	LOW_ADDRESS_HALF,   // the 16 bit pixel at the lower memory address
> }
> // Mixin template containing the complete per-element pipeline:
> //   read -> convert -> blend -> write.  The write step also advances the
> //   source and destination indices, so mixing this in once processes one
> //   element and moves on to the next.
> // Params:
> //   sourceRGBA = pixel format of the source
> //   destRGBA   = pixel format of the destination
> //   half16bpp  = forwarded to read and write; rejected at compile time
> //                when non-zero and the destination is not a 16 bpp format
> private template innerLoop( uint sourceRGBA, uint destRGBA, 
>                             ubyte half16bpp = NOT_APPLICABLE )
> {
> 	static if ( !(destRGBA == RGB16_565 || destRGBA == RGB16_555) && half16bpp > 0 )
> 	{
> 		pragma(msg,"The half16bpp argument is only to be used when the "
> 		           "destination format is 16 bits per pixel.");
> 		static assert(0);
> 	}
> 	// The order matters: each stage consumes what the previous one left in
> 	//   srgb / drgb / alpha.
> 	mixin read!(sourceRGBA,destRGBA,half16bpp);
> 	mixin convert!(sourceRGBA,destRGBA);
> 	mixin blend!(destRGBA);
> 	mixin write!(destRGBA,half16bpp);
> }
> // Mixin template that, for either the source or the destination surface,
> //   declares the scanline padding and a suitably typed view of the pixel
> //   data, then publishes them under the names the other templates use:
> //   `spadding` / `source` when isSource is true, `dpadding` / `dest`
> //   otherwise.
> // The scope it is mixed into must provide srcSurface, dstSurface,
> //   sourcebpp and destbpp.
> // Params:
> //   isSource = true to describe the source surface, false for the
> //              destination surface
> private template calculatePaddingAndArrays( bool isSource )
> {
> 	// Pick which surface this instance describes; `otherbpp` is the depth
> 	//   of the opposite surface.
> 	static if ( isSource )
> 	{
> 		alias srcSurface surface;
> 		alias sourcebpp bpp;
> 		alias destbpp otherbpp;
> 	}
> 	else
> 	{
> 		alias dstSurface surface;
> 		alias destbpp bpp;
> 		alias sourcebpp otherbpp;
> 	}
> 	// Padding is the amount of extra data at the end of a scanline used to
> 	//   ensure that the end of the scanline lines up on a 32 bit boundary.
> 	// spadding = source padding
> 	// dpadding = dest padding
> 	// In this case, the units padding is measured in change depending on
> 	//   the source and destination format.  
> 	// The amount of data that is handled in each iteration also changes,
> 	//   and is reflected by the different types of arrays.  
> 	static if ( bpp == 32 )
> 	{
> 		// one uint per pixel; padding is taken to be zero
> 		auto padding = 0;
> 		uint[] pixelData = cast(uint[])surface.pixels;
> 	}
> 	else static if ( bpp == 24 )
> 	{
> 		// padding measured in bytes
> 		auto padding = surface.pitch - (surface.width * 3);
> 		// raw byte pointer: 24 bit pixels are addressed by byte offset
> 		ubyte* pixelData = surface.pixels.ptr;
> 	}
> 	else static if ( bpp == 16 )
> 	{
> 		static if ( otherbpp != 16 /+otherbpp == 32 || otherbpp == 24 || otherbpp == 8+/ )
> 		{
> 			// padding measured in shorts
> 			auto padding = (surface.pitch >> 1) - surface.width;
> 			ushort[] pixelData = cast(ushort[])surface.pixels;
> 		}
> 		else
> 		{
> 			// 16 bpp on both sides: elements are uints, i.e. two pixels
> 			//   each, and padding is taken to be zero
> 			auto padding = 0;
> 			uint[] pixelData = cast(uint[])surface.pixels;
> 		}
> 	}
> 	else static if ( bpp == 8 )
> 	{
> 		auto padding = surface.pitch - surface.width; // padding measured in bytes
> 		ubyte[] pixelData = surface.pixels;
> 	}
> 	else
> 		static assert(0);
> 	// Publish the results under the side-specific names.
> 	static if ( isSource )
> 	{
> 		alias padding spadding;
> 		alias pixelData source;
> 	}
> 	else
> 	{
> 		alias padding dpadding;
> 		alias pixelData dest;
> 	}
> }
> // Alpha-blends a rectangle of srcSurface onto dstSurface.
> // This function shall do no clipping.  
> // Template params:
> //   sourceRGBA = pixel format of the source surface
> //   destRGBA   = pixel format of the destination surface
> // Params:
> //   sourceX, sourceY = top-left corner of the rectangle in the source
> //   destX, destY     = top-left corner of the rectangle in the destination
> //   width, height    = size of the rectangle
> //   srcSurface       = surface to read from
> //   dstSurface       = surface to blend onto
> //   srcColor         = colour used as the source pixel when sourceRGBA is A8
> //   alpha            = blend factor; source formats that carry their own
> //                      alpha overwrite it per pixel in readSource
> // When built with version SDL, surfaces backed by an SDL_Surface are locked
> //   for the duration of the call and unlocked on every exit path.
> void blit( uint sourceRGBA, uint destRGBA )
> 		( short sourceX, short sourceY, 
> 		short destX, short destY, short width, short height, 
> 		inout Surface srcSurface, inout Surface dstSurface, 
> 		uint srcColor, uint alpha )
> {
> 	// this stuff just determines the bits per pixel of the source and 
> 	//   destination surfaces
> 	static if ( sourceRGBA == RGBA32 )
> 		const sourcebpp = 32;
> 	else static if ( sourceRGBA == RGB24 )
> 		const sourcebpp = 24;
> 	else static if ( sourceRGBA == RGB16_565 || sourceRGBA == RGB16_555 )
> 		const sourcebpp = 16;
> 	else
> 		const sourcebpp = 8;
> 	static if ( destRGBA == RGBA32 )
> 		const destbpp = 32;
> 	else static if ( destRGBA == RGB24 )
> 		const destbpp = 24;
> 	else static if ( destRGBA == RGB16_565 || destRGBA == RGB16_555 )
> 		const destbpp = 16;
> 	else
> 		const destbpp = 8;
> 	// Flags for the two depth-changing cases; they decide below whether the
> 	//   half-word variants of innerLoop are mixed in around the main loop.
> 	static if ( (sourcebpp == 32 || sourcebpp == 24) && destbpp == 16 )
> 		const convert32to16 = true;
> 	else
> 		const convert32to16 = false;
> 	static if ( sourcebpp == 16 && (destbpp == 32 || destbpp == 24) )
> 		const convert16to32 = true;
> 	else
> 		const convert16to32 = false;
> 	// Duplicate the 16 bit colour into both halves of the word.
> 	static if ( (destRGBA == RGB16_565 || destRGBA == RGB16_555) && sourceRGBA != A8 )
> 		srcColor |= (srcColor << 16);
> 	// 16 bpp blending works with a 5 bit alpha.
> 	static if ( destbpp == 16 )
> 		alpha >>= 3;
> 	// note that the padding quantities are necessarily zero if
> 	//   unitWidth = width / 2;
> 	//   that's important because they have different units of measurement!
> 	mixin calculatePaddingAndArrays!( true );
> 	mixin calculatePaddingAndArrays!( false );
> 	static if ( destbpp == 24 )
> 	{
> 		uint lineWidth = width * 3;
> 		static if ( sourcebpp == 24 )
> 		{
> 			// same as: unitSrcSurfaceWidth = srcSurface.width * 3;
> 			uint unitSrcSurfaceWidth = srcSurface.pitch - spadding;
> 			uint unitSrcWidth = lineWidth;
> 		}
> 		else
> 		{
> 			uint unitSrcSurfaceWidth = srcSurface.width;
> 			uint unitSrcWidth = width;
> 		}
> 		uint unitDstSurfaceWidth = dstSurface.pitch - dpadding;
> 		uint unitDstWidth = lineWidth;
> 	}
> 	else static if ( sourcebpp == 16 && destbpp == 16 )
> 	{
> 		uint lineWidth = width / 2; // because we do 2 pixels at a time
> 		// The +(width & 1) part is used to make the division round up.  
> 		uint unitSrcSurfaceWidth = (srcSurface.width / 2) + (srcSurface.width & 1);
> 		uint unitDstSurfaceWidth = (dstSurface.width / 2) + (dstSurface.width & 1);
> 		// The lineWidth variable rounds down on division, so it may be
> 		//   missing a pixel.  That is desirable since we don't want alphablend
> 		//   onto the pixel next to the missing pixel.  Of course, we will 
> 		//   handle the missing pixel individually, but it is still useful to 
> 		//   have access to a rounded-up version of the blit's width.  
> 		uint unitSrcWidth = lineWidth + (width & 1);
> 		uint unitDstWidth = unitSrcWidth;
> 	}
> 	else
> 	{
> 		uint lineWidth = width;
> 		uint unitSrcWidth = width;
> 		uint unitDstWidth = width;
> 		uint unitSrcSurfaceWidth = srcSurface.width;
> 		uint unitDstSurfaceWidth = dstSurface.width;
> 	}
> 	uint sourceAMask = srcSurface.alphaMask;
> 	version( SDL )
> 	{
> 		// The lock flags and scope(exit) guards are declared directly in the
> 		//   function's scope (a version block does not open a new scope).
> 		//   Previously each guard sat inside its `if` block, so the surface
> 		//   was unlocked again as soon as that block ended -- before any
> 		//   pixel had been touched.
> 		auto sourceSdlSurface = srcSurface.sdl_surface;
> 		bool srcLocked = false;
> 		if ( sourceSdlSurface !is null )
> 			srcLocked = lock( sourceSdlSurface );
> 		scope(exit)
> 		{
> 			if ( srcLocked )
> 				SDL_UnlockSurface( sourceSdlSurface );
> 		}
> 		// Fixed: this used to read srcSurface.sdl_surface, so the
> 		//   destination surface was never locked.
> 		auto destSdlSurface = dstSurface.sdl_surface;
> 		bool dstLocked = false;
> 		if ( destSdlSurface !is null )
> 			dstLocked = lock( destSdlSurface );
> 		scope(exit)
> 		{
> 			if ( dstLocked )
> 				SDL_UnlockSurface( destSdlSurface );
> 		}
> 	}
> 	// 24 bpp data is addressed by byte, so an index step is 3; every other
> 	//   format steps one array element at a time.
> 	static if ( sourcebpp == 24 )
> 		uint sourceIncrement = 3;
> 	else
> 		uint sourceIncrement = 1;
> 	static if ( destbpp == 24 )
> 		uint destIncrement = 3;
> 	else
> 		uint destIncrement = 1;
> 	// Since we are not necessarily blitting across the entire width of the 
> 	//   destination surface or source surface, we have to skip some of the
> 	//   pixels on the end of the current scanline and on the beginning of
> 	//   the next scanline.  
> 	// Add that to the padding (which is explained above), and the result
> 	//   is these source/dest LineExtra variables.  
> 	int sLineExtra = unitSrcSurfaceWidth + spadding - unitSrcWidth;
> 	int dLineExtra = unitDstSurfaceWidth + dpadding - unitDstWidth;
> 	// initialize the index variables
> 	// si = source index
> 	// di = destination index
> 	int si = (sourceX * sourceIncrement) + (unitSrcSurfaceWidth * sourceY);
> 	int di = (destX   * destIncrement)   + (unitDstSurfaceWidth * destY);
> 	// nextLine is always ahead of di by the amount of pixels left in one line
> 	//   of the blit.  
> 	int nextLine;
> 	// endi is the index to stop at.  
> 	//int endi = destX + unitWidth + (unitDstSurfaceWidth + dpadding) * (destY + height);
> 	int endi = di + (unitDstSurfaceWidth * height);
> 	assert( lineWidth + dLineExtra == unitDstSurfaceWidth + dpadding );
> 	// TODO: remove this
> 	void writeHex ( char[] name, uint number )
> 	{
> 		writef( "(",name,std.string.toString( cast(ulong)number, cast(uint)16 ),")|" );
> 	}
> 	// Working registers shared by the mixed-in read/convert/blend/write
> 	//   stages.
> 	uint srgb;
> 	uint drgb;
> 	// An alpha-only source has no colour of its own: use srcColor.
> 	static if ( sourceRGBA == A8 )
> 		srgb = srcColor;
> 	// Outer loop: one iteration per scanline of the rectangle.
> 	while( di < endi )
> 	{
> 		nextLine = di + lineWidth;
> 		static if ( convert32to16 || convert16to32 )
> 			mixin innerLoop!(sourceRGBA,destRGBA,HIGH_ADDRESS_HALF);
> 		while( di < nextLine )
> 		{
> 			mixin innerLoop!(sourceRGBA,destRGBA);
> 		}
> 		static if ( convert32to16 || convert16to32 )
> 			mixin innerLoop!(sourceRGBA,destRGBA,LOW_ADDRESS_HALF);
> 		// Skip the rest of this scanline and the start of the next one.
> 		si += sLineExtra;
> 		di += dLineExtra;
> 	}
> }
> version( SDL )
> {
> 	// Locks `surface` if SDL requires it and it is not locked already.
> 	// Returns: true when this call took the lock (the caller must unlock
> 	//          it later), false when nothing was done.
> 	// Throws:  Exception when SDL fails to lock the surface.
> 	private bool lock( SDL_Surface* surface )
> 	{
> 		// Nothing to do for surfaces that need no lock or already hold one.
> 		if ( !SDL_MUSTLOCK( surface ) || surface.locked )
> 			return false;
> 		safe_SDL_LockSurface( surface );
> 		return true;
> 	}
> 	// Wrapper around SDL_LockSurface that turns a failure return into an
> 	//   Exception carrying SDL's own error text.
> 	private void safe_SDL_LockSurface( SDL_Surface* surface )
> 	{
> 		if ( SDL_LockSurface( surface ) == 0 )
> 			return; // locked successfully
> 		// Slice the C string returned by SDL up to its terminator.
> 		char* message = SDL_GetError();
> 		char[] details = message[0..std.c.string.strlen(message)];
> 		throw new Exception( "SDL_LockSurface failed to lock a surface: "~details );
> 	}
> }
> // Describes a block of pixel memory that blit() can read from or blend
> //   onto.  Create one with Surface( ... ) via the static opCall overloads.
> struct Surface
> {
> 	ubyte[] pixels;              /// raw pixel memory
> 	uint alphaMask = 0;          /// bits of a pixel that hold alpha
> 	ushort width = 0xffff;       /// in pixels
> 	ushort height = 0xffff;      /// in pixels
> 	ushort pitch = 0xffff;       /// width of a scanline in bytes.  
> 	ushort RGBAformat = INVALID; /// one of the pixel format constants
> 	/// Builds a Surface over caller-supplied pixel memory.
> 	/// width and height are in pixels.
> 	static Surface opCall( ubyte[] pixels, uint alphaMask,
> 	                       ushort width, ushort height, ushort pitch,
> 	                       ushort RGBAformat )
> 	{
> 		assert( pixels !is null );
> 		Surface made;
> 		made.pixels     = pixels;
> 		made.alphaMask  = alphaMask;
> 		made.width      = width;
> 		made.height     = height;
> 		made.pitch      = pitch;
> 		made.RGBAformat = RGBAformat;
> 		return made;
> 	}
> 	version ( SDL )
> 	{
> 		/// The SDL surface this Surface wraps, or null when it wraps none.
> 		SDL_Surface* sdl_surface = null;
> 		/// Builds a Surface that views the pixels of an SDL surface.
> 		/// The pixel slice covers pitch * h bytes of the SDL surface.
> 		static Surface opCall( SDL_Surface* surface, ushort RGBAformat )
> 		{
> 			Surface made;
> 			made.pixels = cast(ubyte[])surface.pixels[0.. surface.pitch * surface.h];
> 			made.width  = surface.w;
> 			made.height = surface.h;
> 			made.pitch  = surface.pitch;
> 			assert ( RGBAformat != INVALID );
> 			made.RGBAformat  = RGBAformat;
> 			made.alphaMask   = surface.format.Amask;
> 			made.sdl_surface = surface;
> 			return made;
> 		}
> 	}
> }

More information about the Digitalmars-d mailing list