/** * Custom Float Mark 2 */ module customfloat; import std.bitmanip; import std.traits; import std.typetuple; import std.intrinsic; import std.traits; import std.math; import std.contracts; import std.stdio; import std.conv; debug { /// Converts an unsigend integral to string containing its binary representation. auto inBinary(T)(T v, T m = T.max) if(is(Unsigned!T == T)) { auto f = bsf(m); auto r = bsr(m) + 1; auto output = new char[r - f]; v >>= f; foreach_reverse(ref c;output) { c = v&1 ? '1' : '0'; v >>= 1; } return output; } } /// 64-bit version of std.intrinsic.bsf private int bsf(ulong value) { union Ulong { ulong raw; struct { uint low; uint high; } } Ulong v; v.raw = value; return v.low!=0 ? std.intrinsic.bsf(v.low) : std.intrinsic.bsf(v.high) + 32; } /// 64-bit version of std.intrinsic.bsr private int bsr(ulong value) { union Ulong { ulong raw; struct { uint low; uint high; } } Ulong v; v.raw = value; return v.high==0 ? std.intrinsic.bsr(v.low) : std.intrinsic.bsr(v.high) + 32; } /// Format flags for CustomFloat. public enum CustomFloatFlags { /// Adds a sign bit to allow for signed numbers. signed = 1, /** * Store values in normalized form by default. The actual precision of the * significand is extended by 1 bit by assuming an implicit leading bit of 1 * instead of 0. i.e. $(D 1.nnnn) instead of $(D 0.nnnn). * True for all $(LUCKY IEE754) types */ storeNormalized = 2, /** * Stores the significand in $(LUCKY IEEE754 denormalized) form when the * exponent is 0. Required to express the value 0. */ allowDenorm = 4, /// Allows the storage of $(LUCKY IEEE754 _infinity) values. infinity = 8, /// Allows the storage of $(LUCKY IEEE754 Not a Number) values. nan = 16, /** * If set, select an exponent bias such that max_exp = 1. * i.e. so that the maximum value is >= 1.0 and < 2.0. * Ignored if the exponent bias is manually specified. */ probability = 32, /// If set, unsigned custom floats are assumed to be negative. negativeUnsigned = 64, /// If set, 0 is the only allowed $(LUCKY IEEE754 denormalized) number. Requires allowDenorm and storeNormalized. allowDenormZeroOnly = 128 | allowDenorm | storeNormalized, /// Include _all of the $(LUCKY IEEE754) options. ieee = signed | storeNormalized | allowDenorm | infinity | nan , /// Include none of the above options. none = 0 } /** * Allows user code to define custom floating-point formats. These formats are * for storage only; all operations on them are performed by first implicitly * extracting them to $(D real) first. After the operation is completed the * result can be stored in a custom floating-point value via assignment. * * Example: * ---- * // Define a 16-bit floating point values * CustomFloat!16 x; // Using the number of bits * CustomFloat!(10, 5) y; // Using the precision and exponent width * CustomFloat!(10, 5,CustomFloatFlags.ieee) z; // Using the precision, exponent width and format flags * CustomFloat!(10, 5,CustomFloatFlags.ieee, 15) w; // Using the precision, exponent width, format flags and exponent offset bias * * // Use the 16-bit floats mostly like normal numbers * w = x*y - 1; * writeln(w); * * // Functions calls require conversion * z = sin(+x) + cos(+y); // Use uniary plus to concisely convert to a real * z = sin(x.re) + cos(y.re); // Or use the .re property to convert to a real * z = sin(x.get!float) + cos(y.get!float); // Or use get!T * z = sin(cast(float)x) + cos(cast(float)y); // Or use cast(T) to explicitly convert * * // Define a 8-bit custom float for storing probabilities * alias CustomFloat!(4, 4, CustomFloatFlags.ieee^CustomFloatFlags.probability^CustomFloatFlags.signed ) Probability; * auto p = Probability(0.5); * ---- */ template CustomFloat(uint bits) if( bits == 8 || bits == 16 || bits == 32 || bits == 64 || bits == 80) { static if(bits == 8) alias CustomFloat!( 4, 3) CustomFloat; static if(bits == 16) alias CustomFloat!(10, 5) CustomFloat; static if(bits == 32) alias CustomFloat!(23, 8) CustomFloat; static if(bits == 64) alias CustomFloat!(52,11) CustomFloat; static if(bits == 80) alias CustomFloat!(64,15,CustomFloatFlags.ieee^CustomFloatFlags.storeNormalized) CustomFloat; } ///ditto template CustomFloat(uint precision,uint exponentWidth,CustomFloatFlags flags = CustomFloatFlags.ieee) if( ( (flags & flags.signed) + precision + exponentWidth) % 8 == 0 && precision + exponentWidth > 0) { alias CustomFloat!(precision,exponentWidth,flags, (1 << (exponentWidth - ((flags&flags.probability)==0) )) - ((flags&(flags.nan|flags.infinity))!=0) - ((flags&flags.probability)!=0) ) CustomFloat; // ((flags&CustomFloatFlags.probability)==0) } ///ditto struct CustomFloat( uint precision, // fraction bits (23 for float) uint exponentWidth, // exponent bits (8 for float) Exponent width CustomFloatFlags flags, uint bias) if(( (flags & flags.signed) + precision + exponentWidth) % 8 == 0 && precision + exponentWidth > 0) { private: template uType(uint bits) { /// get the correct unsigned bitfield type to support > 32 bits static if(bits <= size_t.sizeof*8) alias size_t uType; else alias ulong uType; } template sType(uint bits) { /// get the correct signed bitfield type to support > 32 bits static if(bits <= ptrdiff_t.sizeof*8-1) alias ptrdiff_t sType; else alias long sType; } alias uType!precision T_sig; /// Simplify casting code alias uType!exponentWidth T_exp; /// ditto alias sType!exponentWidth T_signed_exp; /// ditto alias CustomFloatFlags Flags; /// Facilitate converting numeric types to custom float union ToBinary(F) if(is(CustomFloat!(F.sizeof*8))) { F set; CustomFloat!(F.sizeof*8) get; // Convert F to the correct binary type. static typeof(get) opCall(F value) { ToBinary r; r.set = value; return r.get; } alias get this; } /// Perform IEEE rounding with round to nearest detection void roundedShift(T,U)(ref T sig, U shift) { if( sig << (T.sizeof*8 - shift) == cast(T) 1uL << (T.sizeof*8 - 1) ) { // round to even sig >>= shift; sig += sig & 1; } else { sig >>= shift - 1; sig += sig & 1; // Perform standard rounding sig >>= 1; } } /// Convert the current value to signed exponent, normalized form void toNormalized(T,U)(ref T sig, ref U exp) { sig = significand; auto shift = (T.sizeof*8) - precision; exp = exponent; static if(flags&(Flags.infinity|Flags.nan)) { // Handle inf or nan if(exp == exponent_max) { exp = exp.max; sig <<= shift; static if(flags&Flags.storeNormalized) { // Save inf/nan in denormalized format sig >>= 1; sig += cast(T) 1uL << (T.sizeof*8 - 1); } return; } } if( (~flags&Flags.storeNormalized) || // Convert denormalized form to normalized form ((flags&Flags.allowDenorm)&&(exp==0)) ){ if(sig > 0) { auto shift2 = precision - bsr(sig); exp -= shift2-1; shift += shift2; } else { // value = 0.0 exp = exp.min; return; } } sig <<= shift; exp -= bias; } /// Set the current value from signed exponent, normalized form void fromNormalized(T,U)(ref T sig, ref U exp) { auto shift = (T.sizeof*8) - precision; if(exp == exp.max) { // infinity or nan exp = exponent_max; static if(flags & Flags.storeNormalized) sig <<= 1; // convert back to normalized form static if(~flags & Flags.infinity) // No infinity support? enforce(sig != 0,"Infinity floating point value assigned to a "~typeof(this).stringof~" (no infinity support)."); static if(~flags & Flags.nan) // No NaN support? enforce(sig == 0,"NaN floating point value assigned to a "~typeof(this).stringof~" (no nan support)."); sig >>= shift; return; } if(exp == exp.min){ // 0.0 exp = 0; sig = 0; return; } exp += bias; if( exp <= 0 ) { static if( ( flags&Flags.allowDenorm) || // Convert from normalized form to denormalized (~flags&Flags.storeNormalized) ) { shift += -exp; roundedShift(sig,1); sig += cast(T) 1uL << (T.sizeof*8 - 1); // Add the leading 1 exp = 0; } else enforce( (flags&Flags.storeNormalized) && exp == 0, "Underflow occured assigning to a "~typeof(this).stringof~" (no denormal support)."); } else { static if(~flags&Flags.storeNormalized) { // Convert from normalized form to denormalized roundedShift(sig,1); sig += cast(T) 1uL << (T.sizeof*8 - 1); // Add the leading 1 } } if(shift > 0) roundedShift(sig,shift); if(sig > significand_max) { // handle significand overflow (should only be 1 bit) static if(~flags&Flags.storeNormalized) { sig >>= 1; } else sig &= significand_max; exp++; } static if((flags&Flags.allowDenormZeroOnly)==Flags.allowDenormZeroOnly) { // disallow non-zero denormals if(exp == 0) { sig <<= 1; if(sig > significand_max && (sig&significand_max) > 0 ) // Check and round to even exp++; sig = 0; } } if(exp >= exponent_max ) { static if( flags&(Flags.infinity|Flags.nan) ) { sig = 0; exp = exponent_max; static if(~flags&(Flags.infinity)) enforce( false, "Overflow occured assigning to a "~typeof(this).stringof~" (no infinity support)."); } else enforce( exp == exponent_max, "Overflow occured assigning to a "~typeof(this).stringof~" (no infinity support)."); } } public: static if( precision == 64 ) { // CustomFloat!80 support hack ulong significand; enum ulong significand_max = ulong.max; mixin(bitfields!( T_exp , "exponent", exponentWidth, bool , "sign" , flags & flags.signed )); } else { mixin(bitfields!( T_sig, "significand", precision, T_exp, "exponent" , exponentWidth, bool , "sign" , flags & flags.signed )); } static if (flags & Flags.infinity) static @property CustomFloat infinity() { /// Returns: infinity value CustomFloat value; static if (flags & Flags.signed) value.sign = 0; value.significand = 0; value.exponent = exponent_max; return value; } static if (flags & Flags.nan) static @property CustomFloat nan() { /// Returns: NaN value CustomFloat value; static if (flags & Flags.signed) value.sign = 0; value.significand = cast(typeof(significand_max)) 1L << (precision-1); value.exponent = exponent_max; return value; } static @property size_t dig(){ /// Returns: number of decimal digits of precision return cast(size_t) log10( 1uL << precision - (flags&Flags.storeNormalized != 0)); } static @property CustomFloat epsilon() { /// Returns: smallest increment to the value 1 CustomFloat value; static if (flags & Flags.signed) value.sign = 0; T_signed_exp exp = -precision; T_sig sig = 0; value.fromNormalized(sig,exp); if(exp == 0 && sig == 0) { // underflowed to zero static if((flags&Flags.allowDenorm) || (~flags&Flags.storeNormalized)) sig = 1; else sig = cast(T) 1uL << (precision - 1); } value.exponent = cast(value.T_exp) exp; value.significand = cast(value.T_sig) sig; return value; } enum mant_dig = precision + (flags&Flags.storeNormalized != 0); /// the number of bits in mantissa static @property int max_10_exp(){ return cast(int) log10( +max ); } /// Returns: maximum int value such that 10max_10_exp is representable enum max_exp = exponent_max-bias+((~flags&(Flags.infinity|flags.nan))!=0); /// maximum int value such that 2max_exp-1 is representable static @property int min_10_exp(){ return cast(int) log10( +min_normal ); } /// Returns: minimum int value such that 10min_10_exp is representable enum min_exp = cast(T_signed_exp)-bias +1+ ((flags&Flags.allowDenorm)!=0); /// minimum int value such that 2min_exp-1 is representable as a normalized value static @property CustomFloat max() { /// Returns: largest representable value that's not infinity CustomFloat value; static if (flags & Flags.signed) value.sign = 0; value.exponent = exponent_max - ((flags&(flags.infinity|flags.nan)) != 0); value.significand = significand_max; return value; } static @property CustomFloat min_normal() { /// Returns: smallest representable normalized value that's not 0 CustomFloat value; static if (flags & Flags.signed) value.sign = 0; value.exponent = 1; static if(flags&Flags.storeNormalized) value.significand = 0; else value.significand = cast(T_sig) 1uL << (precision - 1);; return value; } @property CustomFloat re() { return this; } /// Returns: real part static @property CustomFloat im() { return CustomFloat(0.0f); } /// Returns: imaginary part this(F)(F input) if (__traits(compiles, cast(real)input )) { this = input; }/// Initialize from any $(D real) compatible type. /// Self assignment void opAssign(F:CustomFloat)(F input) { static if (flags & Flags.signed) sign = input.sign; exponent = input.exponent; significand = input.significand; } /// Assigns from any $(D real) compatible type. void opAssign(F)(F input) if (__traits(compiles, cast(real)input )) { static if( staticIndexOf!(Unqual!F, float, double, real) >= 0 ) auto value = ToBinary!(Unqual!F)(input); else auto value = ToBinary!(real )(input); // Assign the sign bit static if (~flags & Flags.signed) enforce( (!value.sign)^((flags&flags.negativeUnsigned)>0) ,"Incorrectly signed floating point value assigned to a "~typeof(this).stringof~" (no sign support)."); else sign = value.sign; CommonType!(T_signed_exp ,value.T_signed_exp ) exp = value.exponent; // Assign the exponent and fraction CommonType!(T_sig, value.T_sig ) sig = value.significand; value.toNormalized(sig,exp); fromNormalized(sig,exp); assert(exp <= exponent_max, text(typeof(this).stringof~" exponent too large: " ,exp," > ",exponent_max, "\t",input,"\t",sig) ); assert(sig <= significand_max, text(typeof(this).stringof~" significand too large: ",sig," > ",significand_max,"\t",input,"\t",exp," ",exponent_max) ); exponent = cast(T_exp) exp; significand = cast(T_sig) sig; } /// Fetches the stored value either as a $(D float), $(D double) or $(D real). @property F get(F)() if (staticIndexOf!(Unqual!F, float, double, real) >= 0) { ToBinary!F result; static if (flags&Flags.signed) result.sign = sign; else result.sign = (flags&flags.negativeUnsigned) > 0; CommonType!(T_signed_exp ,result.get.T_signed_exp ) exp = exponent; // Assign the exponent and fraction CommonType!(T_sig, result.get.T_sig ) sig = significand; toNormalized(sig,exp); result.fromNormalized(sig,exp); assert(exp <= result.exponent_max, text("get exponent too large: " ,exp," > ",result.exponent_max) ); assert(sig <= result.significand_max, text("get significand too large: ",sig," > ",result.significand_max) ); result.exponent = cast(result.get.T_exp) exp; result.significand = cast(result.get.T_sig) sig; return result.set; } ///ditto T opCast(T)() if (__traits(compiles, get!T )) { return get!T; } /// Convert the CustomFloat to a real and perform the relavent operator on the result real opUnary(string op)() if( __traits(compiles, mixin(op~`(get!real)`)) || op=="++" || op=="--" ){ static if(op=="++" || op=="--") { auto result = get!real; this = mixin(op~`result`); return result; } else return mixin(op~`get!real`); } real opBinary(string op,T)(T b) if( __traits(compiles, mixin(`get!real`~op~`b`) ) ) { /// ditto return mixin(`get!real`~op~`b`); } real opBinaryRight(string op,T)(T a) if( __traits(compiles, mixin(`a`~op~`get!real`) ) && /// ditto !__traits(compiles, mixin(`get!real`~op~`b`) ) ) { return mixin(`a`~op~`get!real`); } int opCmp(T)(auto ref T b) if(__traits(compiles, cast(real)b ) ) { /// ditto auto x = get!real; auto y = cast(real) b; return (x>=y)-(x<=y); } void opOpAssign(string op, T)(auto ref T b) if ( __traits(compiles, mixin(`get!real`~op[0..$-1]~`cast(real)b`))) { /// ditto return mixin(`this = this `~op[0..$-1]~` cast(real)b`); } string toString() { return to!string(get!real); } /// ditto }