Bitfield accessors
bearophile
bearophileHUGS at lycos.com
Tue Nov 20 17:03:32 PST 2007
Jarrett Billingsley:
> Cool :)
But later I have changed it again. I have changed the first part of the Bitfield code to this, because sometimes you don't need to access the whole data:
template Bitfield(alias data, Args...) {
static assert(!(Args.length & 1), "Bitfield arguments must be an even number");
static assert(data.sizeof*8 >= _SumLens!(Args),
"The sum of bit fields is bigger than data.sizeof*8");
const char[] Bitfield = _BitfieldShim!((typeof(data)).stringof, data, Args).Ret;
}
private template _SumLens(Args...) {
static if(Args.length == 0)
const uint _SumLens = 0;
else {
static assert(Args[1] > 0, "Bitfield '" ~ Args[0] ~ "' is <= 0");
const uint _SumLens = Args[1] + _SumLens!(Args[2 .. $]);
}
}
> If you're using it in a struct, I'd be very surprised if the call weren't
> optimized out, in which case I'd expect the performance to be about the same
> as C bitfields.
The calls become inlined, but the results may surprise you anyway :-)
For the speed tests I have used this simple D code on a very old PC:
import std.stdio;
// Bitfield code here...
struct INTORFLOAT {
uint i;
mixin(Bitfield!(i, "sign", 1, "biasedexponent", 8, "significand", 23));
}
void main() {
ulong tot;
INTORFLOAT f;
const N = 25;
uint v[N] = [1628676761, 1620574103, 1237153253, 1098880307, 87513741,
13181925, 14686126, 7429435, 16286706, 6474381,
4879794, 7734725, 3745958, 13353858, 4236193,
7587, 4309, 28846, 7313, 14516,
126, 143, 171, 221, 156];
for(int j = 0; j < 10_000_000; j++)
for(int i = 0; i < N; i++) {
f.i = v[i];
f.biasedexponent = f.biasedexponent / 2;
tot += f.biasedexponent + f.sign;
//printf("%d %d\n", f.i, tot);
}
printf("%d\n", tot);
}
The C code:
#include <stdio.h>
#define N 25
typedef union {
unsigned int i;
struct {
unsigned int sign: 1;
unsigned int biasedexponent: 8;
unsigned int significand: 23;
} bits;
} INTORFLOAT;
int main(void) {
int i, j;
unsigned long long tot = 0;
INTORFLOAT f;
unsigned int v[N] = {1628676761, 1620574103, 1237153253, 1098880307, 87513741,
13181925, 14686126, 7429435, 16286706, 6474381,
4879794, 7734725, 3745958, 13353858, 4236193,
7587, 4309, 28846, 7313, 14516,
126, 143, 171, 221, 156};
for(j = 0; j < 10000000; j++)
for(i = 0; i < N; i++) {
f.i = v[i];
f.bits.biasedexponent = f.bits.biasedexponent / 2; // line1
tot += f.bits.biasedexponent + f.bits.sign; // line2
//printf("%d %d\n", f.i, tot);
}
printf("%d\n", tot);
}
I have compiled them with:
DMD: v1.023
dmd -O -inline -release bitfields1.d
gcc: V. 3.4.2 (mingw-special)
gcc -O3 -s bitfields1.c -o bitfields1
Bitfield produces the following getters and setters:
public uint sign() { return (i >> 0x0) & 0x1; }
public void sign(uint val) { i = (i & 0xfffffffffffffffe) | ((val & 0x1) << 0x0); }
public uint biasedexponent() { return (i >> 0x1) & 0xff; }
public void biasedexponent(uint val) { i = (i & 0xfffffffffffffe01) | ((val & 0xff) << 0x1); }
public uint significand() { return (i >> 0x9) & 0x7fffff; }
public void significand(uint val) { i = (i & 0x1ff) | ((val & 0x7fffff) << 0x9); }
The C code runs about 2.5 times faster than the D code (on a faster CPU the difference is probably smaller). (The numerical output of the code is fortunately the same!). I have tested that the methods (member functions) actually become inlined.
Removing line1 and line2 from both the C and D code the running time becomes (much smaller and) equal between D and C, so I presume the D slowdown is just in the reading/writing of the bit fields.
The Bitfield code can be improved (but such improvement of the templates has resulted hairy, because the code has to adapt to data of different sizeof) to produce a method like this:
public void biasedexponent(uint val) { i = (i & 0xfffffe01) | ((val & 0xff) << 0x1); }
With that the running time of the D code becomes just about 2 times the C code.
Just for reference the ASM code of the following C code:
#include <stdio.h>
#define N 25
typedef union {
unsigned int i;
struct {
unsigned int sign: 1;
unsigned int biasedexponent: 8;
unsigned int significand: 23;
} bits;
} INTORFLOAT;
int main(void) {
int i;
unsigned long tot = 0;
INTORFLOAT f;
unsigned int v[N] = {1628676761, 1620574103, 1237153253, 1098880307, 87513741,
13181925, 14686126, 7429435, 16286706, 6474381,
4879794, 7734725, 3745958, 13353858, 4236193,
7587, 4309, 28846, 7313, 14516,
126, 143, 171, 221, 156};
for(i = 0; i < N; i++) {
f.i = v[i];
f.bits.biasedexponent = f.bits.biasedexponent / 2; // line1
tot += f.bits.biasedexponent + f.bits.sign; // line2
}
printf("%d\n", tot);
}
Produced by:
gcc -O3 -S bitfields2.c -o bitfields2.asm
Is:
.file "bitfields2.c"
.def ___main; .scl 2; .type 32; .endef
.data
.align 4
LC0:
.long 1628676761
.long 1620574103
.long 1237153253
.long 1098880307
.long 87513741
.long 13181925
.long 14686126
.long 7429435
.long 16286706
.long 6474381
.long 4879794
.long 7734725
.long 3745958
.long 13353858
.long 4236193
.long 7587
.long 4309
.long 28846
.long 7313
.long 14516
.long 126
.long 143
.long 171
.long 221
.long 156
.section .rdata,"dr"
LC1:
.ascii "%d\12\0"
.text
.p2align 4,,15
.globl _main
.def _main; .scl 2; .type 32; .endef
_main:
pushl %ebp
movl $16, %eax
movl %esp, %ebp
pushl %ebx
subl $132, %esp
andl $-16, %esp
call __alloca
call ___main
movl $100, %ecx
leal -120(%ebp), %eax
movl $LC0, %edx
movl %ecx, 8(%esp)
xorl %ebx, %ebx
movl %edx, 4(%esp)
movl %eax, (%esp)
call _memcpy
xorl %ecx, %ecx
.p2align 4,,15
L5:
movl -120(%ebp,%ecx,4), %edx
incl %ecx
movl %edx, %eax
shrl $2, %eax
andl $1, %edx
andl $127, %eax
addl %edx, %eax
addl %eax, %ebx
cmpl $24, %ecx
jle L5
movl %ebx, 4(%esp)
movl $LC1, (%esp)
call _printf
movl -4(%ebp), %ebx
leave
ret
.def _memcpy; .scl 3; .type 32; .endef
.def _printf; .scl 3; .type 32; .endef
I have no good way to see the ASM generated by dmd yet (I may buy Walter's utility). (Maybe some of you can show that asm, but remember to remove the outer j loop as in the C version).
My obvious conclusion is that I hope to see built-in bitfields in D ;-)
The D docs say they aren't used often, but now and then I translate C code to D and I don't want to write many (slow) getter and setter properties like that :-)
Bye,
bearophile
More information about the Digitalmars-d-learn
mailing list