Bug with align(1) and ulong
Jean-Baptiste Boric via D.gnu
d.gnu at puremagic.com
Thu Jun 26 07:50:52 PDT 2014
Hi there,
I'm a C/C++ programmer who decided to try D for a small
side project (it boils down to extracting information from an
NTFS partition).
After exhausting all other explanations, I think I found a bug
in gdc: when I use ulong fields in structs with align(1), they
are not properly aligned; even worse, they "bleed" three bytes
onto the next field.
Here's the minimal test code :
---
import std.stdio;
import std.stream;
import std.conv;
//
// On-disk data structures
//
// NTFS BIOS parameter block (BPB), declared for direct overlay on raw
// boot-sector bytes. Every field sits inside an align(1) group; if the
// fields are packed with no padding, the declared sizes sum to 73 bytes
// (2 + 1 + 7 + 1 + 18 + 8 + 8 + 8 + 4 + 4 + 8 + 4).
struct NTFS_BootSector_BPB {
// Request 1-byte alignment for every field declared in this group.
align (1) {
ushort bytesPerSector;
ubyte sectorsPerCluster;
ubyte[7] _unused1;
ubyte mediaDescriptor;
ubyte[18] _unused2;
// The fields above occupy 29 bytes, so with packed layout this first
// ulong starts at offset 29, which is not a multiple of 8. The next
// multiple of 8 is 32, i.e. 3 bytes further on.
ulong totalSectors;
ulong logicalClusterNumberMFT;
ulong logicalClusterNumberMFTmir;
uint clustersPerMFTRecord;
uint clustersPerIndexBuffer;
// With packed layout this ulong starts at offset 61 and is immediately
// followed by _unused3 at offset 69.
ulong volumeSerialNumber;
ubyte[4] _unused3;
}
}
// NTFS boot sector, variant that embeds the ulong-based BPB
// (NTFS_BootSector_BPB). With packed layout the declared sizes sum to
// 3 + 8 + 73 + 426 + 2 = 512 bytes, matching the 512 bytes that main()
// reads into a value of this type.
struct NTFS_BootSector {
// Request 1-byte alignment for every field declared in this group.
align (1) {
ubyte[3] _unused1;
ubyte[8] oemID;
// With packed layout the BPB starts at byte offset 11 of the sector.
NTFS_BootSector_BPB bpb;
ubyte[426] _unused2;
ushort signature;
}
}
// NTFS BIOS parameter block 2: control variant of NTFS_BootSector_BPB.
// Field names, order and sizes are the same, except that the four 64-bit
// values (totalSectors, logicalClusterNumberMFT, logicalClusterNumberMFTmir,
// volumeSerialNumber) are declared as ubyte[8] instead of ulong, so no
// field in this struct has a natural alignment above 4 bytes.
struct NTFS_BootSector_BPB2 {
// Request 1-byte alignment for every field declared in this group.
align (1) {
ushort bytesPerSector;
ubyte sectorsPerCluster;
ubyte[7] _unused1;
ubyte mediaDescriptor;
ubyte[18] _unused2;
// Raw bytes of the 64-bit values; they are not interpreted as integers here.
ubyte[8] totalSectors;
ubyte[8] logicalClusterNumberMFT;
ubyte[8] logicalClusterNumberMFTmir;
uint clustersPerMFTRecord;
uint clustersPerIndexBuffer;
ubyte[8] volumeSerialNumber;
ubyte[4] _unused3;
}
}
// NTFS boot sector 2: control variant of NTFS_BootSector that embeds the
// byte-array-based BPB (NTFS_BootSector_BPB2). The declared field sizes
// are otherwise identical and also sum to 512 bytes with packed layout.
struct NTFS_BootSector2 {
// Request 1-byte alignment for every field declared in this group.
align (1) {
ubyte[3] _unused1;
ubyte[8] oemID;
NTFS_BootSector_BPB2 bpb;
ubyte[426] _unused2;
ushort signature;
}
}
// Test driver: reads the first 512 bytes of the file named on the command
// line twice, once into each boot-sector struct variant, so the two
// in-memory views of the same bytes can be compared (e.g. in a debugger).
//
// Params:
//   args = command-line arguments; exactly one argument besides the
//          program name is expected (the input file path).
// Returns: -1 after printing a usage message when the argument count is
//          wrong, 0 otherwise.
int main(char[][] args) {
// Require exactly one user-supplied argument.
if(args.length != 2) {
writeln("Usage: bug INPUT_FILE");
return -1;
}
Stream stream;
NTFS_BootSector bootSector;
NTFS_BootSector2 bootSector2;
// args is char[][]; convert the path to string for BufferedFile.
stream = new BufferedFile(to!string(args[1]));
// Read boot sector
stream.seek(0, SeekPos.Set);
// The struct's address is reinterpreted as a byte pointer so the 512 bytes
// from the file land directly in the struct's memory.
stream.readExact(cast(ubyte*)&bootSector, 512);
// Read boot sector again
// Rewind to the start so the second struct receives the same bytes.
stream.seek(0, SeekPos.Set);
stream.readExact(cast(ubyte*)&bootSector2, 512);
// NOTE(review): the stream is not explicitly closed before returning.
return 0;
}
---
And here are the results viewed with gdb (boot code cut for
brevity):
---
(gdb) print /x bootSector
$2 = {
_unused1 = {0xeb, 0x52, 0x90},
oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
bpb = {
bytesPerSector = 0x200,
sectorsPerCluster = 0x8,
_unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3,
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
totalSectors = 0xc00000000000006,
logicalClusterNumberMFT = 0x20000000000,
logicalClusterNumberMFTmir = 0xf60000000000,
clustersPerMFTRecord = 0x100,
clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = 0x96a04533a0,
_unused3 = {0x0, 0x0, 0x0, 0x0}
},
_unused2 = {0xfa...},
signature = 0xaa55
}
(gdb) print /x bootSector2
$3 = {
_unused1 = {0xeb, 0x52, 0x90},
oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
bpb = {
bytesPerSector = 0x200,
sectorsPerCluster = 0x8,
_unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3,
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0,
0x0},
logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0},
clustersPerMFTRecord = 0x100,
clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45,
0xa0, 0x96},
_unused3 = {0x0, 0x0, 0x0, 0x0}
},
_unused2 = {0xfa...},
signature = 0xaa55
}
---
Even though both structures have exactly the same memory layout
in theory, the values of totalSectors, logicalClusterNumberMFT,
logicalClusterNumberMFTmir and volumeSerialNumber are different
(shifted by 3 bytes).
Worse, when I hex-edit the file so that byte 0x52 equals
0xFF (_unused3 becomes {0x0, 0x0, 0xff, 0x0}), this is what
happens:
---
(gdb) print /x bootSector
$4 = {
_unused1 = {0xeb, 0x52, 0x90},
oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
bpb = {
bytesPerSector = 0x200,
sectorsPerCluster = 0x8,
_unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3,
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
totalSectors = 0xc00000000000006,
logicalClusterNumberMFT = 0x20000000000,
logicalClusterNumberMFTmir = 0xf60000000000,
clustersPerMFTRecord = 0x100,
clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = 0xff000096a04533a0,
_unused3 = {0x0, 0x0, 0xff, 0x0}
},
_unused2 = {0xfa...},
signature = 0xaa55
}
(gdb) print /x bootSector2
$5 = {
_unused1 = {0xeb, 0x52, 0x90},
oemID = {0x4e, 0x54, 0x46, 0x53, 0x20, 0x20, 0x20, 0x20},
bpb = {
bytesPerSector = 0x200,
sectorsPerCluster = 0x8,
_unused1 = {0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0},
mediaDescriptor = 0xf8,
_unused2 = {0x0, 0x0, 0x3f, 0x0, 0xff, 0x0, 0x0, 0x28, 0x3,
0x0, 0x0, 0x0, 0x0, 0x0, 0x80, 0x0, 0x80, 0x0},
totalSectors = {0xd0, 0x14, 0xe0, 0x6, 0x0, 0x0, 0x0, 0x0},
logicalClusterNumberMFT = {0x0, 0x0, 0xc, 0x0, 0x0, 0x0, 0x0,
0x0},
logicalClusterNumberMFTmir = {0x2, 0x0, 0x0, 0x0, 0x0, 0x0,
0x0, 0x0},
clustersPerMFTRecord = 0x100,
clustersPerIndexBuffer = 0x451adf00,
volumeSerialNumber = {0xdf, 0x1a, 0x45, 0xa0, 0x33, 0x45,
0xa0, 0x96},
_unused3 = {0x0, 0x0, 0xff, 0x0}
},
_unused2 = {0xfa...},
signature = 0xaa55
}
---
In the bootSector struct, the volumeSerialNumber field "bled"
onto _unused3. This is not limited to gdb; the values are also
wrong when I try to use them in the D code...
I'm using gdc (Debian 4.6.3-2) 4.6.3 and gdb (GDB) 7.4.1-debian.
I thoroughly checked everything, and I'm all out of rational
explanations. I'm terribly sorry in advance if I missed something
totally obvious :-)
More information about the D.gnu
mailing list