Cross module inlining in runtime
Artur Skawina
art.08.09 at gmail.com
Tue Jan 10 15:38:44 PST 2012
On 01/11/12 00:30, Iain Buclaw wrote:
> On 10 January 2012 19:49, Artur Skawina <art.08.09 at gmail.com> wrote:
>>> I have porting the runtime/phobos asms to gcc asm on my to-do list, will
>>> try to get to that within two weeks. What would be the preferred way -
>>> version() guards? if yes - what version? Or would you prefer replacing
>>> the asms, if the changes are not going to be merged upstream anyway?
>>
>> So I decided to start with this today. As I have a case where turning on
>> logging increases a program's run time from seconds to hours, while it
>> spends most of the time in the GC, I thought gcbits would be a good place
>> to start.
>>
>> But after adding gdc asm support to GCBits.testClear() the only thing that
>> changed was this:
>>
>> XXXXXXXX <uint gc.gcbits.GCBits.testClear(uint)>:
>> push %eRX
>> mov %eRX,%eRX
>> mov XX(%eRX),%eRX
>> - push %eRX
>> - mov %eRX,%eRX
>> - shr $0x5,%eRX
>> - lea XX(,%eRX,4),%eRX
>> - mov XX(%eRX),%eRX
>> - add (%eRX),%eRX
>> - mov $0x1,%eRX
>> - shl %Rl,%eRX
>> - mov %eRX,%eRX
>> mov (%eRX),%eRX
>> - not %eRX
>> - and %eRX,%eRX
>> - and %eRX,%eRX
>> - mov %eRX,(%eRX)
>> - pop %eRX
>> + mov XX(%eRX),%eRX
>> + btr %eRX,XX(%eRX)
>> + sbb %eRX,%eRX
>> pop %eRX
>> ret
>>
>> OK, the function turned into ~ three instructions, good, but why didn't it
>> then get inlined into any of the callers? Trying to force things with an
>> attribute turned up this:
>>
>
> four instructions. :~)
I'm already imagining the inlined case, where the "mov" could be free. :)
>> ../../../libphobos/gc/gcx.d: In member function 'gc.gcx.Gcx.fullcollect':
>> BUILD32/gdc/dev/gcc-4.6.1/libphobos/gc/gcbits.d:119:0: sorry, unimplemented: inlining failed in call to 'testClear': function body not available
>> ../../../libphobos/gc/gcx.d:2647:0: sorry, unimplemented: called from here
>> BUILD32/gdc/dev/gcc-4.6.1/libphobos/gc/gcbits.d:119:0: sorry, unimplemented: inlining failed in call to 'testClear': function body not available
>> ../../../libphobos/gc/gcx.d:2729:0: sorry, unimplemented: called from here
>> make[3]: *** [gc/gcx.o] Error 1
>>
>> Is there any way to make this work? Most of the asm gains will be lost
>> when the code isn't inlined.
> How is the function written?
--- druntime/gc/gcbits.d.org 2012-01-10 19:56:11.580039157 +0100
+++ druntime/gc/gcbits.d 2012-01-10 23:19:50.046264596 +0100
@@ -29,7 +29,10 @@ version (DigitalMars)
}
else version (GNU)
{
- // use the unoptimized version
+ version(X86)
+ version = GNU_Asm_x86;
+ version(X86_64)
+ version = GNU_Asm_x86;
}
else version (D_InlineAsm_X86)
{
@@ -115,6 +118,7 @@ struct GCBits
data[1 + (i >> BITS_SHIFT)] &= ~(BITS_1 << (i & BITS_MASK));
}
+ //pragma(attribute, always_inline)
wordtype testClear(size_t i)
{
version (bitops)
@@ -133,6 +137,19 @@ struct GCBits
ret 4 ;
}
}
+ else version (GNU_Asm_x86)
+ {
+ wordtype result = void;
+ asm
+ {
+ "btr %2, %0; sbb %1,%1"
+ : "+m" *(data+1), "=r" result
+ : "Ir" i
+ : "memory" ;
+
+ }
+ return result;
+ }
else
{
//result = (cast(bit *)(data + 1))[i];
@@ -164,6 +181,19 @@ struct GCBits
ret 4 ;
}
}
+ else version (GNU_Asm_x86)
+ {
+ wordtype result = void;
+ asm
+ {
+ "bts %2, %0; sbb %1,%1"
+ : "+m" *(data+1), "=r" result
+ : "Ir" i
+ : "memory" ;
+
+ }
+ return result;
+ }
else
{
//result = (cast(bit *)(data + 1))[i];
More information about the D.gnu
mailing list