dcompute is nearing minimal functionality
Nicholas Wilson via Digitalmars-d
digitalmars-d at puremagic.com
Wed Jul 13 20:30:44 PDT 2016
DCompute is my effort to target CUDA and SPIR to enable
hassle-free native execution on the GPU.
It is a D library plus a modification of LDC, and is available at
https://github.com/thewilsonator/ldc/tree/dcompute
The compiler is nearing minimal functionality and is able to
generate code for CUDA's PTX format.
Building it requires Khronos's LLVM:
https://github.com/KhronosGroup/SPIRV-LLVM
and the usual build requirements of LDC, i.e. a dmd-compatible D
compiler and CMake.
Post cmake configuration and generation you will possibly need to
add -L-lLLVMSPIRVLib to the (l|g)dmd invocation.
The code at the moment is very hacky and very hardcoded, with a
lot of magic, and currently requires a manual invocation of llc,
but hopefully this will improve.
$cat pointer.d
@compute module dcompute.types.pointer;
import ldc.attributes;
// Integer tags accepted as the first template argument of Pointer!(p, T).
// NOTE(review): presumably these name GPU address spaces — confirm against
// the compiler-side handling of Pointer.
enum Private = 0;
enum Global = 1; // same value as the literal 1 used in xdfgcmain's parameter types
enum Shared = 2;
enum Constant = 3;
enum Generic = 4; // upper bound in Pointer's template constraint (p <= Generic)
// Example kernel: reads the float that `b` refers to, adds `p`, and stores
// the sum through `a`.
// Both pointer parameters are Pointer!(1,float); 1 equals the Global enum value.
// `*a` and `*b` compile because Pointer forwards to its `ptr` member via
// `alias ptr this`.
@kernel //magic attribute
void xdfgcmain(Pointer!(1,float) a,Pointer!(1,float) b , float p)
{
*a = *b + p;
}
// Empty struct whose only visible use is as the `@compute` attribute on the
// module declaration at the top of this file.
struct compute {} //really hacky and somewhat magic
// Attribute labels: every declaration after this line in the module is
// pure, @trusted, nothrow and @nogc.
pure: @trusted: nothrow: @nogc:
// Wrapper around a raw T* that carries an integer tag `p` in its type.
// The template constraint rejects any `p` greater than Generic (4).
struct Pointer(uint p, T) if(p <= Generic) //magic type
{
// The wrapped raw pointer; the struct has no other fields.
T* ptr;
// Lets a Pointer be used wherever a T* is expected, e.g. `*a` in xdfgcmain.
alias ptr this;
}
$../ldcbuild/bin/ldc2 pointer.d -output-ll -O1
$llc -mcpu=sm_20 gpusuff_ptx.ll -o kernel.ptx
$cat kernel.ptx
//
// Generated by LLVM NVPTX Back-End
//
.version 3.2
.target sm_20
.address_size 64
// .globl _D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv // @_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv
// Kernel entry point for the D function xdfgcmain (D-mangled name).
// Parameters: two 8-byte, 8-aligned byte arrays (param_0, param_1) and one
// f32 scalar (param_2).
.visible .entry
_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv(
.param .align 8 .b8
_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0[8],
.param .align 8 .b8
_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1[8],
.param .f32
_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2
)
{
// Virtual register declarations: f32 registers %f0..%f3, 64-bit registers %rd0..%rd2.
.reg .f32 %f<4>;
.reg .s64 %rd<3>;
// BB#0:
// %rd1 = 64-bit value held in param_0 (used below as the store address).
ld.param.u64 %rd1,
[_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_0];
// %rd2 = 64-bit value held in param_1 (used below as the load address).
ld.param.u64 %rd2,
[_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_1];
// %f1 = the f32 scalar parameter.
ld.param.f32 %f1,
[_D8dcompute5types7pointer9xdfgcmainFS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerS8dcompute5types7pointer18__T7PointerVki1TfZ7PointerfZv_param_2];
// %f2 = f32 loaded from global memory at the address in %rd2.
ld.global.f32 %f2, [%rd2];
// %f3 = %f2 + %f1 (the .rn modifier selects round-to-nearest-even).
add.rn.f32 %f3, %f2, %f1;
// Store %f3 to global memory at the address in %rd1.
st.global.f32 [%rd1], %f3;
ret;
}
More information about the Digitalmars-d
mailing list