SSE asm with functions

Byron bheads at emich.edu
Wed Jun 15 13:05:32 PDT 2011


In the attached file xmm.d I have a function xnormal that takes a vector ( alias float[4] )  an computes the
unit vector.  The SSE code seems to work fine, but it keeps returning [nan, nan, nan, nan ] and the
writeln prints the same. But if I change the return from r ( output vector ) to v ( input vector ) it prints
the correct normal vector, and returns the input vector.  Is this my bug or a compiler bug?
DMD32 v2.053  OS X

const(vector) xnormal( ref const(vector) v )
{
    vector r;
    asm
    {
        mov EAX, v;
        movups XMM0, [EAX]; //load vector
        movaps XMM2, XMM0; // copy original data

        // find x^2 + y^2 + z^2 + w^2
        mulps XMM0, XMM0; // xx, yy, zz, ww
        movaps XMM1, XMM0; // copy, cause we will write into X0
        shufps XMM0, XMM1, 0x4e; // 0100 1110 zwxy
        addps XMM0, XMM1; // xyzw + zwxy

        movaps XMM1, XMM0; // copy, cause we will write into X0
        shufps XMM0, XMM1, 0x11; // 0001 0001 (y+w)(x+z)(y+w)(x+z)
        addps XMM0, XMM1; // (x+z)(y+w)(z+x)(w+y) + (y+w)(x+z)(y+w)(x+z)
                          // (x+z+y+w)(y+w+x+z)(z+x+y+w)(w+y+x+z)

        rsqrtps XMM0, XMM0; // 1/sqrt(XMM0)
        mulps XMM2, XMM0; // x/sqrt(x^2+y^2+z^2+w^2) , ...
        movups r, XMM2;
    }
    writeln( "Result: ", r, "\t", v );
    return r;
}

I would like to use D for a thesis projects, but wont be able to if its still this buggy.

-Byron
begin 644 xmm.d
M;6]D=6QE('AM;3L*"FEM<&]R="!S=&0N<W1D:6\L('-T9"YM871H.PH*86QI
M87, at 9FQO871;-%T@=F5C=&]R.PH*8V]N<W0H=F5C=&]R*2!N;W)M86PH(')E
M9B!C;VYS="AV96-T;W(I('8@*0I["B`@("!V96-T;W(@<F5T.PH@("`@9FQO
M870@;&5N(#T@,#L*("`@(&9O<F5A8V at H(&D[(#`N+C0@*0H@("`@("`@(&QE
M;B`K/2!V6VE=("H@=EMI73L*("`@(&QE;B`](#$N,&8O<W%R="AL96XI.PH*
M("`@(&9O<F5A8V at H(&D[(#`N+C0@*0H@("`@("`@(')E=%MI72`]("!V6VE=
M("H@;&5N.PH*("`@(')E='5R;B!R970["GT*"G9E8W1O<B!N;W)M86QI>F4H
M(')E9B!V96-T;W(@=B`I"GL*("`@(&9L;V%T(&QE;B`](#`["B`@("!F;W)E
M86-H*"!I.R`P+BXT("D*("`@("`@("!L96X@*ST@=EMI72`J('9;:5T["B`@
M("!L96X@/2`Q+C!F+W-Q<G0H;&5N*3L*"B`@("!F;W)E86-H*"!I.R`P+BXT
M("D*("`@("`@("!V6VE=("H](&QE;CL*"B`@("!R971U<FX@=CL*?0H*8V]N
M<W0H=F5C=&]R*2!X;F]R;6%L*"!R968 at 8V]N<W0H=F5C=&]R*2!V("D*>PH@
M("`@=F5C=&]R('(["B`@("!A<VT*("`@('L*("`@("`@("!M;W8 at 14%8+"!V
M.PH@("`@("`@(&UO=G5P<R!834TP+"!;14%873L at +R]L;V%D('9E8W1O<@H@
M("`@("`@(&UO=F%P<R!834TR+"!834TP.R`O+R!C;W!Y(&]R:6=I;F%L(&1A
M=&$*"B`@("`@("`@+R\@9FEN9"!X7C(@*R!Y7C(@*R!Z7C(@*R!W7C(*("`@
M("`@("!M=6QP<R!834TP+"!834TP.R`O+R!X>"P@>7DL('IZ+"!W=PH@("`@
M("`@(&UO=F%P<R!834TQ+"!834TP.R`O+R!C;W!Y+"!C875S92!W92!W:6QL
M('=R:71E(&EN=&\@6#`*("`@("`@("!S:'5F<', at 6$U-,"P at 6$U-,2P@,'@T
M93L at +R\@,#$P,"`Q,3$P('IW>'D*("`@("`@("!A9&1P<R!834TP+"!834TQ
M.R`O+R!X>7IW("L@>G=X>0H*("`@("`@("!M;W9A<', at 6$U-,2P at 6$U-,#L@
M+R\@8V]P>2P at 8V%U<V4@=V4@=VEL;"!W<FET92!I;G1O(%@P"B`@("`@("`@
M<VAU9G!S(%A-33`L(%A-33$L(#!X,3$[("\O(#`P,#$@,#`P,2`H>2MW*2AX
M*WHI*'DK=RDH>"MZ*0H@("`@("`@(&%D9'!S(%A-33`L(%A-33$[("\O("AX
M*WHI*'DK=RDH>BMX*2AW*WDI("L@*'DK=RDH>"MZ*2AY*W<I*'@K>BD*("`@
M("`@("`@("`@("`@("`@("`@("`@("`O+R`H>"MZ*WDK=RDH>2MW*W at K>BDH
M>BMX*WDK=RDH=RMY*W at K>BD*("`@("`@("`*("`@("`@("!R<W%R='!S(%A-
M33`L(%A-33`[("\O(#$O<W%R="A834TP*0H@("`@("`@(&UU;'!S(%A-33(L
M(%A-33`[("\O('@O<W%R="AX7C(K>5XR*WI>,BMW7C(I("P at +BXN"B`@("`@
M("`@;6]V=7!S('(L(%A-33(["B`@("!]"B`@("!W<FET96QN*"`B4F5S=6QT
M.B`B+"!R+"`B7'0B+"!V("D["B`@("!R971U<FX@<CL*?0H*=F5C=&]R('AN
M;W)M86QI>F4H(')E9B!V96-T;W(@=B`I"GL*("`@(&%S;0H@("`@>PH@("`@
M("`@(&UO=B!%05 at L('8["B`@("`@("`@;6]V=7!S(%A-33`L(%M%05A=.R`O
M+VQO860@=F5C=&]R"B`@("`@("`@;6]V87!S(%A-33(L(%A-33`[("\O(&-O
M<'D@;W)I9VEN86P at 9&%T80H*("`@("`@("`O+R!F:6YD('A>,B`K('E>,B`K
M('I>,B`K('=>, at H@("`@("`@(&UU;'!S(%A-33`L(%A-33`[("\O('AX+"!Y
M>2P@>GHL('=W"B`@("`@("`@;6]V87!S(%A-33$L(%A-33`[("\O(&-O<'DL
M(&-A=7-E('=E('=I;&P@=W)I=&4@:6YT;R!8,`H@("`@("`@('-H=69P<R!8
M34TP+"!834TQ+"`P>#1E.R`O+R`P,3`P(#$Q,3`@>G=X>0H@("`@("`@(&%D
M9'!S(%A-33`L(%A-33$[("\O('AY>G<@*R!Z=WAY"@H@("`@("`@(&UO=F%P
M<R!834TQ+"!834TP.R`O+R!C;W!Y+"!C875S92!W92!W:6QL('=R:71E(&EN
M=&\@6#`*("`@("`@("!S:'5F<', at 6$U-,"P at 6$U-,2P@,'@Q,3L at +R\@,#`P
M,2`P,#`Q("AY*W<I*'@K>BDH>2MW*2AX*WHI"B`@("`@("`@861D<', at 6$U-
M,"P at 6$U-,3L at +R\@*'@K>BDH>2MW*2AZ*W at I*'<K>2D@*R`H>2MW*2AX*WHI
M*'DK=RDH>"MZ*0H@("`@("`@("`@("`@("`@("`@("`@("`@("\O("AX*WHK
M>2MW*2AY*W<K>"MZ*2AZ*W at K>2MW*2AW*WDK>"MZ*0H@("`@("`@(`H@("`@
M("`@(')S<7)T<', at 6$U-,"P at 6$U-,#L at +R\@,2]S<7)T*%A-33`I"B`@("`@
M("`@;75L<', at 6$U-,BP at 6$U-,#L at +R\@>"]S<7)T*'A>,BMY7C(K>EXR*W=>
M,BD at +"`N+BX*("`@("`@("!M;W9U<', at 6T5!6%TL(%A-33(["B`@("!]"B`@
M("!R971U<FX@=CL*?0H*=F]I9"!M86EN*"D*>PH*("`@('9E8W1O<B!V,2`]
M(%LP+C4L,2XU+#,N,30Q+#!=.PH@("`@=F5C=&]R('8R(#T@=C$["@H@("`@
M=W)I=&5L;B@@(EQN4F5T=7)N(&YE=R!N;W)M86QI>F5D('9E8W1O<G,B("D[
M"B`@("!W<FET96QN*"`B4U-%.B`B+"!V,2P@(EQT(BP@('AN;W)M86PH('8Q
M("DI.PH@("`@=W)I=&5L;B@@(F9O<F5A8V at Z("(L('8R+"`B7'0B+"!N;W)M
M86PH('8R("DI.PH*("`@('=R:71E;&XH(")<;DYO<FUA;&EZ92!I;B!P;&%C
M92(@*3L*("`@('=R:71E;&XH(")34T4Z("(L('8Q+"`B7'0B+"`@>&YO<FUA
M;&EZ92@@=C$@*2D["B`@("!W<FET96QN*"`B9F]R96%C:#H@(BP@=C(L(")<
9="(L(&YO<FUA;&EZ92@@=C(@*2D["GT*"@``
`
end


More information about the Digitalmars-d mailing list