This version is probably slightly faster:

[pascal]function Normalize(vec: tvector4f): tvector4f;
// Scales all four components of vec by an approximate reciprocal of its
// 4-component Euclidean length and stores the scaled vector in result.
//
// Notes:
//   * The length is taken over all four lanes, so the fourth component
//     contributes to it and is scaled as well.
//   * rsqrtps yields a reduced-precision approximation of 1/sqrt(x); no
//     refinement step follows, so the result is only approximately unit
//     length.
//   * No guard for a zero-length input: rsqrtps of 0 is infinity and
//     0 * infinity is NaN.
//   * NOTE(review): assumes the compiler resolves [vec] and [result] to the
//     memory of the parameter and of the return value, and that tvector4f
//     is four packed single-precision floats (16 bytes) - confirm against
//     the type declaration and the target calling convention.
//   * movups is used for both memory accesses, so neither address needs to
//     be 16-byte aligned.
//
// Register roles: xmm0 = squares / running sum / reciprocal length,
//                 xmm1 = original components, xmm2 = shuffle scratch.
asm
  movups  xmm1, [vec]        // xmm1 = (v0, v1, v2, v3), kept for the final scale
  movaps  xmm0, xmm1
  mulps   xmm0, xmm0         // xmm0 = (s0, s1, s2, s3), the squared components

  movaps  xmm2, xmm0
  shufps  xmm0, xmm2, $4E    // xmm0 = (s2, s3, s0, s1): halves swapped
  addps   xmm0, xmm2         // xmm0 = (s0+s2, s1+s3, s0+s2, s1+s3)

  movaps  xmm2, xmm0
  shufps  xmm2, xmm2, $11    // xmm2 = lanes (1, 0, 1, 0) of the partial sums
  addps   xmm0, xmm2         // every lane now holds s0+s1+s2+s3

  rsqrtps xmm0, xmm0         // every lane ~= 1 / length
  mulps   xmm1, xmm0         // scale the original components
  movups  [result], xmm1
end;[/pascal]