diff options
Diffstat (limited to 'depedencies/include/glm/gtx/simd_quat.inl')
-rw-r--r-- | depedencies/include/glm/gtx/simd_quat.inl | 620 |
1 files changed, 0 insertions, 620 deletions
diff --git a/depedencies/include/glm/gtx/simd_quat.inl b/depedencies/include/glm/gtx/simd_quat.inl deleted file mode 100644 index b84865c..0000000 --- a/depedencies/include/glm/gtx/simd_quat.inl +++ /dev/null @@ -1,620 +0,0 @@ -/// @ref gtx_simd_quat -/// @file glm/gtx/simd_quat.inl - -namespace glm{ -namespace detail{ - - -////////////////////////////////////// -// Debugging -#if 0 -void print(__m128 v) -{ - GLM_ALIGN(16) float result[4]; - _mm_store_ps(result, v); - - printf("__m128: %f %f %f %f\n", result[0], result[1], result[2], result[3]); -} - -void print(const fvec4SIMD &v) -{ - printf("fvec4SIMD: %f %f %f %f\n", v.x, v.y, v.z, v.w); -} -#endif - -////////////////////////////////////// -// Implicit basic constructors - -# if !GLM_HAS_DEFAULTED_FUNCTIONS || !defined(GLM_FORCE_NO_CTOR_INIT) - GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD() -# ifdef GLM_FORCE_NO_CTOR_INIT - : Data(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f)) -# endif - {} -# endif - -# if !GLM_HAS_DEFAULTED_FUNCTIONS - GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(fquatSIMD const & q) : - Data(q.Data) - {} -# endif//!GLM_HAS_DEFAULTED_FUNCTIONS - -GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(__m128 const & Data) : - Data(Data) -{} - -////////////////////////////////////// -// Explicit basic constructors - -GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(float const & w, float const & x, float const & y, float const & z) : - Data(_mm_set_ps(w, z, y, x)) -{} - -GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(quat const & q) : - Data(_mm_set_ps(q.w, q.z, q.y, q.x)) -{} - -GLM_FUNC_QUALIFIER fquatSIMD::fquatSIMD(vec3 const & eulerAngles) -{ - vec3 c = glm::cos(eulerAngles * 0.5f); - vec3 s = glm::sin(eulerAngles * 0.5f); - - Data = _mm_set_ps( - (c.x * c.y * c.z) + (s.x * s.y * s.z), - (c.x * c.y * s.z) - (s.x * s.y * c.z), - (c.x * s.y * c.z) + (s.x * c.y * s.z), - (s.x * c.y * c.z) - (c.x * s.y * s.z)); -} - - -////////////////////////////////////// -// Unary arithmetic operators - -#if !GLM_HAS_DEFAULTED_FUNCTIONS - GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator=(fquatSIMD const & q) - { - this->Data = q.Data; - return *this; - } -#endif//!GLM_HAS_DEFAULTED_FUNCTIONS - -GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator*=(float const & s) -{ - this->Data = _mm_mul_ps(this->Data, _mm_set_ps1(s)); - return *this; -} - -GLM_FUNC_QUALIFIER fquatSIMD& fquatSIMD::operator/=(float const & s) -{ - this->Data = _mm_div_ps(Data, _mm_set1_ps(s)); - return *this; -} - - - -// negate operator -GLM_FUNC_QUALIFIER fquatSIMD operator- (fquatSIMD const & q) -{ - return fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(-1.0f, -1.0f, -1.0f, -1.0f))); -} - -// operator+ -GLM_FUNC_QUALIFIER fquatSIMD operator+ (fquatSIMD const & q1, fquatSIMD const & q2) -{ - return fquatSIMD(_mm_add_ps(q1.Data, q2.Data)); -} - -//operator* -GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q1, fquatSIMD const & q2) -{ - // SSE2 STATS: - // 11 shuffle - // 8 mul - // 8 add - - // SSE4 STATS: - // 3 shuffle - // 4 mul - // 4 dpps - - __m128 mul0 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(0, 1, 2, 3))); - __m128 mul1 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(1, 0, 3, 2))); - __m128 mul2 = _mm_mul_ps(q1.Data, _mm_shuffle_ps(q2.Data, q2.Data, _MM_SHUFFLE(2, 3, 0, 1))); - __m128 mul3 = _mm_mul_ps(q1.Data, q2.Data); - -# if(GLM_ARCH & GLM_ARCH_SSE41_BIT) - __m128 add0 = _mm_dp_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f), 0xff); - __m128 add1 = _mm_dp_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f), 0xff); - __m128 add2 = _mm_dp_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f), 0xff); - __m128 add3 = _mm_dp_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f), 0xff); -# else - mul0 = _mm_mul_ps(mul0, _mm_set_ps(1.0f, -1.0f, 1.0f, 1.0f)); - __m128 add0 = _mm_add_ps(mul0, _mm_movehl_ps(mul0, mul0)); - add0 = _mm_add_ss(add0, _mm_shuffle_ps(add0, add0, 1)); - - mul1 = _mm_mul_ps(mul1, _mm_set_ps(1.0f, 1.0f, 1.0f, -1.0f)); - __m128 add1 = _mm_add_ps(mul1, _mm_movehl_ps(mul1, mul1)); - add1 = _mm_add_ss(add1, _mm_shuffle_ps(add1, add1, 1)); - - mul2 = _mm_mul_ps(mul2, _mm_set_ps(1.0f, 1.0f, -1.0f, 1.0f)); - __m128 add2 = _mm_add_ps(mul2, _mm_movehl_ps(mul2, mul2)); - add2 = _mm_add_ss(add2, _mm_shuffle_ps(add2, add2, 1)); - - mul3 = _mm_mul_ps(mul3, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f)); - __m128 add3 = _mm_add_ps(mul3, _mm_movehl_ps(mul3, mul3)); - add3 = _mm_add_ss(add3, _mm_shuffle_ps(add3, add3, 1)); -#endif - - - // This SIMD code is a politically correct way of doing this, but in every test I've tried it has been slower than - // the final code below. I'll keep this here for reference - maybe somebody else can do something better... - // - //__m128 xxyy = _mm_shuffle_ps(add0, add1, _MM_SHUFFLE(0, 0, 0, 0)); - //__m128 zzww = _mm_shuffle_ps(add2, add3, _MM_SHUFFLE(0, 0, 0, 0)); - // - //return _mm_shuffle_ps(xxyy, zzww, _MM_SHUFFLE(2, 0, 2, 0)); - - float x; - float y; - float z; - float w; - - _mm_store_ss(&x, add0); - _mm_store_ss(&y, add1); - _mm_store_ss(&z, add2); - _mm_store_ss(&w, add3); - - return detail::fquatSIMD(w, x, y, z); -} - -GLM_FUNC_QUALIFIER fvec4SIMD operator* (fquatSIMD const & q, fvec4SIMD const & v) -{ - static const __m128 two = _mm_set1_ps(2.0f); - - __m128 q_wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 q_swp0 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 q_swp1 = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 v_swp0 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 v_swp1 = _mm_shuffle_ps(v.Data, v.Data, _MM_SHUFFLE(3, 1, 0, 2)); - - __m128 uv = _mm_sub_ps(_mm_mul_ps(q_swp0, v_swp1), _mm_mul_ps(q_swp1, v_swp0)); - __m128 uv_swp0 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 0, 2, 1)); - __m128 uv_swp1 = _mm_shuffle_ps(uv, uv, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 uuv = _mm_sub_ps(_mm_mul_ps(q_swp0, uv_swp1), _mm_mul_ps(q_swp1, uv_swp0)); - - - uv = _mm_mul_ps(uv, _mm_mul_ps(q_wwww, two)); - uuv = _mm_mul_ps(uuv, two); - - return _mm_add_ps(v.Data, _mm_add_ps(uv, uuv)); -} - -GLM_FUNC_QUALIFIER fvec4SIMD operator* (fvec4SIMD const & v, fquatSIMD const & q) -{ - return glm::inverse(q) * v; -} - -GLM_FUNC_QUALIFIER fquatSIMD operator* (fquatSIMD const & q, float s) -{ - return fquatSIMD(_mm_mul_ps(q.Data, _mm_set1_ps(s))); -} - -GLM_FUNC_QUALIFIER fquatSIMD operator* (float s, fquatSIMD const & q) -{ - return fquatSIMD(_mm_mul_ps(_mm_set1_ps(s), q.Data)); -} - - -//operator/ -GLM_FUNC_QUALIFIER fquatSIMD operator/ (fquatSIMD const & q, float s) -{ - return fquatSIMD(_mm_div_ps(q.Data, _mm_set1_ps(s))); -} - - -}//namespace detail - - -GLM_FUNC_QUALIFIER quat quat_cast -( - detail::fquatSIMD const & x -) -{ - GLM_ALIGN(16) quat Result; - _mm_store_ps(&Result[0], x.Data); - - return Result; -} - -template <typename T> -GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast_impl(const T m0[], const T m1[], const T m2[]) -{ - T trace = m0[0] + m1[1] + m2[2] + T(1.0); - if (trace > T(0)) - { - T s = static_cast<T>(0.5) / sqrt(trace); - - return _mm_set_ps( - static_cast<float>(T(0.25) / s), - static_cast<float>((m0[1] - m1[0]) * s), - static_cast<float>((m2[0] - m0[2]) * s), - static_cast<float>((m1[2] - m2[1]) * s)); - } - else - { - if (m0[0] > m1[1]) - { - if (m0[0] > m2[2]) - { - // X is biggest. - T s = sqrt(m0[0] - m1[1] - m2[2] + T(1.0)) * T(0.5); - - return _mm_set_ps( - static_cast<float>((m1[2] - m2[1]) * s), - static_cast<float>((m2[0] + m0[2]) * s), - static_cast<float>((m0[1] + m1[0]) * s), - static_cast<float>(T(0.5) * s)); - } - } - else - { - if (m1[1] > m2[2]) - { - // Y is biggest. - T s = sqrt(m1[1] - m0[0] - m2[2] + T(1.0)) * T(0.5); - - return _mm_set_ps( - static_cast<float>((m2[0] - m0[2]) * s), - static_cast<float>((m1[2] + m2[1]) * s), - static_cast<float>(T(0.5) * s), - static_cast<float>((m0[1] + m1[0]) * s)); - } - } - - // Z is biggest. - T s = sqrt(m2[2] - m0[0] - m1[1] + T(1.0)) * T(0.5); - - return _mm_set_ps( - static_cast<float>((m0[1] - m1[0]) * s), - static_cast<float>(T(0.5) * s), - static_cast<float>((m1[2] + m2[1]) * s), - static_cast<float>((m2[0] + m0[2]) * s)); - } -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast -( - detail::fmat4x4SIMD const & m -) -{ - // Scalar implementation for now. - GLM_ALIGN(16) float m0[4]; - GLM_ALIGN(16) float m1[4]; - GLM_ALIGN(16) float m2[4]; - - _mm_store_ps(m0, m[0].Data); - _mm_store_ps(m1, m[1].Data); - _mm_store_ps(m2, m[2].Data); - - return quatSIMD_cast_impl(m0, m1, m2); -} - -template <typename T, precision P> -GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast -( - tmat4x4<T, P> const & m -) -{ - return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); -} - -template <typename T, precision P> -GLM_FUNC_QUALIFIER detail::fquatSIMD quatSIMD_cast -( - tmat3x3<T, P> const & m -) -{ - return quatSIMD_cast_impl(&m[0][0], &m[1][0], &m[2][0]); -} - - -GLM_FUNC_QUALIFIER detail::fmat4x4SIMD mat4SIMD_cast -( - detail::fquatSIMD const & q -) -{ - detail::fmat4x4SIMD result; - - __m128 _wwww = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 _xyzw = q.Data; - __m128 _zxyw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 _yzxw = _mm_shuffle_ps(q.Data, q.Data, _MM_SHUFFLE(3, 0, 2, 1)); - - __m128 _xyzw2 = _mm_add_ps(_xyzw, _xyzw); - __m128 _zxyw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 1, 0, 2)); - __m128 _yzxw2 = _mm_shuffle_ps(_xyzw2, _xyzw2, _MM_SHUFFLE(3, 0, 2, 1)); - - __m128 _tmp0 = _mm_sub_ps(_mm_set1_ps(1.0f), _mm_mul_ps(_yzxw2, _yzxw)); - _tmp0 = _mm_sub_ps(_tmp0, _mm_mul_ps(_zxyw2, _zxyw)); - - __m128 _tmp1 = _mm_mul_ps(_yzxw2, _xyzw); - _tmp1 = _mm_add_ps(_tmp1, _mm_mul_ps(_zxyw2, _wwww)); - - __m128 _tmp2 = _mm_mul_ps(_zxyw2, _xyzw); - _tmp2 = _mm_sub_ps(_tmp2, _mm_mul_ps(_yzxw2, _wwww)); - - - // There's probably a better, more politically correct way of doing this... - result[0].Data = _mm_set_ps( - 0.0f, - reinterpret_cast<float*>(&_tmp2)[0], - reinterpret_cast<float*>(&_tmp1)[0], - reinterpret_cast<float*>(&_tmp0)[0]); - - result[1].Data = _mm_set_ps( - 0.0f, - reinterpret_cast<float*>(&_tmp1)[1], - reinterpret_cast<float*>(&_tmp0)[1], - reinterpret_cast<float*>(&_tmp2)[1]); - - result[2].Data = _mm_set_ps( - 0.0f, - reinterpret_cast<float*>(&_tmp0)[2], - reinterpret_cast<float*>(&_tmp2)[2], - reinterpret_cast<float*>(&_tmp1)[2]); - - result[3].Data = _mm_set_ps( - 1.0f, - 0.0f, - 0.0f, - 0.0f); - - - return result; -} - -GLM_FUNC_QUALIFIER mat4 mat4_cast -( - detail::fquatSIMD const & q -) -{ - return mat4_cast(mat4SIMD_cast(q)); -} - - - -GLM_FUNC_QUALIFIER float length -( - detail::fquatSIMD const & q -) -{ - return glm::sqrt(dot(q, q)); -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD normalize -( - detail::fquatSIMD const & q -) -{ - return _mm_mul_ps(q.Data, _mm_set1_ps(1.0f / length(q))); -} - -GLM_FUNC_QUALIFIER float dot -( - detail::fquatSIMD const & q1, - detail::fquatSIMD const & q2 -) -{ - float result; - _mm_store_ss(&result, detail::sse_dot_ps(q1.Data, q2.Data)); - - return result; -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD mix -( - detail::fquatSIMD const & x, - detail::fquatSIMD const & y, - float const & a -) -{ - float cosTheta = dot(x, y); - - if (cosTheta > 1.0f - glm::epsilon<float>()) - { - return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); - } - else - { - float angle = glm::acos(cosTheta); - - - float s0 = glm::sin((1.0f - a) * angle); - float s1 = glm::sin(a * angle); - float d = 1.0f / glm::sin(angle); - - return (s0 * x + s1 * y) * d; - } -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD lerp -( - detail::fquatSIMD const & x, - detail::fquatSIMD const & y, - float const & a -) -{ - // Lerp is only defined in [0, 1] - assert(a >= 0.0f); - assert(a <= 1.0f); - - return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD slerp -( - detail::fquatSIMD const & x, - detail::fquatSIMD const & y, - float const & a -) -{ - detail::fquatSIMD z = y; - - float cosTheta = dot(x, y); - - // If cosTheta < 0, the interpolation will take the long way around the sphere. - // To fix this, one quat must be negated. - if (cosTheta < 0.0f) - { - z = -y; - cosTheta = -cosTheta; - } - - // Perform a linear interpolation when cosTheta is close to 1 to avoid side effect of sin(angle) becoming a zero denominator - if(cosTheta > 1.0f - epsilon<float>()) - { - return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); - } - else - { - float angle = glm::acos(cosTheta); - - - float s0 = glm::sin((1.0f - a) * angle); - float s1 = glm::sin(a * angle); - float d = 1.0f / glm::sin(angle); - - return (s0 * x + s1 * y) * d; - } -} - - -GLM_FUNC_QUALIFIER detail::fquatSIMD fastMix -( - detail::fquatSIMD const & x, - detail::fquatSIMD const & y, - float const & a -) -{ - float cosTheta = dot(x, y); - - if (cosTheta > 1.0f - glm::epsilon<float>()) - { - return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); - } - else - { - float angle = glm::fastAcos(cosTheta); - - - __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); - - __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); - - return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); - } -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD fastSlerp -( - detail::fquatSIMD const & x, - detail::fquatSIMD const & y, - float const & a -) -{ - detail::fquatSIMD z = y; - - float cosTheta = dot(x, y); - if (cosTheta < 0.0f) - { - z = -y; - cosTheta = -cosTheta; - } - - - if(cosTheta > 1.0f - epsilon<float>()) - { - return _mm_add_ps(x.Data, _mm_mul_ps(_mm_set1_ps(a), _mm_sub_ps(y.Data, x.Data))); - } - else - { - float angle = glm::fastAcos(cosTheta); - - - __m128 s = glm::fastSin(_mm_set_ps((1.0f - a) * angle, a * angle, angle, 0.0f)); - - __m128 s0 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(3, 3, 3, 3)); - __m128 s1 = _mm_shuffle_ps(s, s, _MM_SHUFFLE(2, 2, 2, 2)); - __m128 d = _mm_div_ps(_mm_set1_ps(1.0f), _mm_shuffle_ps(s, s, _MM_SHUFFLE(1, 1, 1, 1))); - - return _mm_mul_ps(_mm_add_ps(_mm_mul_ps(s0, x.Data), _mm_mul_ps(s1, y.Data)), d); - } -} - - - -GLM_FUNC_QUALIFIER detail::fquatSIMD conjugate -( - detail::fquatSIMD const & q -) -{ - return detail::fquatSIMD(_mm_mul_ps(q.Data, _mm_set_ps(1.0f, -1.0f, -1.0f, -1.0f))); -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD inverse -( - detail::fquatSIMD const & q -) -{ - return conjugate(q) / dot(q, q); -} - - -GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD -( - float const & angle, - vec3 const & v -) -{ - float s = glm::sin(angle * 0.5f); - - return _mm_set_ps( - glm::cos(angle * 0.5f), - v.z * s, - v.y * s, - v.x * s); -} - -GLM_FUNC_QUALIFIER detail::fquatSIMD angleAxisSIMD -( - float const & angle, - float const & x, - float const & y, - float const & z -) -{ - return angleAxisSIMD(angle, vec3(x, y, z)); -} - - -GLM_FUNC_QUALIFIER __m128 fastSin(__m128 x) -{ - static const __m128 c0 = _mm_set1_ps(0.16666666666666666666666666666667f); - static const __m128 c1 = _mm_set1_ps(0.00833333333333333333333333333333f); - static const __m128 c2 = _mm_set1_ps(0.00019841269841269841269841269841f); - - __m128 x3 = _mm_mul_ps(x, _mm_mul_ps(x, x)); - __m128 x5 = _mm_mul_ps(x3, _mm_mul_ps(x, x)); - __m128 x7 = _mm_mul_ps(x5, _mm_mul_ps(x, x)); - - __m128 y0 = _mm_mul_ps(x3, c0); - __m128 y1 = _mm_mul_ps(x5, c1); - __m128 y2 = _mm_mul_ps(x7, c2); - - return _mm_sub_ps(_mm_add_ps(_mm_sub_ps(x, y0), y1), y2); -} - - -}//namespace glm |