/* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include "Quaternion.h" namespace Math { inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion result; // Parallel vectors - return zero rotation Vector const dot = Vector::Dot3(from, to); if (dot.IsGreaterThanEqual4(Vector::OneMinusEpsilon)) { result = Quaternion::Identity; } // Opposite vectors - return 180 rotation around any orthogonal axis else if (dot.IsLessThanEqual4(Vector::EpsilonMinusOne)) { Float4 const fromValues = from.ToFloat4(); result = Quaternion(-fromValues.m_z, fromValues.m_y, fromValues.m_x, 0); result.Normalize(); } else // Calculate quaternion rotation { Vector const cross = Vector::Cross3(from, to); Vector Q = Vector::Select(cross, dot, Vector::Select0001); Q += Vector::Select(Vector::Zero, Q.Length4(), Vector::Select0001); result = Quaternion(Q); result.Normalize(); } return result; } inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to, const Vector& fallbackRotationAxis) { ASSERT(from.IsNormalized3() && to.IsNormalized3()); Quaternion Q(NoInit); Vector rotationAxis = from.Cross3(to).GetNormalized3(); if (rotationAxis.GetLengthSquared3() == 0) { rotationAxis = fallbackRotationAxis; } float const dot = from.GetDot3(to); if (dot >= (1.0f - Math::Epsilon)) { Q = Quaternion::Identity; } else { float const angle = Math::ACos(dot); Q = Quaternion(rotationAxis, angle); } return Q; } inline Quaternion Quaternion::FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector) { return FromRotationBetweenNormalizedVectors( sourceVector.GetNormalized3(), targetVector.GetNormalized3()); } inline Quaternion Quaternion::NLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); Quaternion adjustedFrom(from); // Ensure that the rotations are in the same direction if (Quaternion::Dot(from, to).IsLessThan4(Vector::Zero)) { adjustedFrom.Negate(); } Quaternion result(Vector::Lerp(adjustedFrom.ToVector(), to.ToVector(), T)); result.Normalize(); return result; } inline Quaternion Quaternion::SLerp(const Quaternion& from, const Quaternion& to, float T) { ASSERT(T >= 0.0f && T <= 1.0f); static SIMD::UIntMask const maskSign = { 0x80000000,0x00000000,0x00000000,0x00000000 }; static __m128 const oneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f }; Vector const VecT(T); Vector cosOmega = Quaternion::Dot(from, to); Vector control = cosOmega.LessThan(Vector::Zero); Vector sign = Vector::Select(Vector::One, Vector::NegativeOne, control); cosOmega = _mm_mul_ps(cosOmega, sign); control = cosOmega.LessThan(oneMinusEpsilon); Vector sinOmega = _mm_mul_ps(cosOmega, cosOmega); sinOmega = _mm_sub_ps(Vector::One, sinOmega); sinOmega = _mm_sqrt_ps(sinOmega); Vector omega = Vector::ATan2(sinOmega, cosOmega); Vector V01 = _mm_shuffle_ps(VecT, VecT, _MM_SHUFFLE(2, 3, 0, 1)); V01 = _mm_and_ps(V01, SIMD::g_maskXY00); V01 = _mm_xor_ps(V01, maskSign); V01 = _mm_add_ps(Vector::UnitX, V01); Vector S0 = _mm_mul_ps(V01, omega); S0 = Vector::Sin(S0); S0 = _mm_div_ps(S0, sinOmega); S0 = Vector::Select(V01, S0, control); Vector S1 = S0.GetSplatY(); S0 = S0.GetSplatX(); S1 = _mm_mul_ps(S1, sign); Vector result = _mm_mul_ps(from, S0); S1 = _mm_mul_ps(S1, to); result = _mm_add_ps(result, S1); return Quaternion(result); } inline Quaternion Quaternion::FastSLerp(const Quaternion& q0, const Quaternion& q1, float t) { // Precomputed constants constexpr float const mu = 1.85298109240830f; static Vector const u0123 = _mm_setr_ps(1.f / (1 * 3), 1.f / (2 * 5), 1.f / (3 * 7), 1.f / (4 * 9)); static Vector const u4567 = _mm_setr_ps(1.f / (5 * 11), 1.f / (6 * 13), 1.f / (7 * 15), mu / (8 * 17)); static Vector const v0123 = _mm_setr_ps(1.f / 3, 2.f / 5, 3.f / 7, 4.f / 9); static Vector const v4567 = _mm_setr_ps(5.f / 11, 6.f / 13, 7.f / 15, mu * 8 / 17); static Vector const vSignMask = _mm_set1_ps(-0.f); // Common code for computing the scalar coefficients of SLERP auto CalculateCoefficient = [](Vector vT, Vector xm1) { Vector const vTSquared = vT * vT; // ( b4, b5, b6, b7 ) = ( x-1 ) * ( u4 * t^2 - v4, u5 * t^2 - v5, u6 * t^2 - v6, u7 * t^2 - v7 ) Vector b4567 = Vector::MultiplySubtract(u4567, vTSquared, v4567); b4567 *= xm1; // ( b7, b7, b7, b7 ) Vector b = b4567.GetSplatW(); Vector c = b + Vector::One; // ( b6, b6, b6, b6 ) b = b4567.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b5, b5, b5, b5 ) b = b4567.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b4, b4, b4, b4 ) b = b4567.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b1, b2, b3 ) = // ( x-1)*(u0* t^2-v0, u1 * t^2 -v1, u2* t^2-v2, u3* t^2-v3 ) Vector b0123 = Vector::MultiplySubtract(u0123, vTSquared, v0123); b0123 *= xm1; // ( b3, b3, b3, b3 ) b = b0123.GetSplatW(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b2, b2, b2, b2 ) b = b0123.GetSplatZ(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b1, b1, b1, b1 ) b = b0123.GetSplatY(); c = Vector::MultiplyAdd(b, c, Vector::One); // ( b0, b0, b0, b0 ) b = b0123.GetSplatX(); c = Vector::MultiplyAdd(b, c, Vector::One); c *= vT; return c; }; Vector x = Vector::Dot4(q0.m_data, q1.m_data); // cos ( theta ) in all components Vector sign = _mm_and_ps(vSignMask, x); x = _mm_xor_ps(sign, x); Vector localQ1 = _mm_xor_ps(sign, q1); Vector xm1 = x - Vector::One; Vector cT = CalculateCoefficient(Vector(t), xm1); Vector cD = CalculateCoefficient(Vector(1.0f - t), xm1); cT = cT * localQ1; Quaternion result(Vector::MultiplyAdd(cD, q0.m_data, cT)); return result; } inline Quaternion Quaternion::SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t) { ASSERT(t >= 0.0f && t <= 1.0f); Quaternion const q03 = Quaternion::SLerp(q0, q3, t); Quaternion const q12 = Quaternion::SLerp(q1, q2, t); t = (t - (t * t)) * 2; Quaternion const result = Quaternion::SLerp(q03, q12, t); return result; } inline Quaternion Quaternion::Delta(const Quaternion& from, const Quaternion& to) { return to * from.GetInverse(); } inline Vector Quaternion::Dot(const Quaternion& q0, const Quaternion& q1) { return Vector::Dot4(q0.m_data, q1.m_data); } inline Radians Quaternion::Distance(const Quaternion& q0, const Quaternion& q1) { float const dot = Math::Clamp(Dot(q0, q1).ToFloat(), -1.0f, 1.0f); return Radians(2 * Math::ACos(Math::Abs(dot))); } inline Quaternion::Quaternion(NoInit_t) { } inline Quaternion::Quaternion(IdentityInit_t) : m_data(Vector::UnitW.m_data) { } inline Quaternion::Quaternion(const Vector& v) : m_data(v.m_data) { } inline Quaternion::Quaternion(float ix, float iy, float iz, float iw) { m_data = _mm_set_ps(iw, iz, iy, ix); } inline Quaternion::Quaternion(const Float4& v) : Quaternion(v.m_x, v.m_y, v.m_z, v.m_w) { } inline Quaternion::Quaternion(const Vector& axis, Radians angle) { ASSERT(axis.IsNormalized3()); auto N = _mm_and_ps(axis, SIMD::g_maskXYZ0); N = _mm_or_ps(N, Vector::UnitW); auto scale = _mm_set_ps1(0.5f * (float)angle); Vector sine, cosine; Vector::SinCos(sine, cosine, scale); scale = _mm_and_ps(sine, SIMD::g_maskXYZ0); cosine = _mm_and_ps(cosine, SIMD::g_mask000W); scale = _mm_or_ps(scale, cosine); N = _mm_mul_ps(N, scale); m_data = N; } inline Quaternion::Quaternion(AxisAngle axisAngle) : Quaternion(Vector(axisAngle.m_axis), axisAngle.m_angle) { } inline Quaternion::Quaternion(const EulerAngles& eulerAngles) { auto const rotationX = Quaternion(Vector::UnitX, eulerAngles.m_x); auto const rotationY = Quaternion(Vector::UnitY, eulerAngles.m_y); auto const rotationZ = Quaternion(Vector::UnitZ, eulerAngles.m_z); // Rotation order is XYZ - all in global space, hence the order is reversed m_data = (rotationX * rotationY * rotationZ).GetNormalized().m_data; } inline Quaternion::Quaternion(Radians rotX, Radians rotY, Radians rotZ) : Quaternion(EulerAngles(rotX, rotY, rotZ)) { } inline Quaternion::operator __m128& () { return m_data; } inline Quaternion::operator const __m128& () const { return m_data; } inline Float4 Quaternion::ToFloat4() const { Float4 v; _mm_storeu_ps(&v.m_x, m_data); return v; } inline Vector Quaternion::ToVector() const { return Vector(m_data); } inline Vector Quaternion::Length() { return ToVector().Length4(); } inline float Quaternion::GetLength() const { return ToVector().GetLength4(); } inline Radians Quaternion::GetAngle() const { return Radians(2.0f * Math::ACos(GetW())); } inline AxisAngle Quaternion::ToAxisAngle() const { return AxisAngle(ToVector(), Radians(2.0f * Math::ACos(GetW()))); } inline Vector Quaternion::RotateVector(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = GetConjugate() * A; return (result * *this).ToVector(); } inline Vector Quaternion::RotateVectorInverse(const Vector& vector) const { Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110)); Quaternion const result = *this * A; return (result * GetConjugate()).ToVector(); } inline Quaternion& Quaternion::Conjugate() { static __m128 const conj = { -1.0f, -1.0f, -1.0f, 1.0f }; m_data = _mm_mul_ps(*this, conj); return *this; } inline Quaternion Quaternion::GetConjugate() const { Quaternion q = *this; q.Conjugate(); return q; } inline Quaternion& Quaternion::Negate() { m_data = _mm_mul_ps(*this, Vector::NegativeOne); return *this; } inline Quaternion Quaternion::GetNegated() const { Quaternion q = *this; q.Negate(); return q; } inline Quaternion& Quaternion::Invert() { Vector const conjugate(GetConjugate().m_data); Vector const length = ToVector().Length4(); Vector const mask = length.LessThanEqual(Vector::Epsilon); Vector const result = conjugate / length; m_data = result.Select(result, Vector::Zero, mask); return *this; } inline Quaternion Quaternion::GetInverse() const { Quaternion q = *this; q.Invert(); return q; } inline Quaternion& Quaternion::Normalize() { m_data = ToVector().GetNormalized4().m_data; return *this; } inline Quaternion Quaternion::GetNormalized() const { Quaternion q = *this; q.Normalize(); return q; } inline Vector Quaternion::XAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float x2 = 2.0f * x; return Vector( x2 * x + s * w - 1.0f, x2 * y + s * z, x2 * z + s * -y); } inline Vector Quaternion::YAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float y2 = 2.0f * y; return Vector( y2 * x + s * -z, y2 * y + s * w - 1.0f, y2 * z + s * x); } inline Vector Quaternion::ZAxis() const noexcept { const float x = _mm_cvtss_f32(m_data); const float y = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(1, 1, 1, 1))); const float z = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(2, 2, 2, 2))); const float w = _mm_cvtss_f32( _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3))); const float s = 2.0f * w; const float z2 = 2.0f * z; return Vector( x * z2 + s * y, y * z2 + s * -x, z * z2 + s * w - 1.0f); } inline Quaternion& Quaternion::MakeShortestPath() { // If we have a > 180 angle, negate // w < 0.0f is the same as dot( identity, q ) < 0 if (GetW() < 0.0f) { Negate(); } return *this; } inline Quaternion Quaternion::GetShortestPath() const { Quaternion sp = *this; sp.MakeShortestPath(); return sp; } inline Quaternion& Quaternion::NormalizeInaccurate() { *this = GetNormalizedInaccurate(); return *this; } inline Quaternion Quaternion::GetNormalizedInaccurate() const { __m128 vLengthSq = _mm_mul_ps(m_data, m_data); __m128 vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0)); vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0)); vLengthSq = _mm_add_ps(vLengthSq, vTemp); vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2)); // Get the reciprocal and mul to perform the normalization Quaternion result; result.m_data = _mm_rsqrt_ps(vLengthSq); result.m_data = _mm_mul_ps(result.m_data, m_data); return result; } inline bool Quaternion::IsNormalized() const { return ToVector().IsNormalized4(); } inline bool Quaternion::IsIdentity() const { return ToVector().IsEqual3(Vector::UnitW); } inline Quaternion Quaternion::operator*(const Quaternion& rhs) const { static const __m128 controlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f }; static const __m128 controlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f }; static const __m128 controlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f }; // Copy to SSE registers and use as few as possible for x86 __m128 Q2X = rhs; __m128 Q2Y = rhs; __m128 Q2Z = rhs; __m128 vResult = rhs; // Splat with one instruction vResult = _mm_shuffle_ps(vResult, vResult, _MM_SHUFFLE(3, 3, 3, 3)); Q2X = _mm_shuffle_ps(Q2X, Q2X, _MM_SHUFFLE(0, 0, 0, 0)); Q2Y = _mm_shuffle_ps(Q2Y, Q2Y, _MM_SHUFFLE(1, 1, 1, 1)); Q2Z = _mm_shuffle_ps(Q2Z, Q2Z, _MM_SHUFFLE(2, 2, 2, 2)); // Retire Q1 and perform Q1*Q2W vResult = _mm_mul_ps(vResult, *this); __m128 Q1Shuffle = *this; // Shuffle the copies of Q1 Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Mul by Q1WZYX Q2X = _mm_mul_ps(Q2X, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1)); // Flip the signs on m_y and m_z Q2X = _mm_mul_ps(Q2X, controlWZYX); // Mul by Q1ZWXY Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle); Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3)); // Flip the signs on m_z and m_w Q2Y = _mm_mul_ps(Q2Y, controlZWXY); // Mul by Q1YXWZ Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle); vResult = _mm_add_ps(vResult, Q2X); // Flip the signs on m_x and m_w Q2Z = _mm_mul_ps(Q2Z, controlYXWZ); Q2Y = _mm_add_ps(Q2Y, Q2Z); vResult = _mm_add_ps(vResult, Q2Y); return Quaternion(vResult); } inline Quaternion& Quaternion::operator*=(const Quaternion& rhs) { *this = *this * rhs; return *this; } inline bool Quaternion::IsNearEqual(const Quaternion& rhs, Radians const threshold) const { return Quaternion::Distance(*this, rhs) <= threshold; } inline bool Quaternion::operator==(const Quaternion& rhs) const { return ToVector() == rhs.ToVector(); } inline bool Quaternion::operator!=(const Quaternion& rhs) const { return !operator==(rhs); } inline Vector Quaternion::GetSplatW() const { return _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3)); } inline float Quaternion::GetW() const { auto vTemp = GetSplatW(); return _mm_cvtss_f32(vTemp); } }