Ye-Song's picture
Add files using upload-large-folder tool
7b853a5 verified
/*
* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: Apache-2.0
*/
#pragma once
#include "Quaternion.h"
namespace Math
{
// Builds the rotation that takes the unit vector 'from' onto the unit vector 'to'.
// Both inputs must be normalized (asserted). Three cases are handled:
//  - (nearly) parallel vectors: identity
//  - (nearly) opposite vectors: a 180 degree turn around an axis perpendicular to 'from'
//  - everything else: half-angle construction from the cross and dot products
inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to)
{
    ASSERT(from.IsNormalized3() && to.IsNormalized3());

    Quaternion result;

    // Parallel vectors - return zero rotation
    Vector const dot = Vector::Dot3(from, to);
    if (dot.IsGreaterThanEqual4(Vector::OneMinusEpsilon))
    {
        result = Quaternion::Identity;
    }
    // Opposite vectors - return 180 rotation around any orthogonal axis
    else if (dot.IsLessThanEqual4(Vector::EpsilonMinusOne))
    {
        Float4 const fromValues = from.ToFloat4();

        // A 180 degree rotation is (axis, w = 0) and the axis must be perpendicular to 'from'.
        // (-z, 0, x) is always perpendicular to (x, y, z) but shrinks to zero when 'from' lies along Y,
        // so in that case (0, -z, y) is used instead. Either choice has length >= ~0.707 before normalizing.
        if (Math::Abs(fromValues.m_y) < 0.7071f)
        {
            result = Quaternion(-fromValues.m_z, 0.0f, fromValues.m_x, 0.0f);
        }
        else
        {
            result = Quaternion(0.0f, -fromValues.m_z, fromValues.m_y, 0.0f);
        }

        result.Normalize();
    }
    else // Calculate quaternion rotation
    {
        // Q = (cross.xyz, dot) - assuming Select takes the second operand in the lanes enabled by Select0001
        Vector const cross = Vector::Cross3(from, to);
        Vector Q = Vector::Select(cross, dot, Vector::Select0001);

        // Adding |Q| to w and normalizing halves the angle encoded by (cross, dot)
        Q += Vector::Select(Vector::Zero, Q.Length4(), Vector::Select0001);
        result = Quaternion(Q);
        result.Normalize();
    }

    return result;
}
// Builds the rotation that takes the unit vector 'from' onto the unit vector 'to' using an explicit
// axis/angle construction. When the cross product of the inputs has zero length (no usable axis),
// 'fallbackRotationAxis' is used as the rotation axis instead.
// NOTE(review): the axis is handed to the axis/angle constructor, which asserts that it is normalized,
// so the fallback axis presumably has to be a unit vector - confirm with callers.
inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to, const Vector& fallbackRotationAxis)
{
    ASSERT(from.IsNormalized3() && to.IsNormalized3());

    Quaternion Q(NoInit);

    Vector rotationAxis = from.Cross3(to).GetNormalized3();
    if (rotationAxis.GetLengthSquared3() == 0)
    {
        rotationAxis = fallbackRotationAxis;
    }

    // Rounding can push the dot product of two unit vectors slightly outside [-1, 1];
    // clamp it so the arc cosine below always receives a value inside its domain.
    float const dot = Math::Clamp(from.GetDot3(to), -1.0f, 1.0f);
    if (dot >= (1.0f - Math::Epsilon))
    {
        // Vectors already aligned - nothing to rotate
        Q = Quaternion::Identity;
    }
    else
    {
        float const angle = Math::ACos(dot);
        Q = Quaternion(rotationAxis, angle);
    }

    return Q;
}
// Rotation between two arbitrary-length vectors: both are normalized first and the
// normalized-vector overload does the actual work.
inline Quaternion Quaternion::FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector)
{
    Vector const unitSource = sourceVector.GetNormalized3();
    Vector const unitTarget = targetVector.GetNormalized3();
    return FromRotationBetweenNormalizedVectors(unitSource, unitTarget);
}
// Normalized linear interpolation: component-wise lerp of the two quaternions followed by a normalize.
// T must lie in [0, 1] (asserted).
inline Quaternion Quaternion::NLerp(const Quaternion& from, const Quaternion& to, float T)
{
    ASSERT(T >= 0.0f && T <= 1.0f);

    // A negative dot product means the two quaternions point into opposite hemispheres;
    // negate the start so both rotations are in the same direction.
    Quaternion start(from);
    if (Quaternion::Dot(from, to).IsLessThan4(Vector::Zero))
    {
        start.Negate();
    }

    Vector const blended = Vector::Lerp(start.ToVector(), to.ToVector(), T);

    Quaternion interpolated(blended);
    interpolated.Normalize();
    return interpolated;
}
// Spherical linear interpolation between 'from' and 'to'.
// The result is from * s0 + to * s1, where the weights are sin((1 - T) * omega) / sin(omega) and
// sin(T * omega) / sin(omega) when the inputs are far enough apart, and the plain linear weights
// (1 - T, T) otherwise. omega is the angle recovered from the 4D dot product of the inputs.
// T must lie in [0, 1] (asserted). The result is not re-normalized in this function.
inline Quaternion Quaternion::SLerp(const Quaternion& from, const Quaternion& to, float T)
{
ASSERT(T >= 0.0f && T <= 1.0f);
// Sign bit for the first lane only - used below to negate T in lane X
static SIMD::UIntMask const maskSign = { 0x80000000,0x00000000,0x00000000,0x00000000 };
// Threshold on |cos(omega)| below which the sine-ratio weights are used
static __m128 const oneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f };
// NOTE(review): the lane math below assumes this constructor broadcasts T to every lane - confirm
Vector const VecT(T);
Vector cosOmega = Quaternion::Dot(from, to);
// sign = -1 where the dot product is negative, +1 otherwise (assuming Select takes the second
// operand where the control mask is set); multiplying by it makes cosOmega non-negative
Vector control = cosOmega.LessThan(Vector::Zero);
Vector sign = Vector::Select(Vector::One, Vector::NegativeOne, control);
cosOmega = _mm_mul_ps(cosOmega, sign);
// From here on 'control' marks the lanes where the inputs are NOT nearly parallel
control = cosOmega.LessThan(oneMinusEpsilon);
// sin(omega) = sqrt(1 - cos^2(omega)), then omega = atan2(sin, cos)
Vector sinOmega = _mm_mul_ps(cosOmega, cosOmega);
sinOmega = _mm_sub_ps(Vector::One, sinOmega);
sinOmega = _mm_sqrt_ps(sinOmega);
Vector omega = Vector::ATan2(sinOmega, cosOmega);
// Build V01: keep X/Y of the T vector, flip the sign of X, then add UnitX.
// Presumably this yields (1 - T, T, 0, 0) given the mask/constant names - confirm.
Vector V01 = _mm_shuffle_ps(VecT, VecT, _MM_SHUFFLE(2, 3, 0, 1));
V01 = _mm_and_ps(V01, SIMD::g_maskXY00);
V01 = _mm_xor_ps(V01, maskSign);
V01 = _mm_add_ps(Vector::UnitX, V01);
// S0 = sin(V01 * omega) / sin(omega) per lane
Vector S0 = _mm_mul_ps(V01, omega);
S0 = Vector::Sin(S0);
S0 = _mm_div_ps(S0, sinOmega);
// Keep the linear weights V01 where the inputs are nearly parallel (avoids the division by a tiny sine)
S0 = Vector::Select(V01, S0, control);
// Lane Y holds the weight for 'to', lane X the weight for 'from'
Vector S1 = S0.GetSplatY();
S0 = S0.GetSplatX();
// Re-apply the hemisphere sign to the 'to' weight
S1 = _mm_mul_ps(S1, sign);
Vector result = _mm_mul_ps(from, S0);
S1 = _mm_mul_ps(S1, to);
result = _mm_add_ps(result, S1);
return Quaternion(result);
}
// Approximate spherical interpolation between q0 and q1 that avoids trigonometric calls.
// The two scalar weights are evaluated as a nested polynomial in (cos(theta) - 1) and t^2 using the
// precomputed u/v tables below, and the result is weight(1 - t) * q0 + weight(t) * (+/-)q1.
// Unlike the other interpolation functions in this file, 't' is not range-checked here and the
// result is not re-normalized in this function.
// NOTE(review): the constants look like those of a published fast-SLERP approximation - cite the source if known.
inline Quaternion Quaternion::FastSLerp(const Quaternion& q0, const Quaternion& q1, float t)
{
// Precomputed constants
constexpr float const mu = 1.85298109240830f;
static Vector const u0123 = _mm_setr_ps(1.f / (1 * 3), 1.f / (2 * 5), 1.f / (3 * 7), 1.f / (4 * 9));
static Vector const u4567 = _mm_setr_ps(1.f / (5 * 11), 1.f / (6 * 13), 1.f / (7 * 15), mu / (8 * 17));
static Vector const v0123 = _mm_setr_ps(1.f / 3, 2.f / 5, 3.f / 7, 4.f / 9);
static Vector const v4567 = _mm_setr_ps(5.f / 11, 6.f / 13, 7.f / 15, mu * 8 / 17);
// -0.0f has only the sign bit set, so this is a per-lane sign-bit mask
static Vector const vSignMask = _mm_set1_ps(-0.f);
// Common code for computing the scalar coefficients of SLERP
// vT is the interpolation parameter and xm1 is (cos(theta) - 1); the terms are accumulated
// from b7 down to b0 as c = 1 + b * c, and the final value is scaled by vT.
auto CalculateCoefficient = [](Vector vT, Vector xm1)
{
Vector const vTSquared = vT * vT;
// ( b4, b5, b6, b7 ) = ( x-1 ) * ( u4 * t^2 - v4, u5 * t^2 - v5, u6 * t^2 - v6, u7 * t^2 - v7 )
Vector b4567 = Vector::MultiplySubtract(u4567, vTSquared, v4567);
b4567 *= xm1;
// ( b7, b7, b7, b7 )
Vector b = b4567.GetSplatW();
Vector c = b + Vector::One;
// ( b6, b6, b6, b6 )
b = b4567.GetSplatZ();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b5, b5, b5, b5 )
b = b4567.GetSplatY();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b4, b4, b4, b4 )
b = b4567.GetSplatX();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b0, b1, b2, b3 ) =
// ( x-1)*(u0* t^2-v0, u1 * t^2 -v1, u2* t^2-v2, u3* t^2-v3 )
Vector b0123 = Vector::MultiplySubtract(u0123, vTSquared, v0123);
b0123 *= xm1;
// ( b3, b3, b3, b3 )
b = b0123.GetSplatW();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b2, b2, b2, b2 )
b = b0123.GetSplatZ();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b1, b1, b1, b1 )
b = b0123.GetSplatY();
c = Vector::MultiplyAdd(b, c, Vector::One);
// ( b0, b0, b0, b0 )
b = b0123.GetSplatX();
c = Vector::MultiplyAdd(b, c, Vector::One);
c *= vT;
return c;
};
Vector x = Vector::Dot4(q0.m_data, q1.m_data); // cos ( theta ) in all components
// Extract the sign bit of the dot product, clear it from x (x becomes |cos(theta)|) and
// apply it to q1, so q1 is negated when the two quaternions are in opposite hemispheres
Vector sign = _mm_and_ps(vSignMask, x);
x = _mm_xor_ps(sign, x);
Vector localQ1 = _mm_xor_ps(sign, q1);
Vector xm1 = x - Vector::One;
// cT weights q1 (evaluated at t), cD weights q0 (evaluated at 1 - t)
Vector cT = CalculateCoefficient(Vector(t), xm1);
Vector cD = CalculateCoefficient(Vector(1.0f - t), xm1);
cT = cT * localQ1;
Quaternion result(Vector::MultiplyAdd(cD, q0.m_data, cT));
return result;
}
// Spherical quadrangle interpolation built from three SLerp calls:
// SLerp(SLerp(q0, q3, t), SLerp(q1, q2, t), 2 * t * (1 - t)). 't' must lie in [0, 1] (asserted).
inline Quaternion Quaternion::SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t)
{
    ASSERT(t >= 0.0f && t <= 1.0f);

    Quaternion const outerBlend = Quaternion::SLerp(q0, q3, t);
    Quaternion const innerBlend = Quaternion::SLerp(q1, q2, t);

    // Blend factor between the two intermediate rotations
    float const blendFactor = (t - (t * t)) * 2;
    return Quaternion::SLerp(outerBlend, innerBlend, blendFactor);
}
// Rotation difference between two quaternions, computed as to * inverse(from).
inline Quaternion Quaternion::Delta(const Quaternion& from, const Quaternion& to)
{
    Quaternion const inverseFrom = from.GetInverse();
    return to * inverseFrom;
}
// Four-component dot product of the two quaternions, returned as a Vector.
inline Vector Quaternion::Dot(const Quaternion& q0, const Quaternion& q1)
{
    Vector const dotProduct = Vector::Dot4(q0.m_data, q1.m_data);
    return dotProduct;
}
// Angular distance between two quaternions: 2 * acos(|dot(q0, q1)|).
// The dot product is clamped to [-1, 1] before the arc cosine is taken.
inline Radians Quaternion::Distance(const Quaternion& q0, const Quaternion& q1)
{
    float const rawDot = Dot(q0, q1).ToFloat();
    float const dot = Math::Clamp(rawDot, -1.0f, 1.0f);

    // The absolute value makes q and -q count as the same orientation
    return Radians(2 * Math::ACos(Math::Abs(dot)));
}
// Tag constructor with an empty body: no value is assigned to the quaternion here.
inline Quaternion::Quaternion(NoInit_t)
{
}
// Tag constructor: initializes the quaternion from Vector::UnitW.
inline Quaternion::Quaternion(IdentityInit_t)
{
    m_data = Vector::UnitW.m_data;
}
// Wraps the raw four-lane data of a Vector as a quaternion; no normalization is performed.
inline Quaternion::Quaternion(const Vector& v)
{
    m_data = v.m_data;
}
// Builds the quaternion from four scalars; ix lands in the lowest lane and iw in the highest.
inline Quaternion::Quaternion(float ix, float iy, float iz, float iw)
    : m_data(_mm_setr_ps(ix, iy, iz, iw))
{
}
// Builds the quaternion from the four components of a Float4 (x in the lowest lane, w in the highest).
inline Quaternion::Quaternion(const Float4& v)
{
    m_data = _mm_setr_ps(v.m_x, v.m_y, v.m_z, v.m_w);
}
// Axis/angle constructor: the result is (axis.xyz * sin(angle / 2), cos(angle / 2)).
// 'axis' must be normalized (asserted).
inline Quaternion::Quaternion(const Vector& axis, Radians angle)
{
    ASSERT(axis.IsNormalized3());

    // Sine and cosine of the half angle in every lane
    Vector sinHalf, cosHalf;
    Vector::SinCos(sinHalf, cosHalf, _mm_set_ps1(0.5f * static_cast<float>(angle)));

    // Per-lane scale factors: sine masked with g_maskXYZ0, cosine masked with g_mask000W
    __m128 const sinXYZ = _mm_and_ps(sinHalf, SIMD::g_maskXYZ0);
    __m128 const cosW = _mm_and_ps(cosHalf, SIMD::g_mask000W);
    __m128 const factors = _mm_or_ps(sinXYZ, cosW);

    // Axis masked with g_maskXYZ0 and OR-ed with UnitW so the last lane picks up the cosine unchanged
    __m128 const axisXYZ = _mm_and_ps(axis, SIMD::g_maskXYZ0);
    __m128 const axisWithUnitW = _mm_or_ps(axisXYZ, Vector::UnitW);

    m_data = _mm_mul_ps(axisWithUnitW, factors);
}
// Builds the quaternion from an AxisAngle by forwarding its axis and angle to the axis/angle constructor.
inline Quaternion::Quaternion(AxisAngle axisAngle)
{
    *this = Quaternion(Vector(axisAngle.m_axis), axisAngle.m_angle);
}
// Builds the quaternion from Euler angles by composing one axis/angle rotation per coordinate axis
// and normalizing the product.
inline Quaternion::Quaternion(const EulerAngles& eulerAngles)
{
    Quaternion const aboutX(Vector::UnitX, eulerAngles.m_x);
    Quaternion const aboutY(Vector::UnitY, eulerAngles.m_y);
    Quaternion const aboutZ(Vector::UnitZ, eulerAngles.m_z);

    // Rotation order is XYZ - all in global space, hence the order is reversed
    Quaternion combined = aboutX * aboutY;
    combined = combined * aboutZ;

    m_data = combined.GetNormalized().m_data;
}
// Convenience constructor: packs the three angles into an EulerAngles and uses the Euler constructor.
inline Quaternion::Quaternion(Radians rotX, Radians rotY, Radians rotZ)
{
    *this = Quaternion(EulerAngles(rotX, rotY, rotZ));
}
// Mutable access to the underlying SSE register so the quaternion can be passed straight to intrinsics.
inline Quaternion::operator __m128& ()
{
    return this->m_data;
}
// Read-only access to the underlying SSE register so the quaternion can be passed straight to intrinsics.
inline Quaternion::operator const __m128& () const
{
    return this->m_data;
}
// Copies the four lanes into a Float4 with an unaligned store that starts at its m_x member.
inline Float4 Quaternion::ToFloat4() const
{
    Float4 components;
    _mm_storeu_ps(&components.m_x, m_data);
    return components;
}
// Returns the raw four-lane data wrapped in a Vector.
inline Vector Quaternion::ToVector() const
{
    Vector const asVector(m_data);
    return asVector;
}
// Four-component length of the quaternion, returned as a Vector (see GetLength for the scalar form).
inline Vector Quaternion::Length()
{
    Vector const components = ToVector();
    return components.Length4();
}
inline float Quaternion::GetLength() const
{
return ToVector().GetLength4();
}
// Rotation angle encoded by the quaternion: 2 * acos(w).
// w is clamped to [-1, 1] first (as Distance does for its dot product) so that rounding error
// in a nominally unit quaternion cannot push the arc cosine outside its domain.
inline Radians Quaternion::GetAngle() const
{
    float const w = Math::Clamp(GetW(), -1.0f, 1.0f);
    return Radians(2.0f * Math::ACos(w));
}
// Converts the quaternion to an axis/angle pair.
// The xyz part of the quaternion equals axis * sin(angle / 2) (see the axis/angle constructor),
// so it is normalized to recover a unit axis. For a zero xyz part (no rotation) there is
// no meaningful axis and Vector::UnitX is returned as an arbitrary one.
// The angle is 2 * acos(w), with w clamped to [-1, 1] to stay inside the arc cosine domain.
inline AxisAngle Quaternion::ToAxisAngle() const
{
    Vector const imaginary = ToVector();

    Vector axis = Vector::UnitX;
    if (imaginary.GetLengthSquared3() != 0)
    {
        axis = imaginary.GetNormalized3();
    }

    float const w = Math::Clamp(GetW(), -1.0f, 1.0f);
    return AxisAngle(axis, Radians(2.0f * Math::ACos(w)));
}
// Rotates 'vector' by this quaternion using the sandwich product conjugate * v * q
// (in this class's operator* ordering).
inline Vector Quaternion::RotateVector(const Vector& vector) const
{
    // Treat the input as a quaternion; the Select1110 mask keeps its xyz lanes
    Vector const maskedVector = Vector::Select(Vector::Select1110, vector, Vector::Select1110);
    Quaternion const vectorAsQuaternion(maskedVector);

    Quaternion rotated = GetConjugate();
    rotated *= vectorAsQuaternion;
    rotated *= *this;
    return rotated.ToVector();
}
// Applies the opposite sandwich product to RotateVector: q * v * conjugate
// (in this class's operator* ordering).
inline Vector Quaternion::RotateVectorInverse(const Vector& vector) const
{
    // Treat the input as a quaternion; the Select1110 mask keeps its xyz lanes
    Vector const maskedVector = Vector::Select(Vector::Select1110, vector, Vector::Select1110);
    Quaternion const vectorAsQuaternion(maskedVector);

    Quaternion rotated = *this;
    rotated *= vectorAsQuaternion;
    rotated *= GetConjugate();
    return rotated.ToVector();
}
// In-place conjugate: multiplies the x, y and z lanes by -1 and leaves w untouched.
inline Quaternion& Quaternion::Conjugate()
{
    __m128 const laneSigns = _mm_setr_ps(-1.0f, -1.0f, -1.0f, 1.0f);
    m_data = _mm_mul_ps(m_data, laneSigns);
    return *this;
}
// Returns a conjugated copy; this quaternion is left unchanged.
inline Quaternion Quaternion::GetConjugate() const
{
    Quaternion conjugated(*this);
    return conjugated.Conjugate();
}
// In-place negation: multiplies the quaternion's lanes by Vector::NegativeOne.
inline Quaternion& Quaternion::Negate()
{
    m_data = _mm_mul_ps(m_data, Vector::NegativeOne);
    return *this;
}
// Returns a negated copy; this quaternion is left unchanged.
inline Quaternion Quaternion::GetNegated() const
{
    Quaternion negated(*this);
    return negated.Negate();
}
// In-place inverse: q^-1 = conjugate(q) / |q|^2.
// Dividing by the squared length (not the length) is what makes q * q^-1 the identity for
// non-unit quaternions; for unit quaternions both give the conjugate.
// A quaternion whose length is at or below Vector::Epsilon has no usable inverse and becomes all zeros.
inline Quaternion& Quaternion::Invert()
{
    Vector const conjugate(GetConjugate().m_data);
    Vector const length = ToVector().Length4();

    // Lanes set where the quaternion is too short to invert
    Vector const mask = length.LessThanEqual(Vector::Epsilon);

    Vector const lengthSquared = length * length;
    Vector const result = conjugate / lengthSquared;

    // Take zero where the mask is set, the computed inverse elsewhere
    m_data = Vector::Select(result, Vector::Zero, mask);
    return *this;
}
// Returns an inverted copy; this quaternion is left unchanged.
inline Quaternion Quaternion::GetInverse() const
{
    Quaternion inverted(*this);
    return inverted.Invert();
}
// In-place normalization using the four-component normalize of Vector.
inline Quaternion& Quaternion::Normalize()
{
    Vector const unit = ToVector().GetNormalized4();
    m_data = unit.m_data;
    return *this;
}
// Returns a normalized copy; this quaternion is left unchanged.
inline Quaternion Quaternion::GetNormalized() const
{
    Quaternion normalized(*this);
    return normalized.Normalize();
}
// Returns (2x^2 + 2w^2 - 1, 2xy + 2wz, 2xz - 2wy), which for a unit quaternion is the first
// column of the standard rotation matrix, i.e. the rotated X basis vector.
inline Vector Quaternion::XAxis() const noexcept
{
    // Spill the four lanes to scalars: lanes[0..3] = x, y, z, w
    float lanes[4];
    _mm_storeu_ps(lanes, m_data);

    float const qx = lanes[0];
    float const qy = lanes[1];
    float const qz = lanes[2];
    float const qw = lanes[3];

    float const twoW = 2.0f * qw;
    float const twoX = 2.0f * qx;

    return Vector(
        twoX * qx + twoW * qw - 1.0f,
        twoX * qy + twoW * qz,
        twoX * qz + twoW * -qy);
}
// Returns (2xy - 2wz, 2y^2 + 2w^2 - 1, 2yz + 2wx), which for a unit quaternion is the second
// column of the standard rotation matrix, i.e. the rotated Y basis vector.
inline Vector Quaternion::YAxis() const noexcept
{
    // Spill the four lanes to scalars: lanes[0..3] = x, y, z, w
    float lanes[4];
    _mm_storeu_ps(lanes, m_data);

    float const qx = lanes[0];
    float const qy = lanes[1];
    float const qz = lanes[2];
    float const qw = lanes[3];

    float const twoW = 2.0f * qw;
    float const twoY = 2.0f * qy;

    return Vector(
        twoY * qx + twoW * -qz,
        twoY * qy + twoW * qw - 1.0f,
        twoY * qz + twoW * qx);
}
// Returns (2xz + 2wy, 2yz - 2wx, 2z^2 + 2w^2 - 1), which for a unit quaternion is the third
// column of the standard rotation matrix, i.e. the rotated Z basis vector.
inline Vector Quaternion::ZAxis() const noexcept
{
    // Spill the four lanes to scalars: lanes[0..3] = x, y, z, w
    float lanes[4];
    _mm_storeu_ps(lanes, m_data);

    float const qx = lanes[0];
    float const qy = lanes[1];
    float const qz = lanes[2];
    float const qw = lanes[3];

    float const twoW = 2.0f * qw;
    float const twoZ = 2.0f * qz;

    return Vector(
        qx * twoZ + twoW * qy,
        qy * twoZ + twoW * -qx,
        qz * twoZ + twoW * qw - 1.0f);
}
// Negates the quaternion in place when its w component is negative, so that it describes
// the shorter of the two equivalent rotations.
inline Quaternion& Quaternion::MakeShortestPath()
{
    // w < 0.0f is the same as dot( identity, q ) < 0, i.e. an angle above 180 degrees
    bool const isLongWayAround = GetW() < 0.0f;
    if (isLongWayAround)
    {
        Negate();
    }

    return *this;
}
// Returns a copy adjusted by MakeShortestPath; this quaternion is left unchanged.
inline Quaternion Quaternion::GetShortestPath() const
{
    Quaternion shortest(*this);
    return shortest.MakeShortestPath();
}
// In-place version of GetNormalizedInaccurate.
inline Quaternion& Quaternion::NormalizeInaccurate()
{
    Quaternion const approximated = GetNormalizedInaccurate();
    *this = approximated;
    return *this;
}
// Returns an approximately normalized copy of this quaternion.
// The squared length is summed across the four lanes with shuffles and the scale factor comes
// from _mm_rsqrt_ps, the approximate reciprocal square root, hence the reduced precision.
// No guard against a zero-length quaternion is present here.
inline Quaternion Quaternion::GetNormalizedInaccurate() const
{
// (x^2, y^2, z^2, w^2)
__m128 vLengthSq = _mm_mul_ps(m_data, m_data);
// vTemp = (z^2, w^2, z^2, w^2)
__m128 vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
// Lane 0 = x^2 + z^2, lane 1 = y^2 + w^2
vLengthSq = _mm_add_ps(vLengthSq, vTemp);
// (x^2+z^2, x^2+z^2, x^2+z^2, y^2+w^2)
vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
// Upper two lanes of vTemp become y^2 + w^2
vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
// Lane 2 now holds the complete sum x^2 + y^2 + z^2 + w^2
vLengthSq = _mm_add_ps(vLengthSq, vTemp);
// Broadcast lane 2 to all lanes
vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));
// Get the reciprocal and mul to perform the normalization
Quaternion result;
result.m_data = _mm_rsqrt_ps(vLengthSq);
result.m_data = _mm_mul_ps(result.m_data, m_data);
return result;
}
inline bool Quaternion::IsNormalized() const
{
return ToVector().IsNormalized4();
}
inline bool Quaternion::IsIdentity() const
{
return ToVector().IsEqual3(Vector::UnitW);
}
// Quaternion product of *this (called Q1 in the comments) and rhs (called Q2).
// Each component of Q2 is broadcast, multiplied with a permutation of Q1, sign-corrected with
// the control constants below, and the four partial products are summed:
//   Q2.w * Q1 + Q2.x * Q1(wzyx) * controlWZYX + Q2.y * Q1(zwxy) * controlZWXY + Q2.z * Q1(yxwz) * controlYXWZ
inline Quaternion Quaternion::operator*(const Quaternion& rhs) const
{
// Per-lane sign corrections for the three permuted partial products
static const __m128 controlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f };
static const __m128 controlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f };
static const __m128 controlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f };
// Copy to SSE registers and use as few as possible for x86
__m128 Q2X = rhs;
__m128 Q2Y = rhs;
__m128 Q2Z = rhs;
__m128 vResult = rhs;
// Splat with one instruction
vResult = _mm_shuffle_ps(vResult, vResult, _MM_SHUFFLE(3, 3, 3, 3));
Q2X = _mm_shuffle_ps(Q2X, Q2X, _MM_SHUFFLE(0, 0, 0, 0));
Q2Y = _mm_shuffle_ps(Q2Y, Q2Y, _MM_SHUFFLE(1, 1, 1, 1));
Q2Z = _mm_shuffle_ps(Q2Z, Q2Z, _MM_SHUFFLE(2, 2, 2, 2));
// Retire Q1 and perform Q1*Q2W
vResult = _mm_mul_ps(vResult, *this);
__m128 Q1Shuffle = *this;
// Shuffle the copies of Q1
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
// Mul by Q1WZYX
Q2X = _mm_mul_ps(Q2X, Q1Shuffle);
// Swap adjacent lane pairs: (w,z,y,x) -> (z,w,x,y)
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1));
// Flip the signs on lanes y and w (controlWZYX is { 1, -1, 1, -1 })
Q2X = _mm_mul_ps(Q2X, controlWZYX);
// Mul by Q1ZWXY
Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle);
// Reverse the lanes: (z,w,x,y) -> (y,x,w,z)
Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
// Flip the signs on m_z and m_w
Q2Y = _mm_mul_ps(Q2Y, controlZWXY);
// Mul by Q1YXWZ
Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle);
vResult = _mm_add_ps(vResult, Q2X);
// Flip the signs on m_x and m_w
Q2Z = _mm_mul_ps(Q2Z, controlYXWZ);
// Sum the remaining partial products into the result
Q2Y = _mm_add_ps(Q2Y, Q2Z);
vResult = _mm_add_ps(vResult, Q2Y);
return Quaternion(vResult);
}
// Compound multiply: replaces this quaternion with (*this * rhs).
inline Quaternion& Quaternion::operator*=(const Quaternion& rhs)
{
    Quaternion const product = *this * rhs;
    *this = product;
    return *this;
}
inline bool Quaternion::IsNearEqual(const Quaternion& rhs, Radians const threshold) const
{
return Quaternion::Distance(*this, rhs) <= threshold;
}
// Equality is delegated to Vector's operator== on the two quaternions' vector forms.
inline bool Quaternion::operator==(const Quaternion& rhs) const
{
    Vector const lhsComponents = ToVector();
    Vector const rhsComponents = rhs.ToVector();
    return lhsComponents == rhsComponents;
}
// Inequality is the logical negation of operator==.
inline bool Quaternion::operator!=(const Quaternion& rhs) const
{
    return !(*this == rhs);
}
// Returns a Vector with the w component (highest lane) replicated into all four lanes.
inline Vector Quaternion::GetSplatW() const
{
    __m128 const wInAllLanes = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3));
    return wInAllLanes;
}
inline float Quaternion::GetW() const
{
auto vTemp = GetSplatW();
return _mm_cvtss_f32(vTemp);
}
}