Add files using upload-large-folder tool

7b853a5 verified 2 months ago

18.9 kB

	/*
	* SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
	* SPDX-License-Identifier: Apache-2.0
	*/

	#pragma once

	#include "Quaternion.h"

	namespace Math
	{
	// Builds the rotation that takes the unit vector `from` onto the unit vector `to`.
	// Both inputs must be normalized (asserted).
	// - Nearly parallel inputs yield the identity rotation.
	// - Nearly opposite inputs yield a 180 degree rotation about an axis orthogonal to `from`.
	// - Otherwise the quaternion is built from the cross product and the dot product.
	inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to)
	{
		ASSERT(from.IsNormalized3() && to.IsNormalized3());

		Quaternion result;

		// Parallel vectors - return zero rotation
		Vector const dot = Vector::Dot3(from, to);
		if (dot.IsGreaterThanEqual4(Vector::OneMinusEpsilon))
		{
			result = Quaternion::Identity;
		}
		// Opposite vectors - return 180 rotation around any orthogonal axis
		else if (dot.IsLessThanEqual4(Vector::EpsilonMinusOne))
		{
			Float4 const fromValues = from.ToFloat4();

			// A quaternion with w == 0 is a half turn about its xyz axis, so that axis must be
			// perpendicular to `from`. The previous axis (-z, y, x) has a dot product of y^2 with
			// `from`, which is only orthogonal when y == 0. Pick one of two axes that are
			// orthogonal by construction, choosing the one that cannot degenerate to zero length.
			if (Math::Abs(fromValues.m_x) > Math::Abs(fromValues.m_z))
			{
				// (-y, x, 0) . (x, y, z) == 0, and x != 0 here
				result = Quaternion(-fromValues.m_y, fromValues.m_x, 0.0f, 0.0f);
			}
			else
			{
				// (0, -z, y) . (x, y, z) == 0, and y^2 + z^2 >= 0.5 for a unit vector here
				result = Quaternion(0.0f, -fromValues.m_z, fromValues.m_y, 0.0f);
			}

			result.Normalize();
		}
		else // Calculate quaternion rotation
		{
			// Q = (cross.xyz, dot), then w += |Q|. Normalizing that gives the half-angle quaternion.
			Vector const cross = Vector::Cross3(from, to);
			Vector Q = Vector::Select(cross, dot, Vector::Select0001);
			Q += Vector::Select(Vector::Zero, Q.Length4(), Vector::Select0001);
			result = Quaternion(Q);
			result.Normalize();
		}

		return result;
	}

	// Builds the rotation that takes the unit vector `from` onto the unit vector `to`, using
	// `fallbackRotationAxis` when the two vectors do not define a rotation axis themselves
	// (i.e. their normalized cross product has zero length).
	// Both `from` and `to` must be normalized (asserted). The fallback axis is passed straight to
	// the axis/angle constructor, which asserts that its axis is normalized.
	inline Quaternion Quaternion::FromRotationBetweenNormalizedVectors(const Vector& from, const Vector& to, const Vector& fallbackRotationAxis)
	{
		ASSERT(from.IsNormalized3() && to.IsNormalized3());

		// Nearly parallel: no rotation needed, so skip the cross product and normalization entirely.
		float const dot = from.GetDot3(to);
		if (dot >= (1.0f - Math::Epsilon))
		{
			return Quaternion::Identity;
		}

		Vector rotationAxis = from.Cross3(to).GetNormalized3();
		if (rotationAxis.GetLengthSquared3() == 0)
		{
			rotationAxis = fallbackRotationAxis;
		}

		// The inputs are only normalized to within a tolerance, so the dot product can land
		// slightly below -1. Clamp it so the arc-cosine always receives a value in its domain.
		float const angle = Math::ACos(Math::Clamp(dot, -1.0f, 1.0f));
		return Quaternion(rotationAxis, angle);
	}

	// Rotation between two vectors of arbitrary length: both are normalized first and the
	// result is forwarded to the two-argument FromRotationBetweenNormalizedVectors.
	inline Quaternion Quaternion::FromRotationBetweenVectors(const Vector& sourceVector, const Vector& targetVector)
	{
		Vector const unitSource = sourceVector.GetNormalized3();
		Vector const unitTarget = targetVector.GetNormalized3();
		return FromRotationBetweenNormalizedVectors(unitSource, unitTarget);
	}

	// Normalized linear interpolation between `from` and `to`.
	// T must lie in [0, 1] (asserted). The result is re-normalized before being returned.
	inline Quaternion Quaternion::NLerp(const Quaternion& from, const Quaternion& to, float T)
	{
		ASSERT(T >= 0.0f && T <= 1.0f);

		// A negative 4D dot product means the inputs point in opposite directions;
		// negate the start so that both rotations are in the same direction.
		bool const needsFlip = Quaternion::Dot(from, to).IsLessThan4(Vector::Zero);
		Quaternion const start = needsFlip ? from.GetNegated() : from;

		Vector const blended = Vector::Lerp(start.ToVector(), to.ToVector(), T);
		return Quaternion(blended).GetNormalized();
	}

	// Spherical linear interpolation between `from` and `to`, implemented with SSE intrinsics.
	// T must lie in [0, 1] (asserted). The result is returned without an explicit normalization step.
	// NOTE(review): the lane comments below assume Vector::Select(a, b, control) takes `b` in the
	// lanes where `control` is set, and that Vector(T) broadcasts T to all lanes - confirm against Vector.
	inline Quaternion Quaternion::SLerp(const Quaternion& from, const Quaternion& to, float T)
	{
	ASSERT(T >= 0.0f && T <= 1.0f);

	// Sign bit in the x lane only; XOR-ing with it negates just that lane
	static SIMD::UIntMask const maskSign = { 0x80000000,0x00000000,0x00000000,0x00000000 };
	// Threshold on cos(omega) above which the sine-based weights are not used
	static __m128 const oneMinusEpsilon = { 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f, 1.0f - 0.00001f };

	Vector const VecT(T);

	// 4D dot product of the two quaternions
	Vector cosOmega = Quaternion::Dot(from, to);

	// sign = -1 where the dot product is negative, +1 otherwise
	Vector control = cosOmega.LessThan(Vector::Zero);
	Vector sign = Vector::Select(Vector::One, Vector::NegativeOne, control);

	// Make the cosine non-negative, then flag whether it is below the threshold
	cosOmega = _mm_mul_ps(cosOmega, sign);
	control = cosOmega.LessThan(oneMinusEpsilon);

	// sin(omega) = sqrt(1 - cos(omega)^2)
	Vector sinOmega = _mm_mul_ps(cosOmega, cosOmega);
	sinOmega = _mm_sub_ps(Vector::One, sinOmega);
	sinOmega = _mm_sqrt_ps(sinOmega);

	Vector omega = Vector::ATan2(sinOmega, cosOmega);

	// Build the linear weights: presumably (1 - T, T, 0, 0) given the mask/UnitX names
	Vector V01 = _mm_shuffle_ps(VecT, VecT, _MM_SHUFFLE(2, 3, 0, 1));
	V01 = _mm_and_ps(V01, SIMD::g_maskXY00);
	V01 = _mm_xor_ps(V01, maskSign);
	V01 = _mm_add_ps(Vector::UnitX, V01);

	// Spherical weights sin(weight * omega) / sin(omega); the linear weights are used instead
	// in lanes where the control mask is not set (cosine at or above the threshold)
	Vector S0 = _mm_mul_ps(V01, omega);
	S0 = Vector::Sin(S0);
	S0 = _mm_div_ps(S0, sinOmega);
	S0 = Vector::Select(V01, S0, control);

	// Broadcast the two weights: S0 (x lane) scales `from`, S1 (y lane) scales `to`
	Vector S1 = S0.GetSplatY();
	S0 = S0.GetSplatX();

	// Re-apply the sign to the `to` weight and blend
	S1 = _mm_mul_ps(S1, sign);
	Vector result = _mm_mul_ps(from, S0);
	S1 = _mm_mul_ps(S1, to);
	result = _mm_add_ps(result, S1);

	return Quaternion(result);
	}

	// Approximate spherical interpolation between q0 and q1 at parameter t.
	// The two blend weights are evaluated by the same nested polynomial in (cos(theta) - 1), once for
	// t and once for (1 - t); no trigonometric functions are called.
	// Unlike SLerp, t is not range-checked here and the result is not explicitly normalized.
	// NOTE(review): the constants look like those from Eberly's "A Fast and Accurate Algorithm for
	// Computing SLERP" - confirm the source before changing any of them.
	inline Quaternion Quaternion::FastSLerp(const Quaternion& q0, const Quaternion& q1, float t)
	{
	// Precomputed constants
	constexpr float const mu = 1.85298109240830f;
	static Vector const u0123 = _mm_setr_ps(1.f / (1 * 3), 1.f / (2 * 5), 1.f / (3 * 7), 1.f / (4 * 9));
	static Vector const u4567 = _mm_setr_ps(1.f / (5 * 11), 1.f / (6 * 13), 1.f / (7 * 15), mu / (8 * 17));
	static Vector const v0123 = _mm_setr_ps(1.f / 3, 2.f / 5, 3.f / 7, 4.f / 9);
	static Vector const v4567 = _mm_setr_ps(5.f / 11, 6.f / 13, 7.f / 15, mu * 8 / 17);
	// -0.0f has only the sign bit set, so this is a per-lane sign-bit mask
	static Vector const vSignMask = _mm_set1_ps(-0.f);

	// Common code for computing the scalar coefficients of SLERP
	// vT: interpolation parameter in all lanes; xm1: (cos(theta) - 1) in all lanes
	auto CalculateCoefficient = [](Vector vT, Vector xm1)
	{
	Vector const vTSquared = vT * vT;

	// ( b4, b5, b6, b7 ) = ( x-1 ) * ( u4 * t^2 - v4, u5 * t^2 - v5, u6 * t^2 - v6, u7 * t^2 - v7 )
	Vector b4567 = Vector::MultiplySubtract(u4567, vTSquared, v4567);
	b4567 *= xm1;

	// Nested evaluation from the innermost term outwards: c = 1 + b_i * c

	// ( b7, b7, b7, b7 )
	Vector b = b4567.GetSplatW();
	Vector c = b + Vector::One;

	// ( b6, b6, b6, b6 )
	b = b4567.GetSplatZ();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b5, b5, b5, b5 )
	b = b4567.GetSplatY();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b4, b4, b4, b4 )
	b = b4567.GetSplatX();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b0, b1, b2, b3 ) =
	// ( x-1)(u0 t^2-v0, u1 * t^2 -v1, u2* t^2-v2, u3* t^2-v3 )
	Vector b0123 = Vector::MultiplySubtract(u0123, vTSquared, v0123);
	b0123 *= xm1;

	// ( b3, b3, b3, b3 )
	b = b0123.GetSplatW();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b2, b2, b2, b2 )
	b = b0123.GetSplatZ();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b1, b1, b1, b1 )
	b = b0123.GetSplatY();
	c = Vector::MultiplyAdd(b, c, Vector::One);

	// ( b0, b0, b0, b0 )
	b = b0123.GetSplatX();
	c = Vector::MultiplyAdd(b, c, Vector::One);
	// Final scale by the interpolation parameter itself
	c *= vT;

	return c;
	};

	Vector x = Vector::Dot4(q0.m_data, q1.m_data); // cos ( theta ) in all components

	// Extract the sign bit of the dot product, clear it from x (x = |x|), and apply the same
	// sign flip to q1 so that the interpolation runs between quaternions with a non-negative dot
	Vector sign = _mm_and_ps(vSignMask, x);
	x = _mm_xor_ps(sign, x);
	Vector localQ1 = _mm_xor_ps(sign, q1);

	Vector xm1 = x - Vector::One;

	// cT weights the (sign-adjusted) q1, cD weights q0
	Vector cT = CalculateCoefficient(Vector(t), xm1);
	Vector cD = CalculateCoefficient(Vector(1.0f - t), xm1);
	cT = cT * localQ1;

	// result = cD * q0 + cT * localQ1
	Quaternion result(Vector::MultiplyAdd(cD, q0.m_data, cT));
	return result;
	}

	// Spherical quadrangle interpolation: SLerp(q0, q3, t) and SLerp(q1, q2, t) are blended
	// with a final SLerp whose parameter is 2 * t * (1 - t).
	// t must lie in [0, 1] (asserted).
	inline Quaternion Quaternion::SQuad(const Quaternion& q0, const Quaternion& q1, const Quaternion& q2, const Quaternion& q3, float t)
	{
		ASSERT(t >= 0.0f && t <= 1.0f);

		Quaternion const outerBlend = Quaternion::SLerp(q0, q3, t);
		Quaternion const innerBlend = Quaternion::SLerp(q1, q2, t);

		// Kept in a separate local rather than overwriting the parameter
		float const blendFactor = (t - (t * t)) * 2;
		return Quaternion::SLerp(outerBlend, innerBlend, blendFactor);
	}

	// Returns `to` multiplied by the inverse of `from` (using this class's operator*).
	inline Quaternion Quaternion::Delta(const Quaternion& from, const Quaternion& to)
	{
		Quaternion const inverseFrom = from.GetInverse();
		return to * inverseFrom;
	}

	// 4-component dot product of two quaternions, returned as a Vector.
	inline Vector Quaternion::Dot(const Quaternion& q0, const Quaternion& q1)
	{
		Vector const dot4 = Vector::Dot4(q0.m_data, q1.m_data);
		return dot4;
	}

	// Angular distance between two quaternions: 2 * acos(|dot(q0, q1)|).
	// The dot product is clamped to [-1, 1] before the arc-cosine is taken, and the absolute
	// value makes q and -q compare as zero distance apart.
	inline Radians Quaternion::Distance(const Quaternion& q0, const Quaternion& q1)
	{
		float const rawDot = Dot(q0, q1).ToFloat();
		float const dot = Math::Clamp(rawDot, -1.0f, 1.0f);
		auto const halfAngle = Math::ACos(Math::Abs(dot));
		return Radians(2 * halfAngle);
	}

	// Tag constructor with an empty body: m_data is not assigned here.
	inline Quaternion::Quaternion(NoInit_t)
	{
	}

	// Tag constructor that initializes the quaternion from Vector::UnitW.
	inline Quaternion::Quaternion(IdentityInit_t)
	: m_data(Vector::UnitW.m_data)
	{
	}

	// Wraps the four lanes of `v` as a quaternion without modifying them (no normalization).
	inline Quaternion::Quaternion(const Vector& v)
	: m_data(v.m_data)
	{
	}

	// Builds a quaternion from four scalars; ix lands in the lowest lane, iw in the highest.
	inline Quaternion::Quaternion(float ix, float iy, float iz, float iw)
		: m_data(_mm_setr_ps(ix, iy, iz, iw)) // setr takes its arguments in lane order
	{
	}

	// Builds a quaternion from a Float4 by delegating to the four-scalar constructor.
	inline Quaternion::Quaternion(const Float4& v)
	: Quaternion(v.m_x, v.m_y, v.m_z, v.m_w)
	{
	}

	// Builds a quaternion from a rotation axis and an angle.
	// `axis` must be normalized (asserted). The result is (axis.xyz * sin(angle / 2), cos(angle / 2)).
	// NOTE(review): lane comments assume g_maskXYZ0 / g_mask000W keep the lanes their names suggest
	// and that UnitW is (0, 0, 0, 1) - confirm against the SIMD/Vector definitions.
	inline Quaternion::Quaternion(const Vector& axis, Radians angle)
	{
	ASSERT(axis.IsNormalized3());

	// N = (axis.x, axis.y, axis.z, 1)
	auto N = _mm_and_ps(axis, SIMD::g_maskXYZ0);
	N = _mm_or_ps(N, Vector::UnitW);
	// Half angle broadcast to all lanes
	auto scale = _mm_set_ps1(0.5f * (float)angle);

	Vector sine, cosine;
	Vector::SinCos(sine, cosine, scale);

	// scale = (sin, sin, sin, cos) of the half angle
	scale = _mm_and_ps(sine, SIMD::g_maskXYZ0);
	cosine = _mm_and_ps(cosine, SIMD::g_mask000W);
	scale = _mm_or_ps(scale, cosine);

	N = _mm_mul_ps(N, scale);
	m_data = N;
	}

	// Builds a quaternion from an AxisAngle by delegating to the (axis, angle) constructor,
	// which asserts that the axis is normalized.
	inline Quaternion::Quaternion(AxisAngle axisAngle)
	: Quaternion(Vector(axisAngle.m_axis), axisAngle.m_angle)
	{
	}

	// Builds a quaternion from Euler angles by composing one axis/angle rotation per
	// coordinate axis and normalizing the product.
	inline Quaternion::Quaternion(const EulerAngles& eulerAngles)
	{
		Quaternion const aboutX(Vector::UnitX, eulerAngles.m_x);
		Quaternion const aboutY(Vector::UnitY, eulerAngles.m_y);
		Quaternion const aboutZ(Vector::UnitZ, eulerAngles.m_z);

		// Rotation order is XYZ - all in global space, hence the order is reversed
		Quaternion combined = aboutX * aboutY;
		combined = combined * aboutZ;
		combined.Normalize();

		m_data = combined.m_data;
	}

	// Builds a quaternion from three angles by packing them into an EulerAngles and
	// delegating to the EulerAngles constructor.
	inline Quaternion::Quaternion(Radians rotX, Radians rotY, Radians rotZ)
	: Quaternion(EulerAngles(rotX, rotY, rotZ))
	{
	}

	// Mutable access to the underlying SSE register, so a Quaternion can be passed
	// directly to intrinsics.
	inline Quaternion::operator __m128& ()
	{
	return m_data;
	}

	// Read-only access to the underlying SSE register, so a const Quaternion can be passed
	// directly to intrinsics.
	inline Quaternion::operator const __m128& () const
	{
	return m_data;
	}

	// Copies the four lanes out into a Float4.
	inline Float4 Quaternion::ToFloat4() const
	{
		Float4 unpacked;
		// Unaligned store of all four lanes, starting at the address of m_x
		_mm_storeu_ps(&unpacked.m_x, m_data);
		return unpacked;
	}

	// Returns the raw (x, y, z, w) lanes as a Vector.
	inline Vector Quaternion::ToVector() const
	{
	return Vector(m_data);
	}

	// 4-component length of the quaternion, returned as a Vector (see GetLength for a float).
	// NOTE(review): not const-qualified although it does not modify the object - changing that
	// would need the declaration in the header updated as well.
	inline Vector Quaternion::Length()
	{
	return ToVector().Length4();
	}

	// 4-component length of the quaternion as a scalar.
	inline float Quaternion::GetLength() const
	{
	return ToVector().GetLength4();
	}

	// Rotation angle encoded by the quaternion: 2 * acos(w).
	inline Radians Quaternion::GetAngle() const
	{
		float const w = GetW();
		return Radians(2.0f * Math::ACos(w));
	}

	// Converts to an AxisAngle. The angle is 2 * acos(w); the axis argument is the raw
	// quaternion vector as-is (it is not rescaled or normalized here).
	inline AxisAngle Quaternion::ToAxisAngle() const
	{
		Radians const angle = GetAngle();
		return AxisAngle(ToVector(), angle);
	}

	// Rotates `vector` by this quaternion: conjugate * v * q, using this class's operator*.
	inline Vector Quaternion::RotateVector(const Vector& vector) const
	{
		// Treat the vector as a quaternion; the w lane comes from Select1110 rather than the input
		Vector const maskedInput = Vector::Select(Vector::Select1110, vector, Vector::Select1110);
		Quaternion const pure(maskedInput);

		Quaternion const conjugate = GetConjugate();
		Quaternion const halfway = conjugate * pure;
		Quaternion const rotated = halfway * *this;
		return rotated.ToVector();
	}

	// Rotates `vector` by the inverse of this quaternion: q * v * conjugate, using this class's
	// operator*. This is the mirror image of RotateVector, which computes conjugate * v * q.
	inline Vector Quaternion::RotateVectorInverse(const Vector& vector) const
	{
		// Treat the vector as a quaternion; the w lane comes from Select1110 rather than the input
		Quaternion const A(Vector::Select(Vector::Select1110, vector, Vector::Select1110));

		// Restored the dereference and multiply that had been lost ("this A" does not compile)
		Quaternion const result = *this * A;
		return (result * GetConjugate()).ToVector();
	}

	// Conjugates in place by negating x, y and z and leaving w untouched. Returns *this.
	inline Quaternion& Quaternion::Conjugate()
	{
		static __m128 const xyzSignFlip = { -1.0f, -1.0f, -1.0f, 1.0f };
		m_data = _mm_mul_ps(m_data, xyzSignFlip);
		return *this;
	}

	// Returns a conjugated copy; this quaternion is left unchanged.
	inline Quaternion Quaternion::GetConjugate() const
	{
		Quaternion conjugated(*this);
		conjugated.Conjugate();
		return conjugated;
	}
	// Negates all four components in place. Returns *this.
	inline Quaternion& Quaternion::Negate()
	{
		m_data = _mm_mul_ps(m_data, Vector::NegativeOne);
		return *this;
	}

	// Returns a copy with all four components negated; this quaternion is left unchanged.
	inline Quaternion Quaternion::GetNegated() const
	{
		Quaternion negated(*this);
		negated.Negate();
		return negated;
	}

	// Inverts the quaternion in place: q^-1 = conjugate(q) / |q|^2. Returns *this.
	// If the length is at or below Vector::Epsilon the result is set to zero instead of dividing.
	inline Quaternion& Quaternion::Invert()
	{
		Vector const conjugate(GetConjugate().m_data);
		Vector const length = ToVector().Length4();

		// The inverse divides by the squared length. Dividing by the plain length (as before)
		// only gives a true inverse for unit quaternions, where both are 1.
		Vector const lengthSquared = length * length;

		// Degenerate test unchanged: it is still based on the length itself
		Vector const mask = length.LessThanEqual(Vector::Epsilon);
		Vector const result = conjugate / lengthSquared;
		m_data = Vector::Select(result, Vector::Zero, mask);
		return *this;
	}

	// Returns an inverted copy; this quaternion is left unchanged.
	inline Quaternion Quaternion::GetInverse() const
	{
		Quaternion inverted(*this);
		inverted.Invert();
		return inverted;
	}

	// Normalizes all four components in place via Vector::GetNormalized4. Returns *this.
	inline Quaternion& Quaternion::Normalize()
	{
		Vector const unitLength = ToVector().GetNormalized4();
		m_data = unitLength.m_data;
		return *this;
	}

	// Returns a normalized copy; this quaternion is left unchanged.
	inline Quaternion Quaternion::GetNormalized() const
	{
		Quaternion normalized(*this);
		normalized.Normalize();
		return normalized;
	}

	// Computes the X axis of the rotation directly from the quaternion components.
	// The "- 1.0f" form of the diagonal term assumes the quaternion is unit length.
	inline Vector Quaternion::XAxis() const noexcept
	{
		// Spill the lanes to scalars: (x, y, z, w)
		float lanes[4];
		_mm_storeu_ps(lanes, m_data);
		float const qx = lanes[0];
		float const qy = lanes[1];
		float const qz = lanes[2];
		float const qw = lanes[3];

		float const twoW = 2.0f * qw;
		float const twoX = 2.0f * qx;

		float const axisX = twoX * qx + twoW * qw - 1.0f;
		float const axisY = twoX * qy + twoW * qz;
		float const axisZ = twoX * qz + twoW * -qy;
		return Vector(axisX, axisY, axisZ);
	}

	// Computes the Y axis of the rotation directly from the quaternion components.
	// The "- 1.0f" form of the diagonal term assumes the quaternion is unit length.
	inline Vector Quaternion::YAxis() const noexcept
	{
		// Spill the lanes to scalars: (x, y, z, w)
		float lanes[4];
		_mm_storeu_ps(lanes, m_data);
		float const qx = lanes[0];
		float const qy = lanes[1];
		float const qz = lanes[2];
		float const qw = lanes[3];

		float const twoW = 2.0f * qw;
		float const twoY = 2.0f * qy;

		float const axisX = twoY * qx + twoW * -qz;
		float const axisY = twoY * qy + twoW * qw - 1.0f;
		float const axisZ = twoY * qz + twoW * qx;
		return Vector(axisX, axisY, axisZ);
	}

	// Computes the Z axis of the rotation directly from the quaternion components.
	// The "- 1.0f" form of the diagonal term assumes the quaternion is unit length.
	inline Vector Quaternion::ZAxis() const noexcept
	{
		// Spill the lanes to scalars: (x, y, z, w)
		float lanes[4];
		_mm_storeu_ps(lanes, m_data);
		float const qx = lanes[0];
		float const qy = lanes[1];
		float const qz = lanes[2];
		float const qw = lanes[3];

		float const twoW = 2.0f * qw;
		float const twoZ = 2.0f * qz;

		float const axisX = qx * twoZ + twoW * qy;
		float const axisY = qy * twoZ + twoW * -qx;
		float const axisZ = qz * twoZ + twoW * qw - 1.0f;
		return Vector(axisX, axisY, axisZ);
	}

	// Negates the quaternion in place when w is negative, so that it represents the rotation
	// of at most 180 degrees. Returns *this.
	inline Quaternion& Quaternion::MakeShortestPath()
	{
		// w < 0.0f is the same as dot( identity, q ) < 0, i.e. an angle greater than 180
		bool const takesLongWay = GetW() < 0.0f;
		if (takesLongWay)
		{
			Negate();
		}

		return *this;
	}

	// Returns a copy on which MakeShortestPath has been applied; this quaternion is unchanged.
	inline Quaternion Quaternion::GetShortestPath() const
	{
		Quaternion shortest(*this);
		shortest.MakeShortestPath();
		return shortest;
	}

	// Normalizes in place using the approximate path in GetNormalizedInaccurate. Returns *this.
	inline Quaternion& Quaternion::NormalizeInaccurate()
	{
	*this = GetNormalizedInaccurate();
	return *this;
	}

	// Returns an approximately normalized copy. The scale factor comes from _mm_rsqrt_ps, which is
	// an approximate reciprocal square root, so the result is only close to unit length.
	inline Quaternion Quaternion::GetNormalizedInaccurate() const
	{
	// Horizontal sum of the squared components
	__m128 vLengthSq = _mm_mul_ps(m_data, m_data);
	__m128 vTemp = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(3, 2, 3, 2));
	vLengthSq = _mm_add_ps(vLengthSq, vTemp);
	vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(1, 0, 0, 0));
	vTemp = _mm_shuffle_ps(vTemp, vLengthSq, _MM_SHUFFLE(3, 3, 0, 0));
	vLengthSq = _mm_add_ps(vLengthSq, vTemp);
	// Lane 2 now holds x^2 + y^2 + z^2 + w^2; broadcast it to all lanes
	vLengthSq = _mm_shuffle_ps(vLengthSq, vLengthSq, _MM_SHUFFLE(2, 2, 2, 2));

	// Get the reciprocal and mul to perform the normalization
	Quaternion result;
	result.m_data = _mm_rsqrt_ps(vLengthSq);
	result.m_data = _mm_mul_ps(result.m_data, m_data);
	return result;
	}

	inline bool Quaternion::IsNormalized() const
	{
	return ToVector().IsNormalized4();
	}

	// Compares this quaternion against Vector::UnitW using the 3-component comparison IsEqual3.
	// NOTE(review): because this is IsEqual3 and not a 4-component compare, the w lane presumably
	// does not take part in the test - confirm whether that is intentional.
	inline bool Quaternion::IsIdentity() const
	{
	return ToVector().IsEqual3(Vector::UnitW);
	}

	// Quaternion multiplication implemented with SSE shuffles.
	// In the comments Q1 is *this and Q2 is rhs. Expanding the lanes below, the result is the
	// Hamilton product rhs (x) *this, e.g. result.w = w1*w2 - x1*x2 - y1*y2 - z1*z2.
	inline Quaternion Quaternion::operator*(const Quaternion& rhs) const
	{
	// Per-lane sign patterns applied to the three shuffled partial products
	static const __m128 controlWZYX = { 1.0f,-1.0f, 1.0f,-1.0f };
	static const __m128 controlZWXY = { 1.0f, 1.0f,-1.0f,-1.0f };
	static const __m128 controlYXWZ = { -1.0f, 1.0f, 1.0f,-1.0f };

	// Copy to SSE registers and use as few as possible for x86
	__m128 Q2X = rhs;
	__m128 Q2Y = rhs;
	__m128 Q2Z = rhs;
	__m128 vResult = rhs;
	// Splat with one instruction
	vResult = _mm_shuffle_ps(vResult, vResult, _MM_SHUFFLE(3, 3, 3, 3));
	Q2X = _mm_shuffle_ps(Q2X, Q2X, _MM_SHUFFLE(0, 0, 0, 0));
	Q2Y = _mm_shuffle_ps(Q2Y, Q2Y, _MM_SHUFFLE(1, 1, 1, 1));
	Q2Z = _mm_shuffle_ps(Q2Z, Q2Z, _MM_SHUFFLE(2, 2, 2, 2));
	// Retire Q1 and perform Q1*Q2W
	vResult = _mm_mul_ps(vResult, *this);
	__m128 Q1Shuffle = *this;
	// Shuffle the copies of Q1
	Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
	// Mul by Q1WZYX
	Q2X = _mm_mul_ps(Q2X, Q1Shuffle);
	Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(2, 3, 0, 1));
	// Flip the signs on m_y and m_w
	Q2X = _mm_mul_ps(Q2X, controlWZYX);
	// Mul by Q1ZWXY
	Q2Y = _mm_mul_ps(Q2Y, Q1Shuffle);
	Q1Shuffle = _mm_shuffle_ps(Q1Shuffle, Q1Shuffle, _MM_SHUFFLE(0, 1, 2, 3));
	// Flip the signs on m_z and m_w
	Q2Y = _mm_mul_ps(Q2Y, controlZWXY);
	// Mul by Q1YXWZ
	Q2Z = _mm_mul_ps(Q2Z, Q1Shuffle);
	vResult = _mm_add_ps(vResult, Q2X);
	// Flip the signs on m_x and m_w
	Q2Z = _mm_mul_ps(Q2Z, controlYXWZ);
	// Sum the four partial products
	Q2Y = _mm_add_ps(Q2Y, Q2Z);
	vResult = _mm_add_ps(vResult, Q2Y);

	return Quaternion(vResult);
	}

	// Multiplies this quaternion by `rhs` in place (this = this * rhs). Returns *this.
	inline Quaternion& Quaternion::operator*=(const Quaternion& rhs)
	{
		// `this` is a pointer and cannot be assigned to or multiplied; both uses must be dereferenced
		*this = *this * rhs;
		return *this;
	}

	inline bool Quaternion::IsNearEqual(const Quaternion& rhs, Radians const threshold) const
	{
	return Quaternion::Distance(*this, rhs) <= threshold;
	}

	// Component comparison, delegated to Vector's operator==.
	inline bool Quaternion::operator==(const Quaternion& rhs) const
	{
		Vector const lhsComponents = ToVector();
		Vector const rhsComponents = rhs.ToVector();
		return lhsComponents == rhsComponents;
	}

	// Logical negation of operator==.
	inline bool Quaternion::operator!=(const Quaternion& rhs) const
	{
		return !(*this == rhs);
	}

	// Returns a Vector with the w component broadcast to all four lanes.
	inline Vector Quaternion::GetSplatW() const
	{
		__m128 const wBroadcast = _mm_shuffle_ps(m_data, m_data, _MM_SHUFFLE(3, 3, 3, 3));
		return wBroadcast;
	}

	inline float Quaternion::GetW() const
	{
	auto vTemp = GetSplatW();
	return _mm_cvtss_f32(vTemp);
	}
	}