/* * SPDX-FileCopyrightText: Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-License-Identifier: Apache-2.0 */ #pragma once #include #include "Matrix.h" namespace Math { inline Matrix Matrix::FromRotation(const Quaternion& rotation) { return Matrix(rotation); } inline Matrix Matrix::FromTranslation(const Vector& translation) { Matrix M; M.m_rows[0] = Vector::UnitX; M.m_rows[1] = Vector::UnitY; M.m_rows[2] = Vector::UnitZ; M.m_rows[3] = translation.GetWithW1(); return M; } inline Matrix Matrix::FromScale(const Vector& scale) { Matrix M; M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000); M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00); M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0); M.m_rows[3] = Vector::UnitW; return M; } inline Matrix Matrix::FromUniformScale(float uniformScale) { Matrix M; M.m_rows[0] = _mm_set_ps(0, 0, 0, uniformScale); M.m_rows[1] = _mm_set_ps(0, 0, uniformScale, 0); M.m_rows[2] = _mm_set_ps(0, uniformScale, 0, 0); M.m_rows[3] = Vector::UnitW; return M; } inline Matrix Matrix::FromTranslationAndScale(const Vector& translation, const Vector& scale) { Matrix M; M.m_rows[0] = _mm_and_ps(scale, SIMD::g_maskX000); M.m_rows[1] = _mm_and_ps(scale, SIMD::g_mask0Y00); M.m_rows[2] = _mm_and_ps(scale, SIMD::g_mask00Z0); M.m_rows[3] = translation.GetWithW1(); return M; } inline Matrix Matrix::FromRotationBetweenVectors(Vector const sourceVector, Vector const targetVector) { return Matrix(Quaternion::FromRotationBetweenNormalizedVectors(sourceVector, targetVector)); } inline Matrix::Matrix() { memcpy(this, &Matrix::Identity, sizeof(Matrix)); } inline Matrix::Matrix(NoInit_t) { } inline Matrix::Matrix(ZeroInit_t) { memset(this, 0, sizeof(Matrix)); } inline Matrix::Matrix(const Vector axis, Radians angleRadians) { Vector normal = axis.GetNormalized3(); Vector C0, C1; Vector::SinCos(C0, C1, Vector((float)angleRadians)); Vector C2 = Vector::One - C1; __m128 N0 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 0, 2, 1)); __m128 N1 = _mm_shuffle_ps(normal, normal, _MM_SHUFFLE(3, 1, 0, 2)); __m128 V0 = _mm_mul_ps(C2, N0); V0 = _mm_mul_ps(V0, N1); __m128 R0 = _mm_mul_ps(C2, normal); R0 = _mm_mul_ps(R0, normal); R0 = _mm_add_ps(R0, C1); __m128 R1 = _mm_mul_ps(C0, normal); R1 = _mm_add_ps(R1, V0); __m128 R2 = _mm_mul_ps(C0, normal); R2 = _mm_sub_ps(V0, R2); V0 = _mm_and_ps(R0, SIMD::g_maskXYZ0); __m128 V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0)); V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(0, 3, 2, 1)); __m128 V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1)); V2 = _mm_shuffle_ps(V2, V2, _MM_SHUFFLE(2, 0, 2, 0)); R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0)); R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 2, 0)); m_rows[0] = R2; R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1)); R2 = _mm_shuffle_ps(R2, R2, _MM_SHUFFLE(1, 3, 0, 2)); m_rows[1] = R2; V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0)); m_rows[2] = V2; m_rows[3] = Vector::UnitW; } inline Matrix::Matrix(const AxisAngle axisAngle) : Matrix(Vector(axisAngle.m_axis), axisAngle.m_angle) { } inline Matrix::Matrix(const Quaternion& rotation) { SetRotation(rotation); m_rows[3] = Vector::UnitW; } inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, const Vector& scale) { SetRotation(rotation); m_rows[0] = m_rows[0] * scale.GetSplatX(); m_rows[1] = m_rows[1] * scale.GetSplatY(); m_rows[2] = m_rows[2] * scale.GetSplatZ(); m_rows[3] = translation.GetWithW1(); } inline Matrix::Matrix(const Quaternion& rotation, const Vector& translation, float scale) : Matrix(rotation, translation, Vector(scale)) { } inline float* Matrix::AsFloatArray() { return &m_values[0][0]; } inline const float* Matrix::AsFloatArray() const { return &m_values[0][0]; } inline const Vector& Matrix::GetRow(uint32_t row) const { return m_rows[row]; } inline const Vector& Matrix::GetAxisX() const { return m_rows[0]; } inline const Vector& Matrix::GetAxisY() const { return m_rows[1]; } inline const Vector& Matrix::GetAxisZ() const { return m_rows[2]; } inline void Matrix::SetAxisX(const Vector& xAxis) { m_rows[0] = xAxis; } inline void Matrix::SetAxisY(const Vector& yAxis) { m_rows[1] = yAxis; } inline void Matrix::SetAxisZ(const Vector& zAxis) { m_rows[2] = zAxis; } inline Float3 Matrix::GetForwardVector() const { return GetAxisZ(); } inline Float3 Matrix::GetRightVector() const { return GetAxisX(); } inline Float3 Matrix::GetUpVector() const { return GetAxisY(); } inline Vector Matrix::GetUnitAxisX() const { return m_rows[0].GetNormalized3(); } inline Vector Matrix::GetUnitAxisY() const { return m_rows[1].GetNormalized3(); } inline Vector Matrix::GetUnitAxisZ() const { return m_rows[2].GetNormalized3(); } inline bool Matrix::IsIdentity() const { __m128 vTemp1 = _mm_cmpeq_ps(m_rows[0], Vector::UnitX); __m128 vTemp2 = _mm_cmpeq_ps(m_rows[1], Vector::UnitY); __m128 vTemp3 = _mm_cmpeq_ps(m_rows[2], Vector::UnitZ); __m128 vTemp4 = _mm_cmpeq_ps(m_rows[3], Vector::UnitW); vTemp1 = _mm_and_ps(vTemp1, vTemp2); vTemp3 = _mm_and_ps(vTemp3, vTemp4); vTemp1 = _mm_and_ps(vTemp1, vTemp3); return (_mm_movemask_ps(vTemp1) == 0x0f); } inline bool Matrix::IsOrthogonal() const { Matrix const transpose = GetTransposed(); Matrix result = *this * transpose; return result.IsIdentity(); } inline bool Matrix::IsOrthonormal() const { static const Vector three(3); auto dotCheck = Vector::Dot3(m_rows[0], m_rows[1]) + Vector::Dot3(m_rows[0], m_rows[2]) + Vector::Dot3(m_rows[1], m_rows[2]); auto magnitudeCheck = m_rows[0].LengthSquared3() + m_rows[1].LengthSquared3() + m_rows[2].LengthSquared3(); auto result = dotCheck + magnitudeCheck; return result.IsNearEqual3(three); } inline Matrix& Matrix::Transpose() { __m128 vTemp1 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp3 = _mm_shuffle_ps(m_rows[0], m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 vTemp2 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(1, 0, 1, 0)); __m128 vTemp4 = _mm_shuffle_ps(m_rows[2], m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); m_rows[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1)); m_rows[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0)); m_rows[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1)); return *this; } inline Matrix Matrix::GetTransposed() const { Matrix m = *this; m.Transpose(); return m; } inline Matrix& Matrix::Invert() { Matrix MT = GetTransposed(); __m128 V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 1, 0, 0)); __m128 V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(3, 2, 3, 2)); __m128 V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(2, 0, 2, 0)); __m128 V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(3, 1, 3, 1)); __m128 D0 = _mm_mul_ps(V00, V10); __m128 D1 = _mm_mul_ps(V01, V11); __m128 D2 = _mm_mul_ps(V02, V12); V00 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(3, 2, 3, 2)); V10 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 1, 0, 0)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(3, 2, 3, 2)); V11 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 1, 0, 0)); V02 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[0], _MM_SHUFFLE(3, 1, 3, 1)); V12 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[1], _MM_SHUFFLE(2, 0, 2, 0)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); D0 = _mm_sub_ps(D0, V00); D1 = _mm_sub_ps(D1, V01); D2 = _mm_sub_ps(D2, V02); // V11 = D0Y,D0W,D2Y,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(1, 0, 2, 1)); V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(0, 1, 0, 2)); V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1)); // V13 = D1Y,D1W,D2W,D2W __m128 V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(1, 0, 2, 1)); V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2)); __m128 V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(0, 1, 0, 2)); V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1)); __m128 C0 = _mm_mul_ps(V00, V10); __m128 C2 = _mm_mul_ps(V01, V11); __m128 C4 = _mm_mul_ps(V02, V12); __m128 C6 = _mm_mul_ps(V03, V13); // V11 = D0X,D0Y,D2X,D2X V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0)); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(2, 1, 3, 2)); V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(1, 3, 2, 3)); V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2)); // V13 = D1X,D1Y,D2Z,D2Z V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(2, 1, 3, 2)); V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(1, 3, 2, 3)); V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); C0 = _mm_sub_ps(C0, V00); C2 = _mm_sub_ps(C2, V01); C4 = _mm_sub_ps(C4, V02); C6 = _mm_sub_ps(C6, V03); V00 = _mm_shuffle_ps(MT.m_rows[1], MT.m_rows[1], _MM_SHUFFLE(0, 3, 0, 3)); // V10 = D0Z,D0Z,D2X,D2Y V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2)); V10 = _mm_shuffle_ps(V10, V10, _MM_SHUFFLE(0, 2, 3, 0)); V01 = _mm_shuffle_ps(MT.m_rows[0], MT.m_rows[0], _MM_SHUFFLE(2, 0, 3, 1)); // V11 = D0X,D0W,D2X,D2Y V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0)); V11 = _mm_shuffle_ps(V11, V11, _MM_SHUFFLE(2, 1, 0, 3)); V02 = _mm_shuffle_ps(MT.m_rows[3], MT.m_rows[3], _MM_SHUFFLE(0, 3, 0, 3)); // V12 = D1Z,D1Z,D2Z,D2W V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2)); V12 = _mm_shuffle_ps(V12, V12, _MM_SHUFFLE(0, 2, 3, 0)); V03 = _mm_shuffle_ps(MT.m_rows[2], MT.m_rows[2], _MM_SHUFFLE(2, 0, 3, 1)); // V13 = D1X,D1W,D2Z,D2W V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0)); V13 = _mm_shuffle_ps(V13, V13, _MM_SHUFFLE(2, 1, 0, 3)); V00 = _mm_mul_ps(V00, V10); V01 = _mm_mul_ps(V01, V11); V02 = _mm_mul_ps(V02, V12); V03 = _mm_mul_ps(V03, V13); __m128 C1 = _mm_sub_ps(C0, V00); C0 = _mm_add_ps(C0, V00); __m128 C3 = _mm_add_ps(C2, V01); C2 = _mm_sub_ps(C2, V01); __m128 C5 = _mm_sub_ps(C4, V02); C4 = _mm_add_ps(C4, V02); __m128 C7 = _mm_add_ps(C6, V03); C6 = _mm_sub_ps(C6, V03); C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0)); C6 = _mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0)); C0 = _mm_shuffle_ps(C0, C0, _MM_SHUFFLE(3, 1, 2, 0)); C2 = _mm_shuffle_ps(C2, C2, _MM_SHUFFLE(3, 1, 2, 0)); C4 = _mm_shuffle_ps(C4, C4, _MM_SHUFFLE(3, 1, 2, 0)); C6 = _mm_shuffle_ps(C6, C6, _MM_SHUFFLE(3, 1, 2, 0)); __m128 vTemp = Vector::Dot4(C0, MT.m_rows[0]); vTemp = _mm_div_ps(Vector::One, vTemp); m_rows[0] = _mm_mul_ps(C0, vTemp); m_rows[1] = _mm_mul_ps(C2, vTemp); m_rows[2] = _mm_mul_ps(C4, vTemp); m_rows[3] = _mm_mul_ps(C6, vTemp); return *this; } inline Matrix Matrix::GetInverse() const { Matrix m = *this; m.Invert(); return m; } inline Vector Matrix::GetDeterminant() const { Vector V0 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V1 = m_rows[3].Shuffle(2, 2, 1, 1); Vector V2 = m_rows[2].Shuffle(1, 0, 0, 0); Vector V3 = m_rows[3].Shuffle(3, 3, 3, 2); Vector V4 = m_rows[2].Shuffle(2, 2, 1, 1); Vector V5 = m_rows[3].Shuffle(3, 3, 3, 2); Vector P0 = V0 * V1; Vector P1 = V2 * V3; Vector P2 = V4 * V5; V0 = m_rows[2].Shuffle(2, 2, 1, 1); V1 = m_rows[3].Shuffle(1, 0, 0, 0); V2 = m_rows[2].Shuffle(3, 3, 3, 2); V3 = m_rows[3].Shuffle(1, 0, 0, 0); V4 = m_rows[2].Shuffle(3, 3, 3, 2); V5 = m_rows[3].Shuffle(2, 2, 1, 1); P0 = Vector::NegativeMultiplySubtract(V0, V1, P0); P1 = Vector::NegativeMultiplySubtract(V2, V3, P1); P2 = Vector::NegativeMultiplySubtract(V4, V5, P2); V0 = m_rows[1].Shuffle(3, 3, 3, 2); V1 = m_rows[1].Shuffle(2, 2, 1, 1); V2 = m_rows[1].Shuffle(1, 0, 0, 0); static Vector const Sign(1.0f, -1.0f, 1.0f, -1.0f); Vector S = m_rows[0] * Sign; Vector R = V0 * P0; R = Vector::NegativeMultiplySubtract(V1, P1, R); R = Vector::MultiplyAdd(V2, P2, R); return Vector::Dot4(S, R); } inline float Matrix::GetDeterminantAsFloat() const { return GetDeterminant().GetX(); } inline Vector Matrix::GetTranslation() const { return m_rows[3].GetWithW0(); } inline const Vector& Matrix::GetTranslationWithW() const { return m_rows[3]; } inline Matrix& Matrix::SetTranslation(const Vector& v) { m_rows[3] = v.GetWithW1(); return *this; } inline Matrix& Matrix::SetTranslation(const Float3& v) { m_rows[3] = Vector(v, 1.0f); return *this; } inline Matrix& Matrix::SetTranslation(const Float4& v) { m_rows[3] = Vector(v.m_x, v.m_y, v.m_z, 1.0f); return *this; } inline Quaternion Matrix::GetRotation() const { // based on RTM: https://github.com/nfrechette/rtm const Vector& axisX = m_rows[0]; const Vector& axisY = m_rows[1]; const Vector& axisZ = m_rows[2]; // Zero scale is not supported if (axisX.IsNearZero4() || axisY.IsNearZero4() || axisZ.IsNearZero4()) { HALT(); } float const axisX_X = axisX.GetX(); float const axisY_Y = axisY.GetY(); float const axisZ_Z = axisZ.GetZ(); float const mtx_trace = axisX_X + axisY_Y + axisZ_Z; if (mtx_trace > 0.0) { float const axisX_y = axisX.GetY(); float const axisX_z = axisX.GetZ(); float const axisY_x = axisY.GetX(); float const axisY_z = axisY.GetZ(); float const axisZ_x = axisZ.GetX(); float const axisZ_y = axisZ.GetY(); float const inv_trace = Math::Reciprocal(Math::Sqrt(mtx_trace + 1.0f)); float const half_inv_trace = inv_trace * 0.5f; float const m_x = (axisY_z - axisZ_y) * half_inv_trace; float const m_y = (axisZ_x - axisX_z) * half_inv_trace; float const m_z = (axisX_y - axisY_x) * half_inv_trace; float const m_w = Math::Reciprocal(inv_trace) * 0.5f; return Quaternion(m_x, m_y, m_z, m_w).GetNormalized(); } else { // Find the axis with the highest diagonal value int32_t axisIdx0 = 0; if (axisY_Y > axisX_X) { axisIdx0 = 1; } if (axisZ_Z > m_rows[axisIdx0][axisIdx0]) { axisIdx0 = 2; } int32_t const axisIdx1 = (axisIdx0 + 1) % 3; int32_t const axisIdx2 = (axisIdx1 + 1) % 3; float const pseudoTrace = 1.0f + m_rows[axisIdx0][axisIdx0] - m_rows[axisIdx1][axisIdx1] - m_rows[axisIdx2][axisIdx2]; float const inversePseudoTrace = Math::Reciprocal(Math::Sqrt(pseudoTrace)); float const halfInversePseudoTrace = inversePseudoTrace * 0.5f; Float4 rawQuatValues; rawQuatValues[axisIdx0] = Math::Reciprocal(inversePseudoTrace) * 0.5f; rawQuatValues[axisIdx1] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx1] + m_rows[axisIdx1][axisIdx0]); rawQuatValues[axisIdx2] = halfInversePseudoTrace * (m_rows[axisIdx0][axisIdx2] + m_rows[axisIdx2][axisIdx0]); rawQuatValues[3] = halfInversePseudoTrace * (m_rows[axisIdx1][axisIdx2] - m_rows[axisIdx2][axisIdx1]); return Quaternion(rawQuatValues).GetNormalized(); } } inline Matrix& Matrix::SetRotation(const Matrix& rotation) { ASSERT(Math::Abs(rotation.GetDeterminant().GetX()) == 1.0f); m_rows[0] = rotation.m_rows[0]; m_rows[1] = rotation.m_rows[1]; m_rows[2] = rotation.m_rows[2]; return *this; } inline Matrix& Matrix::SetRotation(const Quaternion& rotation) { static __m128 const constant1110 = { 1.0f, 1.0f, 1.0f, 0.0f }; __m128 Q0 = _mm_add_ps(rotation, rotation); __m128 Q1 = _mm_mul_ps(rotation, Q0); __m128 V0 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 0, 0, 1)); V0 = _mm_and_ps(V0, SIMD::g_maskXYZ0); __m128 V1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(3, 1, 2, 2)); V1 = _mm_and_ps(V1, SIMD::g_maskXYZ0); __m128 R0 = _mm_sub_ps(constant1110, V0); R0 = _mm_sub_ps(R0, V1); V0 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 1, 0, 0)); V1 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 2, 1, 2)); V0 = _mm_mul_ps(V0, V1); V1 = _mm_shuffle_ps(rotation, rotation, _MM_SHUFFLE(3, 3, 3, 3)); __m128 V2 = _mm_shuffle_ps(Q0, Q0, _MM_SHUFFLE(3, 0, 2, 1)); V1 = _mm_mul_ps(V1, V2); __m128 R1 = _mm_add_ps(V0, V1); __m128 R2 = _mm_sub_ps(V0, V1); V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1)); V0 = _mm_shuffle_ps(V0, V0, _MM_SHUFFLE(1, 3, 2, 0)); V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0)); V1 = _mm_shuffle_ps(V1, V1, _MM_SHUFFLE(2, 0, 2, 0)); Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 2, 0)); m_rows[0] = Q1; Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1)); Q1 = _mm_shuffle_ps(Q1, Q1, _MM_SHUFFLE(1, 3, 0, 2)); m_rows[1] = Q1; Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0)); m_rows[2] = Q1; return *this; } inline Matrix& Matrix::SetRotationMaintainingScale(const Matrix& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetRotationMaintainingScale(const Quaternion& rotation) { Vector const scale = GetScale(); SetRotation(rotation); return SetScale(scale); } inline Matrix& Matrix::SetScale(float uniformScale) { SetScale(Vector(uniformScale)); return *this; } inline Matrix& Matrix::RemoveScaleFast() { m_rows[0] = m_rows[0].GetNormalized4(); m_rows[1] = m_rows[1].GetNormalized4(); m_rows[2] = m_rows[2].GetNormalized4(); return *this; } inline Matrix& Matrix::SetScaleFast(const Vector& scale) { m_rows[0] = m_rows[0].GetNormalized3() * scale.GetSplatX(); m_rows[1] = m_rows[1].GetNormalized3() * scale.GetSplatY(); m_rows[2] = m_rows[2].GetNormalized3() * scale.GetSplatZ(); return *this; } inline Matrix& Matrix::SetScaleFast(float uniformScale) { SetScaleFast(Vector(uniformScale)); return *this; } inline Vector Matrix::RotateVector(const Vector& vector) const { Vector const X = vector.GetSplatX(); Vector const Y = vector.GetSplatY(); Vector const Z = vector.GetSplatZ(); Vector Result = Z * m_rows[2]; Result = Vector::MultiplyAdd(Y, m_rows[1], Result); Result = Vector::MultiplyAdd(X, m_rows[0], Result); return Result; } inline Vector Matrix::TransformNormal(const Vector& vector) const { return RotateVector(vector); } inline Vector Matrix::TransformPoint(const Vector& point) const { Vector const X = point.GetSplatX(); Vector const Y = point.GetSplatY(); Vector const Z = point.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); Vector const W = result.GetSplatW(); return result / W; } inline Vector Matrix::TransformVector3(const Vector& V) const { Vector const X = V.GetSplatX(); Vector const Y = V.GetSplatY(); Vector const Z = V.GetSplatZ(); Vector result = Vector::MultiplyAdd(Z, m_rows[2], m_rows[3]); result = Vector::MultiplyAdd(Y, m_rows[1], result); result = Vector::MultiplyAdd(X, m_rows[0], result); return result; } inline Vector Matrix::TransformVector4(const Vector& V) const { // Splat m_x,m_y,m_z and m_w Vector vTempX = V.GetSplatX(); Vector vTempY = V.GetSplatY(); Vector vTempZ = V.GetSplatZ(); Vector vTempW = V.GetSplatW(); // Mul by the matrix vTempX = _mm_mul_ps(vTempX, m_rows[0]); vTempY = _mm_mul_ps(vTempY, m_rows[1]); vTempZ = _mm_mul_ps(vTempZ, m_rows[2]); vTempW = _mm_mul_ps(vTempW, m_rows[3]); // Add them all together vTempX = _mm_add_ps(vTempX, vTempY); vTempZ = _mm_add_ps(vTempZ, vTempW); vTempX = _mm_add_ps(vTempX, vTempZ); return vTempX; } inline Vector& Matrix::operator[](uint32_t i) { ASSERT(i < 4); return m_rows[i]; } inline const Vector Matrix::operator[](uint32_t i) const { ASSERT(i < 4); return m_rows[i]; } inline Matrix Matrix::operator*(const Matrix& rhs) const { Matrix result = *this; result *= rhs; return result; } inline Matrix& Matrix::operator*= (const Matrix& rhs) { Vector vX, vY, vZ, vW; // Use vW to hold the original row vW = m_rows[0]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[0] = vX; // Repeat for the other 3 rows vW = m_rows[1]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[1] = vX; vW = m_rows[2]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[2] = vX; vW = m_rows[3]; vX = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(0, 0, 0, 0)); vY = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(1, 1, 1, 1)); vZ = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(2, 2, 2, 2)); vW = _mm_shuffle_ps(vW, vW, _MM_SHUFFLE(3, 3, 3, 3)); vX = _mm_mul_ps(vX, rhs.m_rows[0]); vY = _mm_mul_ps(vY, rhs.m_rows[1]); vZ = _mm_mul_ps(vZ, rhs.m_rows[2]); vW = _mm_mul_ps(vW, rhs.m_rows[3]); vX = _mm_add_ps(vX, vZ); vY = _mm_add_ps(vY, vW); vX = _mm_add_ps(vX, vY); m_rows[3] = vX; return *this; } inline Matrix Matrix::operator*(const Quaternion& rhs) const { return operator*(Matrix(rhs)); } inline Matrix Matrix::operator*=(const Quaternion& rhs) { return operator*=(Matrix(rhs)); } inline bool Matrix::operator==(const Matrix& rhs) const { for (auto i = 0; i < 4; i++) { for (auto j = 0; j < 4; j++) { if (m_values[i][j] != rhs.m_values[i][j]) { return false; } } } return true; } }