17 #ifndef BT_SIMD__QUATERNION_H_ 18 #define BT_SIMD__QUATERNION_H_ 25 #ifdef BT_USE_DOUBLE_PRECISION 26 #define btQuaternionData btQuaternionDoubleData 27 #define btQuaternionDataName "btQuaternionDoubleData" 29 #define btQuaternionData btQuaternionFloatData 30 #define btQuaternionDataName "btQuaternionFloatData" 31 #endif //BT_USE_DOUBLE_PRECISION 38 #define vOnes (_mm_set_ps(1.0f, 1.0f, 1.0f, 1.0f)) 42 #if defined(BT_USE_SSE) 44 #define vQInv (_mm_set_ps(+0.0f, -0.0f, -0.0f, -0.0f)) 45 #define vPPPM (_mm_set_ps(-0.0f, +0.0f, +0.0f, +0.0f)) 47 #elif defined(BT_USE_NEON) 60 #if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))|| defined(BT_USE_NEON) 70 mVec128 = rhs.mVec128;
103 #ifndef BT_EULER_DEFAULT_ZYX 135 setValue(cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
136 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
137 sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
138 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
155 setValue(sinRoll * cosPitch * cosYaw - cosRoll * sinPitch * sinYaw,
156 cosRoll * sinPitch * cosYaw + sinRoll * cosPitch * sinYaw,
157 cosRoll * cosPitch * sinYaw - sinRoll * sinPitch * cosYaw,
158 cosRoll * cosPitch * cosYaw + sinRoll * sinPitch * sinYaw);
164 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 165 mVec128 = _mm_add_ps(mVec128, q.mVec128);
166 #elif defined(BT_USE_NEON) 167 mVec128 = vaddq_f32(mVec128, q.mVec128);
181 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 182 mVec128 = _mm_sub_ps(mVec128, q.mVec128);
183 #elif defined(BT_USE_NEON) 184 mVec128 = vsubq_f32(mVec128, q.mVec128);
198 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 199 __m128 vs = _mm_load_ss(&s);
200 vs = bt_pshufd_ps(vs, 0);
201 mVec128 = _mm_mul_ps(mVec128, vs);
202 #elif defined(BT_USE_NEON) 203 mVec128 = vmulq_n_f32(mVec128, s);
218 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 219 __m128 vQ2 = q.get128();
221 __m128 A1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(0,1,2,0));
222 __m128 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
226 __m128 A2 = bt_pshufd_ps(mVec128, BT_SHUFFLE(1,2,0,1));
227 __m128 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
231 B1 = bt_pshufd_ps(mVec128, BT_SHUFFLE(2,0,1,2));
232 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
236 mVec128 = bt_splat_ps(mVec128, 3);
237 mVec128 = mVec128 * vQ2;
240 mVec128 = mVec128 - B1;
241 A1 = _mm_xor_ps(A1, vPPPM);
242 mVec128 = mVec128+ A1;
244 #elif defined(BT_USE_NEON) 246 float32x4_t vQ1 = mVec128;
247 float32x4_t vQ2 = q.get128();
248 float32x4_t A0, A1, B1, A2, B2, A3, B3;
249 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
253 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
256 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
259 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
261 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
263 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
264 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
266 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
267 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
269 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
270 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
272 A3 = vcombine_f32(vQ1zx, vQ1yz);
273 B3 = vcombine_f32(vQ2yz, vQ2xz);
275 A1 = vmulq_f32(A1, B1);
276 A2 = vmulq_f32(A2, B2);
277 A3 = vmulq_f32(A3, B3);
278 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
280 A1 = vaddq_f32(A1, A2);
281 A0 = vsubq_f32(A0, A3);
284 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
285 A0 = vaddq_f32(A0, A1);
301 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 304 vd = _mm_mul_ps(mVec128, q.mVec128);
306 __m128 t = _mm_movehl_ps(vd, vd);
307 vd = _mm_add_ps(vd, t);
308 t = _mm_shuffle_ps(vd, vd, 0x55);
309 vd = _mm_add_ss(vd, t);
311 return _mm_cvtss_f32(vd);
312 #elif defined(BT_USE_NEON) 313 float32x4_t vd = vmulq_f32(mVec128, q.mVec128);
314 float32x2_t
x = vpadd_f32(vget_low_f32(vd), vget_high_f32(vd));
316 return vget_lane_f32(x, 0);
341 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 344 vd = _mm_mul_ps(mVec128, mVec128);
346 __m128 t = _mm_movehl_ps(vd, vd);
347 vd = _mm_add_ps(vd, t);
348 t = _mm_shuffle_ps(vd, vd, 0x55);
349 vd = _mm_add_ss(vd, t);
351 vd = _mm_sqrt_ss(vd);
352 vd = _mm_div_ss(vOnes, vd);
353 vd = bt_pshufd_ps(vd, 0);
354 mVec128 = _mm_mul_ps(mVec128, vd);
367 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 368 __m128 vs = _mm_load_ss(&s);
369 vs = bt_pshufd_ps(vs, 0x00);
372 #elif defined(BT_USE_NEON) 448 return btVector3(m_floats[0] * s, m_floats[1] * s, m_floats[2] * s);
454 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 456 #elif defined(BT_USE_NEON) 457 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)vQInv));
468 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 470 #elif defined(BT_USE_NEON) 483 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 485 #elif defined(BT_USE_NEON) 497 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 499 #elif defined(BT_USE_NEON) 500 return btQuaternion((btSimdFloat4)veorq_s32((int32x4_t)mVec128, (int32x4_t)btvMzeroMask) );
512 if( diff.
dot(diff) > sum.
dot(sum) )
523 if( diff.
dot(diff) < sum.
dot(sum) )
595 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 596 __m128 vQ1 = q1.get128();
597 __m128 vQ2 = q2.get128();
598 __m128 A0, A1, B1, A2, B2;
600 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
601 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
605 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
606 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
610 B1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
611 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
615 A0 = bt_splat_ps(vQ1, 3);
621 A1 = _mm_xor_ps(A1, vPPPM);
626 #elif defined(BT_USE_NEON) 628 float32x4_t vQ1 = q1.get128();
629 float32x4_t vQ2 = q2.get128();
630 float32x4_t A0, A1, B1, A2, B2, A3, B3;
631 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
635 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
638 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
641 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
643 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
645 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
646 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
648 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
649 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
651 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
652 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
654 A3 = vcombine_f32(vQ1zx, vQ1yz);
655 B3 = vcombine_f32(vQ2yz, vQ2xz);
657 A1 = vmulq_f32(A1, B1);
658 A2 = vmulq_f32(A2, B2);
659 A3 = vmulq_f32(A3, B3);
660 A0 = vmulq_lane_f32(vQ2, vget_high_f32(vQ1), 1);
662 A1 = vaddq_f32(A1, A2);
663 A0 = vsubq_f32(A0, A3);
666 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
667 A0 = vaddq_f32(A0, A1);
673 q1.
w() * q2.
x() + q1.
x() * q2.
w() + q1.
y() * q2.
z() - q1.
z() * q2.
y(),
674 q1.
w() * q2.
y() + q1.
y() * q2.
w() + q1.
z() * q2.
x() - q1.
x() * q2.
z(),
675 q1.
w() * q2.
z() + q1.
z() * q2.
w() + q1.
x() * q2.
y() - q1.
y() * q2.
x(),
676 q1.
w() * q2.
w() - q1.
x() * q2.
x() - q1.
y() * q2.
y() - q1.
z() * q2.
z());
683 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 684 __m128 vQ1 = q.get128();
685 __m128 vQ2 = w.get128();
686 __m128 A1, B1, A2, B2, A3, B3;
688 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(3,3,3,0));
689 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(0,1,2,0));
693 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
694 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
698 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
699 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
704 A1 = _mm_xor_ps(A1, vPPPM);
709 #elif defined(BT_USE_NEON) 711 float32x4_t vQ1 = q.get128();
712 float32x4_t vQ2 = w.get128();
713 float32x4_t A1, B1, A2, B2, A3, B3;
714 float32x2_t vQ1wx, vQ2zx, vQ1yz, vQ2yz, vQ1zx, vQ2xz;
716 vQ1wx = vext_f32(vget_high_f32(vQ1), vget_low_f32(vQ1), 1);
720 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
723 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
727 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
729 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
730 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
732 A1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ1), 1), vQ1wx);
733 B1 = vcombine_f32(vget_low_f32(vQ2), vQ2zx);
735 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
736 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
738 A3 = vcombine_f32(vQ1zx, vQ1yz);
739 B3 = vcombine_f32(vQ2yz, vQ2xz);
741 A1 = vmulq_f32(A1, B1);
742 A2 = vmulq_f32(A2, B2);
743 A3 = vmulq_f32(A3, B3);
745 A1 = vaddq_f32(A1, A2);
748 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
750 A1 = vsubq_f32(A1, A3);
756 q.
w() * w.
x() + q.
y() * w.
z() - q.
z() * w.
y(),
757 q.
w() * w.
y() + q.
z() * w.
x() - q.
x() * w.
z(),
758 q.
w() * w.
z() + q.
x() * w.
y() - q.
y() * w.
x(),
759 -q.
x() * w.
x() - q.
y() * w.
y() - q.
z() * w.
z());
766 #if defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 767 __m128 vQ1 = w.get128();
768 __m128 vQ2 = q.get128();
769 __m128 A1, B1, A2, B2, A3, B3;
771 A1 = bt_pshufd_ps(vQ1, BT_SHUFFLE(0,1,2,0));
772 B1 = bt_pshufd_ps(vQ2, BT_SHUFFLE(3,3,3,0));
776 A2 = bt_pshufd_ps(vQ1, BT_SHUFFLE(1,2,0,1));
777 B2 = bt_pshufd_ps(vQ2, BT_SHUFFLE(2,0,1,1));
781 A3 = bt_pshufd_ps(vQ1, BT_SHUFFLE(2,0,1,2));
782 B3 = bt_pshufd_ps(vQ2, BT_SHUFFLE(1,2,0,2));
787 A1 = _mm_xor_ps(A1, vPPPM);
792 #elif defined(BT_USE_NEON) 794 float32x4_t vQ1 = w.get128();
795 float32x4_t vQ2 = q.get128();
796 float32x4_t A1, B1, A2, B2, A3, B3;
797 float32x2_t vQ1zx, vQ2wx, vQ1yz, vQ2zx, vQ2yz, vQ2xz;
802 tmp = vtrn_f32( vget_high_f32(vQ1), vget_low_f32(vQ1) );
805 tmp = vtrn_f32( vget_high_f32(vQ2), vget_low_f32(vQ2) );
808 vQ2wx = vext_f32(vget_high_f32(vQ2), vget_low_f32(vQ2), 1);
810 vQ1yz = vext_f32(vget_low_f32(vQ1), vget_high_f32(vQ1), 1);
812 vQ2yz = vext_f32(vget_low_f32(vQ2), vget_high_f32(vQ2), 1);
813 vQ2xz = vext_f32(vQ2zx, vQ2zx, 1);
815 A1 = vcombine_f32(vget_low_f32(vQ1), vQ1zx);
816 B1 = vcombine_f32(vdup_lane_f32(vget_high_f32(vQ2), 1), vQ2wx);
818 A2 = vcombine_f32(vQ1yz, vget_low_f32(vQ1));
819 B2 = vcombine_f32(vQ2zx, vdup_lane_f32(vget_low_f32(vQ2), 1));
821 A3 = vcombine_f32(vQ1zx, vQ1yz);
822 B3 = vcombine_f32(vQ2yz, vQ2xz);
824 A1 = vmulq_f32(A1, B1);
825 A2 = vmulq_f32(A2, B2);
826 A3 = vmulq_f32(A3, B3);
828 A1 = vaddq_f32(A1, A2);
831 A1 = (btSimdFloat4)veorq_s32((int32x4_t)A1, (int32x4_t)vPPPM);
833 A1 = vsubq_f32(A1, A3);
839 +w.
x() * q.
w() + w.
y() * q.
z() - w.
z() * q.
y(),
840 +w.
y() * q.
w() + w.
z() * q.
x() - w.
x() * q.
z(),
841 +w.
z() * q.
w() + w.
x() * q.
y() - w.
y() * q.
x(),
842 -w.
x() * q.
x() - w.
y() * q.
y() - w.
z() * q.
z());
883 return q1.
slerp(q2, t);
891 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE) 892 return btVector3(_mm_and_ps(q.get128(), btvFFF0fMask));
893 #elif defined(BT_USE_NEON) 894 return btVector3((float32x4_t)vandq_s32((int32x4_t)q.get128(), btvFFF0Mask));
944 for (
int i=0;i<4;i++)
950 for (
int i=0;i<4;i++)
958 for (
int i=0;i<4;i++)
964 for (
int i=0;i<4;i++)
972 for (
int i=0;i<4;i++)
978 for (
int i=0;i<4;i++)
983 #endif //BT_SIMD__QUATERNION_H_ static T sum(const btAlignedObjectArray< T > &items)
const btScalar & x() const
Return the x value.
btScalar length() const
Return the length of the quaternion.
btScalar getAngleShortestPath() const
Return the angle [0, Pi] of rotation represented by this quaternion along the shortest path.
btQuaternion operator-() const
Return the negative of this quaternion. This simply negates each element.
const btScalar & getZ() const
Return the z value.
btQuaternion & operator*=(const btQuaternion &q)
Multiply this quaternion by q on the right.
void serializeDouble(struct btQuaternionDoubleData &dataOut) const
btQuaternion(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Constructor from Euler angles.
void setEulerZYX(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using euler angles.
btScalar btSin(btScalar x)
static const btQuaternion & getIdentity()
void deSerializeDouble(const struct btQuaternionDoubleData &dataIn)
void deSerializeFloat(const struct btQuaternionFloatData &dataIn)
void btPlaneSpace1(const T &n, T &p, T &q)
btScalar btSqrt(btScalar y)
#define SIMD_FORCE_INLINE
const btScalar & getY() const
Return the y value.
btQuaternion & operator/=(const btScalar &s)
Inversely scale this quaternion.
btQuaternion & operator-=(const btQuaternion &q)
Subtract out a quaternion.
btScalar dot(const btQuaternion &q) const
Return the dot product between this quaternion and another.
btQuaternion operator/(const btScalar &s) const
Return an inversely scaled version of this quaternion.
btQuaternion operator*(const btScalar &s) const
Return a scaled version of this quaternion.
btQuaternion & operator+=(const btQuaternion &q)
Add two quaternions.
const btScalar & getW() const
btVector3 & normalize()
Normalize this vector x^2 + y^2 + z^2 = 1.
btVector3 quatRotate(const btQuaternion &rotation, const btVector3 &v)
btQuaternion inverse() const
Return the inverse of this quaternion.
const btScalar & getZ() const
Return the z value.
btQuaternion operator-(const btQuaternion &q2) const
Return the difference between this quaternion and the other.
btQuaternion farthest(const btQuaternion &qd) const
btQuaternion shortestArcQuat(const btVector3 &v0, const btVector3 &v1)
const btScalar & x() const
Return the x value.
void setRotation(const btVector3 &axis, const btScalar &_angle)
Set the rotation using axis angle notation.
void serializeFloat(struct btQuaternionFloatData &dataOut) const
btQuaternion & normalize()
Normalize the quaternion such that x^2 + y^2 + z^2 + w^2 = 1.
btQuaternion shortestArcQuatNormalize2(btVector3 &v0, btVector3 &v1)
btVector3 cross(const btVector3 &v) const
Return the cross product between this and another vector.
btScalar dot(const btVector3 &v) const
Return the dot product.
const btScalar & getY() const
Return the y value.
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
btQuaternion()
No initialization constructor.
const btScalar & y() const
Return the y value.
The btQuadWord class is base class for btVector3 and btQuaternion.
const btScalar & z() const
Return the z value.
btQuaternion normalized() const
Return a normalized version of this quaternion.
const btScalar & z() const
Return the z value.
btVector3 getAxis() const
Return the axis of the rotation represented by this quaternion.
void deSerialize(const struct btQuaternionData &dataIn)
btVector3 can be used to represent 3D points and vectors.
#define ATTRIBUTE_ALIGNED16(a)
btQuaternion(const btScalar &_x, const btScalar &_y, const btScalar &_z, const btScalar &_w)
Constructor from scalars.
btScalar btAcos(btScalar x)
btScalar getAngle() const
Return the angle [0, 2Pi] of rotation represented by this quaternion.
btQuaternion nearest(const btQuaternion &qd) const
btQuaternion & operator*=(const btScalar &s)
Scale this quaternion.
btScalar angle(const btQuaternion &q) const
Return the half angle between this quaternion and the other.
btScalar length2() const
Return the length squared of the quaternion.
const btScalar & y() const
Return the y value.
btScalar angleShortestPath(const btQuaternion &q) const
Return the angle between this quaternion and the other along the shortest path.
The btQuaternion implements a quaternion to perform linear algebra rotations in combination with btMatrix3x3, btVector3 and btTransform.
void setEuler(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the quaternion using Euler angles.
btQuaternion slerp(const btQuaternion &q, const btScalar &t) const
Return the quaternion which is the result of Spherical Linear Interpolation between this and the other quaternion.
const btScalar & getX() const
Return the x value.
btScalar btAngle(const btQuaternion &q1, const btQuaternion &q2)
Return the angle between two quaternions.
const btScalar & w() const
Return the w value.
void serialize(struct btQuaternionData &dataOut) const
btQuaternion operator+(const btQuaternion &q2) const
Return the sum of this quaternion and the other.
btQuaternion(const btVector3 &_axis, const btScalar &_angle)
Axis angle Constructor.
const btScalar & getX() const
Return the x value.
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floating point precision.
btScalar btCos(btScalar x)
btScalar length() const
Return the length of the vector.
btScalar btFabs(btScalar x)