Bullet Collision Detection & Physics Library
btMatrix3x3.h
Go to the documentation of this file.
1 /*
2 Copyright (c) 2003-2006 Gino van den Bergen / Erwin Coumans http://continuousphysics.com/Bullet/
3 
4 This software is provided 'as-is', without any express or implied warranty.
5 In no event will the authors be held liable for any damages arising from the use of this software.
6 Permission is granted to anyone to use this software for any purpose,
7 including commercial applications, and to alter it and redistribute it freely,
8 subject to the following restrictions:
9 
10 1. The origin of this software must not be misrepresented; you must not claim that you wrote the original software. If you use this software in a product, an acknowledgment in the product documentation would be appreciated but is not required.
11 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software.
12 3. This notice may not be removed or altered from any source distribution.
13 */
14 
15 
16 #ifndef BT_MATRIX3x3_H
17 #define BT_MATRIX3x3_H
18 
19 #include "btVector3.h"
20 #include "btQuaternion.h"
21 #include <stdio.h>
22 
23 #ifdef BT_USE_SSE
24 //const __m128 ATTRIBUTE_ALIGNED16(v2220) = {2.0f, 2.0f, 2.0f, 0.0f};
25 //const __m128 ATTRIBUTE_ALIGNED16(vMPPP) = {-0.0f, +0.0f, +0.0f, +0.0f};
26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))
27 #endif
28 
29 #if defined(BT_USE_SSE)
30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))
31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))
32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))
33 #elif defined(BT_USE_NEON)
34 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
35 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
36 const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
37 #endif
38 
39 #ifdef BT_USE_DOUBLE_PRECISION
40 #define btMatrix3x3Data btMatrix3x3DoubleData
41 #else
42 #define btMatrix3x3Data btMatrix3x3FloatData
43 #endif //BT_USE_DOUBLE_PRECISION
44 
45 
49 
51  btVector3 m_el[3];
52 
53 public:
56 
57  // explicit btMatrix3x3(const btScalar *m) { setFromOpenGLSubMatrix(m); }
58 
60  explicit btMatrix3x3(const btQuaternion& q) { setRotation(q); }
61  /*
62  template <typename btScalar>
63  Matrix3x3(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
64  {
65  setEulerYPR(yaw, pitch, roll);
66  }
67  */
69  btMatrix3x3(const btScalar& xx, const btScalar& xy, const btScalar& xz,
70  const btScalar& yx, const btScalar& yy, const btScalar& yz,
71  const btScalar& zx, const btScalar& zy, const btScalar& zz)
72  {
73  setValue(xx, xy, xz,
74  yx, yy, yz,
75  zx, zy, zz);
76  }
77 
78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
79  SIMD_FORCE_INLINE btMatrix3x3 (const btSimdFloat4 v0, const btSimdFloat4 v1, const btSimdFloat4 v2 )
80  {
81  m_el[0].mVec128 = v0;
82  m_el[1].mVec128 = v1;
83  m_el[2].mVec128 = v2;
84  }
85 
86  SIMD_FORCE_INLINE btMatrix3x3 (const btVector3& v0, const btVector3& v1, const btVector3& v2 )
87  {
88  m_el[0] = v0;
89  m_el[1] = v1;
90  m_el[2] = v2;
91  }
92 
93  // Copy constructor
95  {
96  m_el[0].mVec128 = rhs.m_el[0].mVec128;
97  m_el[1].mVec128 = rhs.m_el[1].mVec128;
98  m_el[2].mVec128 = rhs.m_el[2].mVec128;
99  }
100 
101  // Assignment Operator
102  SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
103  {
104  m_el[0].mVec128 = m.m_el[0].mVec128;
105  m_el[1].mVec128 = m.m_el[1].mVec128;
106  m_el[2].mVec128 = m.m_el[2].mVec128;
107 
108  return *this;
109  }
110 
111 #else
112 
115  {
116  m_el[0] = other.m_el[0];
117  m_el[1] = other.m_el[1];
118  m_el[2] = other.m_el[2];
119  }
120 
123  {
124  m_el[0] = other.m_el[0];
125  m_el[1] = other.m_el[1];
126  m_el[2] = other.m_el[2];
127  return *this;
128  }
129 
130 #endif
131 
135  {
136  return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
137  }
138 
139 
142  SIMD_FORCE_INLINE const btVector3& getRow(int i) const
143  {
144  btFullAssert(0 <= i && i < 3);
145  return m_el[i];
146  }
147 
151  {
152  btFullAssert(0 <= i && i < 3);
153  return m_el[i];
154  }
155 
159  {
160  btFullAssert(0 <= i && i < 3);
161  return m_el[i];
162  }
163 
167  btMatrix3x3& operator*=(const btMatrix3x3& m);
168 
172  btMatrix3x3& operator+=(const btMatrix3x3& m);
173 
177  btMatrix3x3& operator-=(const btMatrix3x3& m);
178 
182  {
183  m_el[0].setValue(m[0],m[4],m[8]);
184  m_el[1].setValue(m[1],m[5],m[9]);
185  m_el[2].setValue(m[2],m[6],m[10]);
186 
187  }
198  void setValue(const btScalar& xx, const btScalar& xy, const btScalar& xz,
199  const btScalar& yx, const btScalar& yy, const btScalar& yz,
200  const btScalar& zx, const btScalar& zy, const btScalar& zz)
201  {
202  m_el[0].setValue(xx,xy,xz);
203  m_el[1].setValue(yx,yy,yz);
204  m_el[2].setValue(zx,zy,zz);
205  }
206 
209  void setRotation(const btQuaternion& q)
210  {
211  btScalar d = q.length2();
212  btFullAssert(d != btScalar(0.0));
213  btScalar s = btScalar(2.0) / d;
214 
215  #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
216  __m128 vs, Q = q.get128();
217  __m128i Qi = btCastfTo128i(Q);
218  __m128 Y, Z;
219  __m128 V1, V2, V3;
220  __m128 V11, V21, V31;
221  __m128 NQ = _mm_xor_ps(Q, btvMzeroMask);
222  __m128i NQi = btCastfTo128i(NQ);
223 
224  V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3))); // Y X Z W
225  V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3)); // -X -X Y W
226  V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3))); // Z Y X W
227  V1 = _mm_xor_ps(V1, vMPPP); // change the sign of the first element
228 
229  V11 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3))); // Y Y X W
230  V21 = _mm_unpackhi_ps(Q, Q); // Z Z W W
231  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3)); // X Z -X -W
232 
233  V2 = V2 * V1; //
234  V1 = V1 * V11; //
235  V3 = V3 * V31; //
236 
237  V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3)); // -Z -W Y W
238  V11 = V11 * V21; //
239  V21 = _mm_xor_ps(V21, vMPPP); // change the sign of the first element
240  V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3)); // W W -Y -W
241  V31 = _mm_xor_ps(V31, vMPPP); // change the sign of the first element
242  Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3))); // -W -Z -X -W
243  Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); // Y X Y W
244 
245  vs = _mm_load_ss(&s);
246  V21 = V21 * Y;
247  V31 = V31 * Z;
248 
249  V1 = V1 + V11;
250  V2 = V2 + V21;
251  V3 = V3 + V31;
252 
253  vs = bt_splat3_ps(vs, 0);
254  // s ready
255  V1 = V1 * vs;
256  V2 = V2 * vs;
257  V3 = V3 * vs;
258 
259  V1 = V1 + v1000;
260  V2 = V2 + v0100;
261  V3 = V3 + v0010;
262 
263  m_el[0] = V1;
264  m_el[1] = V2;
265  m_el[2] = V3;
266  #else
267  btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
268  btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
269  btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
270  btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
271  setValue(
272  btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
273  xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
274  xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
275  #endif
276  }
277 
278 
284  void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
285  {
286  setEulerZYX(roll, pitch, yaw);
287  }
288 
298  void setEulerZYX(btScalar eulerX,btScalar eulerY,btScalar eulerZ) {
300  btScalar ci ( btCos(eulerX));
301  btScalar cj ( btCos(eulerY));
302  btScalar ch ( btCos(eulerZ));
303  btScalar si ( btSin(eulerX));
304  btScalar sj ( btSin(eulerY));
305  btScalar sh ( btSin(eulerZ));
306  btScalar cc = ci * ch;
307  btScalar cs = ci * sh;
308  btScalar sc = si * ch;
309  btScalar ss = si * sh;
310 
311  setValue(cj * ch, sj * sc - cs, sj * cc + ss,
312  cj * sh, sj * ss + cc, sj * cs - sc,
313  -sj, cj * si, cj * ci);
314  }
315 
317  void setIdentity()
318  {
319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
320  m_el[0] = v1000;
321  m_el[1] = v0100;
322  m_el[2] = v0010;
323 #else
324  setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
325  btScalar(0.0), btScalar(1.0), btScalar(0.0),
326  btScalar(0.0), btScalar(0.0), btScalar(1.0));
327 #endif
328  }
329 
330  static const btMatrix3x3& getIdentity()
331  {
332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)
333  static const btMatrix3x3
334  identityMatrix(v1000, v0100, v0010);
335 #else
336  static const btMatrix3x3
337  identityMatrix(
338  btScalar(1.0), btScalar(0.0), btScalar(0.0),
339  btScalar(0.0), btScalar(1.0), btScalar(0.0),
340  btScalar(0.0), btScalar(0.0), btScalar(1.0));
341 #endif
342  return identityMatrix;
343  }
344 
347  void getOpenGLSubMatrix(btScalar *m) const
348  {
349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
350  __m128 v0 = m_el[0].mVec128;
351  __m128 v1 = m_el[1].mVec128;
352  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
353  __m128 *vm = (__m128 *)m;
354  __m128 vT;
355 
356  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
357 
358  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
359  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
360 
361  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
362  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
363  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
364 
365  vm[0] = v0;
366  vm[1] = v1;
367  vm[2] = v2;
368 #elif defined(BT_USE_NEON)
369  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
370  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
371  float32x4_t *vm = (float32x4_t *)m;
372  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
373  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
374  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
375  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
376  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
377  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
378 
379  vm[0] = v0;
380  vm[1] = v1;
381  vm[2] = v2;
382 #else
383  m[0] = btScalar(m_el[0].x());
384  m[1] = btScalar(m_el[1].x());
385  m[2] = btScalar(m_el[2].x());
386  m[3] = btScalar(0.0);
387  m[4] = btScalar(m_el[0].y());
388  m[5] = btScalar(m_el[1].y());
389  m[6] = btScalar(m_el[2].y());
390  m[7] = btScalar(0.0);
391  m[8] = btScalar(m_el[0].z());
392  m[9] = btScalar(m_el[1].z());
393  m[10] = btScalar(m_el[2].z());
394  m[11] = btScalar(0.0);
395 #endif
396  }
397 
400  void getRotation(btQuaternion& q) const
401  {
402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
403  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
404  btScalar s, x;
405 
406  union {
407  btSimdFloat4 vec;
408  btScalar f[4];
409  } temp;
410 
411  if (trace > btScalar(0.0))
412  {
413  x = trace + btScalar(1.0);
414 
415  temp.f[0]=m_el[2].y() - m_el[1].z();
416  temp.f[1]=m_el[0].z() - m_el[2].x();
417  temp.f[2]=m_el[1].x() - m_el[0].y();
418  temp.f[3]=x;
419  //temp.f[3]= s * btScalar(0.5);
420  }
421  else
422  {
423  int i, j, k;
424  if(m_el[0].x() < m_el[1].y())
425  {
426  if( m_el[1].y() < m_el[2].z() )
427  { i = 2; j = 0; k = 1; }
428  else
429  { i = 1; j = 2; k = 0; }
430  }
431  else
432  {
433  if( m_el[0].x() < m_el[2].z())
434  { i = 2; j = 0; k = 1; }
435  else
436  { i = 0; j = 1; k = 2; }
437  }
438 
439  x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);
440 
441  temp.f[3] = (m_el[k][j] - m_el[j][k]);
442  temp.f[j] = (m_el[j][i] + m_el[i][j]);
443  temp.f[k] = (m_el[k][i] + m_el[i][k]);
444  temp.f[i] = x;
445  //temp.f[i] = s * btScalar(0.5);
446  }
447 
448  s = btSqrt(x);
449  q.set128(temp.vec);
450  s = btScalar(0.5) / s;
451 
452  q *= s;
453 #else
454  btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
455 
456  btScalar temp[4];
457 
458  if (trace > btScalar(0.0))
459  {
460  btScalar s = btSqrt(trace + btScalar(1.0));
461  temp[3]=(s * btScalar(0.5));
462  s = btScalar(0.5) / s;
463 
464  temp[0]=((m_el[2].y() - m_el[1].z()) * s);
465  temp[1]=((m_el[0].z() - m_el[2].x()) * s);
466  temp[2]=((m_el[1].x() - m_el[0].y()) * s);
467  }
468  else
469  {
470  int i = m_el[0].x() < m_el[1].y() ?
471  (m_el[1].y() < m_el[2].z() ? 2 : 1) :
472  (m_el[0].x() < m_el[2].z() ? 2 : 0);
473  int j = (i + 1) % 3;
474  int k = (i + 2) % 3;
475 
476  btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
477  temp[i] = s * btScalar(0.5);
478  s = btScalar(0.5) / s;
479 
480  temp[3] = (m_el[k][j] - m_el[j][k]) * s;
481  temp[j] = (m_el[j][i] + m_el[i][j]) * s;
482  temp[k] = (m_el[k][i] + m_el[i][k]) * s;
483  }
484  q.setValue(temp[0],temp[1],temp[2],temp[3]);
485 #endif
486  }
487 
492  void getEulerYPR(btScalar& yaw, btScalar& pitch, btScalar& roll) const
493  {
494 
495  // first use the normal calculus
496  yaw = btScalar(btAtan2(m_el[1].x(), m_el[0].x()));
497  pitch = btScalar(btAsin(-m_el[2].x()));
498  roll = btScalar(btAtan2(m_el[2].y(), m_el[2].z()));
499 
500  // on pitch = +/-HalfPI
501  if (btFabs(pitch)==SIMD_HALF_PI)
502  {
503  if (yaw>0)
504  yaw-=SIMD_PI;
505  else
506  yaw+=SIMD_PI;
507 
508  if (roll>0)
509  roll-=SIMD_PI;
510  else
511  roll+=SIMD_PI;
512  }
513  };
514 
515 
521  void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
522  {
523  struct Euler
524  {
525  btScalar yaw;
526  btScalar pitch;
527  btScalar roll;
528  };
529 
530  Euler euler_out;
531  Euler euler_out2; //second solution
532  //get the pointer to the raw data
533 
534  // Check that pitch is not at a singularity
535  if (btFabs(m_el[2].x()) >= 1)
536  {
537  euler_out.yaw = 0;
538  euler_out2.yaw = 0;
539 
540  // From difference of angles formula
541  btScalar delta = btAtan2(m_el[0].x(),m_el[0].z());
542  if (m_el[2].x() > 0) //gimbal locked up
543  {
544  euler_out.pitch = SIMD_PI / btScalar(2.0);
545  euler_out2.pitch = SIMD_PI / btScalar(2.0);
546  euler_out.roll = euler_out.pitch + delta;
547  euler_out2.roll = euler_out.pitch + delta;
548  }
549  else // gimbal locked down
550  {
551  euler_out.pitch = -SIMD_PI / btScalar(2.0);
552  euler_out2.pitch = -SIMD_PI / btScalar(2.0);
553  euler_out.roll = -euler_out.pitch + delta;
554  euler_out2.roll = -euler_out.pitch + delta;
555  }
556  }
557  else
558  {
559  euler_out.pitch = - btAsin(m_el[2].x());
560  euler_out2.pitch = SIMD_PI - euler_out.pitch;
561 
562  euler_out.roll = btAtan2(m_el[2].y()/btCos(euler_out.pitch),
563  m_el[2].z()/btCos(euler_out.pitch));
564  euler_out2.roll = btAtan2(m_el[2].y()/btCos(euler_out2.pitch),
565  m_el[2].z()/btCos(euler_out2.pitch));
566 
567  euler_out.yaw = btAtan2(m_el[1].x()/btCos(euler_out.pitch),
568  m_el[0].x()/btCos(euler_out.pitch));
569  euler_out2.yaw = btAtan2(m_el[1].x()/btCos(euler_out2.pitch),
570  m_el[0].x()/btCos(euler_out2.pitch));
571  }
572 
573  if (solution_number == 1)
574  {
575  yaw = euler_out.yaw;
576  pitch = euler_out.pitch;
577  roll = euler_out.roll;
578  }
579  else
580  {
581  yaw = euler_out2.yaw;
582  pitch = euler_out2.pitch;
583  roll = euler_out2.roll;
584  }
585  }
586 
590  btMatrix3x3 scaled(const btVector3& s) const
591  {
592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
593  return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
594 #else
595  return btMatrix3x3(
596  m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
597  m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
598  m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
599 #endif
600  }
601 
603  btScalar determinant() const;
605  btMatrix3x3 adjoint() const;
607  btMatrix3x3 absolute() const;
609  btMatrix3x3 transpose() const;
611  btMatrix3x3 inverse() const;
612 
616  btVector3 solve33(const btVector3& b) const
617  {
618  btVector3 col1 = getColumn(0);
619  btVector3 col2 = getColumn(1);
620  btVector3 col3 = getColumn(2);
621 
622  btScalar det = btDot(col1, btCross(col2, col3));
623  if (btFabs(det)>SIMD_EPSILON)
624  {
625  det = 1.0f / det;
626  }
627  btVector3 x;
628  x[0] = det * btDot(b, btCross(col2, col3));
629  x[1] = det * btDot(col1, btCross(b, col3));
630  x[2] = det * btDot(col1, btCross(col2, b));
631  return x;
632  }
633 
634  btMatrix3x3 transposeTimes(const btMatrix3x3& m) const;
635  btMatrix3x3 timesTranspose(const btMatrix3x3& m) const;
636 
638  {
639  return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
640  }
642  {
643  return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
644  }
646  {
647  return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
648  }
649 
650 
660  void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
661  {
662  rot.setIdentity();
663  for (int step = maxSteps; step > 0; step--)
664  {
665  // find off-diagonal element [p][q] with largest magnitude
666  int p = 0;
667  int q = 1;
668  int r = 2;
669  btScalar max = btFabs(m_el[0][1]);
670  btScalar v = btFabs(m_el[0][2]);
671  if (v > max)
672  {
673  q = 2;
674  r = 1;
675  max = v;
676  }
677  v = btFabs(m_el[1][2]);
678  if (v > max)
679  {
680  p = 1;
681  q = 2;
682  r = 0;
683  max = v;
684  }
685 
686  btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
687  if (max <= t)
688  {
689  if (max <= SIMD_EPSILON * t)
690  {
691  return;
692  }
693  step = 1;
694  }
695 
696  // compute Jacobi rotation J which leads to a zero for element [p][q]
697  btScalar mpq = m_el[p][q];
698  btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
699  btScalar theta2 = theta * theta;
700  btScalar cos;
701  btScalar sin;
702  if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
703  {
704  t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
705  : 1 / (theta - btSqrt(1 + theta2));
706  cos = 1 / btSqrt(1 + t * t);
707  sin = cos * t;
708  }
709  else
710  {
711  // approximation for large theta-value, i.e., a nearly diagonal matrix
712  t = 1 / (theta * (2 + btScalar(0.5) / theta2));
713  cos = 1 - btScalar(0.5) * t * t;
714  sin = cos * t;
715  }
716 
717  // apply rotation to matrix (this = J^T * this * J)
718  m_el[p][q] = m_el[q][p] = 0;
719  m_el[p][p] -= t * mpq;
720  m_el[q][q] += t * mpq;
721  btScalar mrp = m_el[r][p];
722  btScalar mrq = m_el[r][q];
723  m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
724  m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
725 
726  // apply rotation to rot (rot = rot * J)
727  for (int i = 0; i < 3; i++)
728  {
729  btVector3& row = rot[i];
730  mrp = row[p];
731  mrq = row[q];
732  row[p] = cos * mrp - sin * mrq;
733  row[q] = cos * mrq + sin * mrp;
734  }
735  }
736  }
737 
738 
739 
740 
748  btScalar cofac(int r1, int c1, int r2, int c2) const
749  {
750  return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
751  }
752 
753  void serialize(struct btMatrix3x3Data& dataOut) const;
754 
755  void serializeFloat(struct btMatrix3x3FloatData& dataOut) const;
756 
757  void deSerialize(const struct btMatrix3x3Data& dataIn);
758 
759  void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
760 
761  void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
762 
763 };
764 
765 
768 {
769 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)
770  __m128 rv00, rv01, rv02;
771  __m128 rv10, rv11, rv12;
772  __m128 rv20, rv21, rv22;
773  __m128 mv0, mv1, mv2;
774 
775  rv02 = m_el[0].mVec128;
776  rv12 = m_el[1].mVec128;
777  rv22 = m_el[2].mVec128;
778 
779  mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
780  mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
781  mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);
782 
783  // rv0
784  rv00 = bt_splat_ps(rv02, 0);
785  rv01 = bt_splat_ps(rv02, 1);
786  rv02 = bt_splat_ps(rv02, 2);
787 
788  rv00 = _mm_mul_ps(rv00, mv0);
789  rv01 = _mm_mul_ps(rv01, mv1);
790  rv02 = _mm_mul_ps(rv02, mv2);
791 
792  // rv1
793  rv10 = bt_splat_ps(rv12, 0);
794  rv11 = bt_splat_ps(rv12, 1);
795  rv12 = bt_splat_ps(rv12, 2);
796 
797  rv10 = _mm_mul_ps(rv10, mv0);
798  rv11 = _mm_mul_ps(rv11, mv1);
799  rv12 = _mm_mul_ps(rv12, mv2);
800 
801  // rv2
802  rv20 = bt_splat_ps(rv22, 0);
803  rv21 = bt_splat_ps(rv22, 1);
804  rv22 = bt_splat_ps(rv22, 2);
805 
806  rv20 = _mm_mul_ps(rv20, mv0);
807  rv21 = _mm_mul_ps(rv21, mv1);
808  rv22 = _mm_mul_ps(rv22, mv2);
809 
810  rv00 = _mm_add_ps(rv00, rv01);
811  rv10 = _mm_add_ps(rv10, rv11);
812  rv20 = _mm_add_ps(rv20, rv21);
813 
814  m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
815  m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
816  m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
817 
818 #elif defined(BT_USE_NEON)
819 
820  float32x4_t rv0, rv1, rv2;
821  float32x4_t v0, v1, v2;
822  float32x4_t mv0, mv1, mv2;
823 
824  v0 = m_el[0].mVec128;
825  v1 = m_el[1].mVec128;
826  v2 = m_el[2].mVec128;
827 
828  mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
829  mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
830  mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
831 
832  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
833  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
834  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
835 
836  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
837  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
838  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
839 
840  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
841  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
842  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
843 
844  m_el[0].mVec128 = rv0;
845  m_el[1].mVec128 = rv1;
846  m_el[2].mVec128 = rv2;
847 #else
848  setValue(
849  m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
850  m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
851  m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
852 #endif
853  return *this;
854 }
855 
858 {
859 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
860  m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
861  m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
862  m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
863 #else
864  setValue(
865  m_el[0][0]+m.m_el[0][0],
866  m_el[0][1]+m.m_el[0][1],
867  m_el[0][2]+m.m_el[0][2],
868  m_el[1][0]+m.m_el[1][0],
869  m_el[1][1]+m.m_el[1][1],
870  m_el[1][2]+m.m_el[1][2],
871  m_el[2][0]+m.m_el[2][0],
872  m_el[2][1]+m.m_el[2][1],
873  m_el[2][2]+m.m_el[2][2]);
874 #endif
875  return *this;
876 }
877 
879 operator*(const btMatrix3x3& m, const btScalar & k)
880 {
881 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
882  __m128 vk = bt_splat_ps(_mm_load_ss((float *)&k), 0x80);
883  return btMatrix3x3(
884  _mm_mul_ps(m[0].mVec128, vk),
885  _mm_mul_ps(m[1].mVec128, vk),
886  _mm_mul_ps(m[2].mVec128, vk));
887 #elif defined(BT_USE_NEON)
888  return btMatrix3x3(
889  vmulq_n_f32(m[0].mVec128, k),
890  vmulq_n_f32(m[1].mVec128, k),
891  vmulq_n_f32(m[2].mVec128, k));
892 #else
893  return btMatrix3x3(
894  m[0].x()*k,m[0].y()*k,m[0].z()*k,
895  m[1].x()*k,m[1].y()*k,m[1].z()*k,
896  m[2].x()*k,m[2].y()*k,m[2].z()*k);
897 #endif
898 }
899 
901 operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
902 {
903 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
904  return btMatrix3x3(
905  m1[0].mVec128 + m2[0].mVec128,
906  m1[1].mVec128 + m2[1].mVec128,
907  m1[2].mVec128 + m2[2].mVec128);
908 #else
909  return btMatrix3x3(
910  m1[0][0]+m2[0][0],
911  m1[0][1]+m2[0][1],
912  m1[0][2]+m2[0][2],
913 
914  m1[1][0]+m2[1][0],
915  m1[1][1]+m2[1][1],
916  m1[1][2]+m2[1][2],
917 
918  m1[2][0]+m2[2][0],
919  m1[2][1]+m2[2][1],
920  m1[2][2]+m2[2][2]);
921 #endif
922 }
923 
925 operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
926 {
927 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
928  return btMatrix3x3(
929  m1[0].mVec128 - m2[0].mVec128,
930  m1[1].mVec128 - m2[1].mVec128,
931  m1[2].mVec128 - m2[2].mVec128);
932 #else
933  return btMatrix3x3(
934  m1[0][0]-m2[0][0],
935  m1[0][1]-m2[0][1],
936  m1[0][2]-m2[0][2],
937 
938  m1[1][0]-m2[1][0],
939  m1[1][1]-m2[1][1],
940  m1[1][2]-m2[1][2],
941 
942  m1[2][0]-m2[2][0],
943  m1[2][1]-m2[2][1],
944  m1[2][2]-m2[2][2]);
945 #endif
946 }
947 
948 
951 {
952 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
953  m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
954  m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
955  m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
956 #else
957  setValue(
958  m_el[0][0]-m.m_el[0][0],
959  m_el[0][1]-m.m_el[0][1],
960  m_el[0][2]-m.m_el[0][2],
961  m_el[1][0]-m.m_el[1][0],
962  m_el[1][1]-m.m_el[1][1],
963  m_el[1][2]-m.m_el[1][2],
964  m_el[2][0]-m.m_el[2][0],
965  m_el[2][1]-m.m_el[2][1],
966  m_el[2][2]-m.m_el[2][2]);
967 #endif
968  return *this;
969 }
970 
971 
974 {
975  return btTriple((*this)[0], (*this)[1], (*this)[2]);
976 }
977 
978 
981 {
982 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
983  return btMatrix3x3(
984  _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
985  _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
986  _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
987 #elif defined(BT_USE_NEON)
988  return btMatrix3x3(
989  (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
990  (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
991  (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
992 #else
993  return btMatrix3x3(
994  btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
995  btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
996  btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
997 #endif
998 }
999 
1002 {
1003 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1004  __m128 v0 = m_el[0].mVec128;
1005  __m128 v1 = m_el[1].mVec128;
1006  __m128 v2 = m_el[2].mVec128; // x2 y2 z2 w2
1007  __m128 vT;
1008 
1009  v2 = _mm_and_ps(v2, btvFFF0fMask); // x2 y2 z2 0
1010 
1011  vT = _mm_unpackhi_ps(v0, v1); // z0 z1 * *
1012  v0 = _mm_unpacklo_ps(v0, v1); // x0 x1 y0 y1
1013 
1014  v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) ); // y0 y1 y2 0
1015  v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) ); // x0 x1 x2 0
1016  v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT))); // z0 z1 z2 0
1017 
1018 
1019  return btMatrix3x3( v0, v1, v2 );
1020 #elif defined(BT_USE_NEON)
1021  // note: zeros the w channel. We can preserve it at the cost of two more vtrn instructions.
1022  static const uint32x2_t zMask = (const uint32x2_t) {static_cast<uint32_t>(-1), 0 };
1023  float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 ); // {x0 x1 z0 z1}, {y0 y1 w0 w1}
1024  float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) ); // {x2 0 }, {y2 0}
1025  float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
1026  float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
1027  float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
1028  float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q ); // z0 z1 z2 0
1029  return btMatrix3x3( v0, v1, v2 );
1030 #else
1031  return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
1032  m_el[0].y(), m_el[1].y(), m_el[2].y(),
1033  m_el[0].z(), m_el[1].z(), m_el[2].z());
1034 #endif
1035 }
1036 
1039 {
1040  return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
1041  cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
1042  cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
1043 }
1044 
1047 {
1048  btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
1049  btScalar det = (*this)[0].dot(co);
1050  //btFullAssert(det != btScalar(0.0));
1051  btAssert(det != btScalar(0.0));
1052  btScalar s = btScalar(1.0) / det;
1053  return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
1054  co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
1055  co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
1056 }
1057 
1060 {
1061 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1062  // zeros w
1063 // static const __m128i xyzMask = (const __m128i){ -1ULL, 0xffffffffULL };
1064  __m128 row = m_el[0].mVec128;
1065  __m128 m0 = _mm_and_ps( m.getRow(0).mVec128, btvFFF0fMask );
1066  __m128 m1 = _mm_and_ps( m.getRow(1).mVec128, btvFFF0fMask);
1067  __m128 m2 = _mm_and_ps( m.getRow(2).mVec128, btvFFF0fMask );
1068  __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
1069  __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
1070  __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
1071  row = m_el[1].mVec128;
1072  r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
1073  r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
1074  r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
1075  row = m_el[2].mVec128;
1076  r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
1077  r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
1078  r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
1079  return btMatrix3x3( r0, r1, r2 );
1080 
1081 #elif defined BT_USE_NEON
1082  // zeros w
1083  static const uint32x4_t xyzMask = (const uint32x4_t){ static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0 };
1084  float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(0).mVec128, xyzMask );
1085  float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(1).mVec128, xyzMask );
1086  float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.getRow(2).mVec128, xyzMask );
1087  float32x4_t row = m_el[0].mVec128;
1088  float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
1089  float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
1090  float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
1091  row = m_el[1].mVec128;
1092  r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
1093  r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
1094  r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
1095  row = m_el[2].mVec128;
1096  r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
1097  r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
1098  r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
1099  return btMatrix3x3( r0, r1, r2 );
1100 #else
1101  return btMatrix3x3(
1102  m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
1103  m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
1104  m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
1105  m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
1106  m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
1107  m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
1108  m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
1109  m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
1110  m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
1111 #endif
1112 }
1113 
1116 {
1117 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1118  __m128 a0 = m_el[0].mVec128;
1119  __m128 a1 = m_el[1].mVec128;
1120  __m128 a2 = m_el[2].mVec128;
1121 
1122  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1123  __m128 mx = mT[0].mVec128;
1124  __m128 my = mT[1].mVec128;
1125  __m128 mz = mT[2].mVec128;
1126 
1127  __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
1128  __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
1129  __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
1130  r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
1131  r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
1132  r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
1133  r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
1134  r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
1135  r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
1136  return btMatrix3x3( r0, r1, r2);
1137 
1138 #elif defined BT_USE_NEON
1139  float32x4_t a0 = m_el[0].mVec128;
1140  float32x4_t a1 = m_el[1].mVec128;
1141  float32x4_t a2 = m_el[2].mVec128;
1142 
1143  btMatrix3x3 mT = m.transpose(); // we rely on transpose() zeroing w channel so that we don't have to do it here
1144  float32x4_t mx = mT[0].mVec128;
1145  float32x4_t my = mT[1].mVec128;
1146  float32x4_t mz = mT[2].mVec128;
1147 
1148  float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
1149  float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
1150  float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
1151  r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
1152  r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
1153  r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
1154  r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
1155  r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
1156  r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
1157  return btMatrix3x3( r0, r1, r2 );
1158 
1159 #else
1160  return btMatrix3x3(
1161  m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
1162  m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
1163  m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
1164 #endif
1165 }
1166 
1168 operator*(const btMatrix3x3& m, const btVector3& v)
1169 {
1170 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)
1171  return v.dot3(m[0], m[1], m[2]);
1172 #else
1173  return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
1174 #endif
1175 }
1176 
1177 
1179 operator*(const btVector3& v, const btMatrix3x3& m)
1180 {
1181 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1182 
1183  const __m128 vv = v.mVec128;
1184 
1185  __m128 c0 = bt_splat_ps( vv, 0);
1186  __m128 c1 = bt_splat_ps( vv, 1);
1187  __m128 c2 = bt_splat_ps( vv, 2);
1188 
1189  c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
1190  c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
1191  c0 = _mm_add_ps(c0, c1);
1192  c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
1193 
1194  return btVector3(_mm_add_ps(c0, c2));
1195 #elif defined(BT_USE_NEON)
1196  const float32x4_t vv = v.mVec128;
1197  const float32x2_t vlo = vget_low_f32(vv);
1198  const float32x2_t vhi = vget_high_f32(vv);
1199 
1200  float32x4_t c0, c1, c2;
1201 
1202  c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
1203  c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
1204  c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
1205 
1206  c0 = vmulq_lane_f32(c0, vlo, 0);
1207  c1 = vmulq_lane_f32(c1, vlo, 1);
1208  c2 = vmulq_lane_f32(c2, vhi, 0);
1209  c0 = vaddq_f32(c0, c1);
1210  c0 = vaddq_f32(c0, c2);
1211 
1212  return btVector3(c0);
1213 #else
1214  return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
1215 #endif
1216 }
1217 
1219 operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
1220 {
1221 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1222 
1223  __m128 m10 = m1[0].mVec128;
1224  __m128 m11 = m1[1].mVec128;
1225  __m128 m12 = m1[2].mVec128;
1226 
1227  __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
1228 
1229  __m128 c0 = bt_splat_ps( m10, 0);
1230  __m128 c1 = bt_splat_ps( m11, 0);
1231  __m128 c2 = bt_splat_ps( m12, 0);
1232 
1233  c0 = _mm_mul_ps(c0, m2v);
1234  c1 = _mm_mul_ps(c1, m2v);
1235  c2 = _mm_mul_ps(c2, m2v);
1236 
1237  m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
1238 
1239  __m128 c0_1 = bt_splat_ps( m10, 1);
1240  __m128 c1_1 = bt_splat_ps( m11, 1);
1241  __m128 c2_1 = bt_splat_ps( m12, 1);
1242 
1243  c0_1 = _mm_mul_ps(c0_1, m2v);
1244  c1_1 = _mm_mul_ps(c1_1, m2v);
1245  c2_1 = _mm_mul_ps(c2_1, m2v);
1246 
1247  m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
1248 
1249  c0 = _mm_add_ps(c0, c0_1);
1250  c1 = _mm_add_ps(c1, c1_1);
1251  c2 = _mm_add_ps(c2, c2_1);
1252 
1253  m10 = bt_splat_ps( m10, 2);
1254  m11 = bt_splat_ps( m11, 2);
1255  m12 = bt_splat_ps( m12, 2);
1256 
1257  m10 = _mm_mul_ps(m10, m2v);
1258  m11 = _mm_mul_ps(m11, m2v);
1259  m12 = _mm_mul_ps(m12, m2v);
1260 
1261  c0 = _mm_add_ps(c0, m10);
1262  c1 = _mm_add_ps(c1, m11);
1263  c2 = _mm_add_ps(c2, m12);
1264 
1265  return btMatrix3x3(c0, c1, c2);
1266 
1267 #elif defined(BT_USE_NEON)
1268 
1269  float32x4_t rv0, rv1, rv2;
1270  float32x4_t v0, v1, v2;
1271  float32x4_t mv0, mv1, mv2;
1272 
1273  v0 = m1[0].mVec128;
1274  v1 = m1[1].mVec128;
1275  v2 = m1[2].mVec128;
1276 
1277  mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
1278  mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
1279  mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);
1280 
1281  rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
1282  rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
1283  rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
1284 
1285  rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
1286  rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
1287  rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
1288 
1289  rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
1290  rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
1291  rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
1292 
1293  return btMatrix3x3(rv0, rv1, rv2);
1294 
1295 #else
1296  return btMatrix3x3(
1297  m2.tdotx( m1[0]), m2.tdoty( m1[0]), m2.tdotz( m1[0]),
1298  m2.tdotx( m1[1]), m2.tdoty( m1[1]), m2.tdotz( m1[1]),
1299  m2.tdotx( m1[2]), m2.tdoty( m1[2]), m2.tdotz( m1[2]));
1300 #endif
1301 }
1302 
1303 /*
1304 SIMD_FORCE_INLINE btMatrix3x3 btMultTransposeLeft(const btMatrix3x3& m1, const btMatrix3x3& m2) {
1305 return btMatrix3x3(
1306 m1[0][0] * m2[0][0] + m1[1][0] * m2[1][0] + m1[2][0] * m2[2][0],
1307 m1[0][0] * m2[0][1] + m1[1][0] * m2[1][1] + m1[2][0] * m2[2][1],
1308 m1[0][0] * m2[0][2] + m1[1][0] * m2[1][2] + m1[2][0] * m2[2][2],
1309 m1[0][1] * m2[0][0] + m1[1][1] * m2[1][0] + m1[2][1] * m2[2][0],
1310 m1[0][1] * m2[0][1] + m1[1][1] * m2[1][1] + m1[2][1] * m2[2][1],
1311 m1[0][1] * m2[0][2] + m1[1][1] * m2[1][2] + m1[2][1] * m2[2][2],
1312 m1[0][2] * m2[0][0] + m1[1][2] * m2[1][0] + m1[2][2] * m2[2][0],
1313 m1[0][2] * m2[0][1] + m1[1][2] * m2[1][1] + m1[2][2] * m2[2][1],
1314 m1[0][2] * m2[0][2] + m1[1][2] * m2[1][2] + m1[2][2] * m2[2][2]);
1315 }
1316 */
1317 
1321 {
1322 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))
1323 
1324  __m128 c0, c1, c2;
1325 
1326  c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
1327  c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
1328  c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
1329 
1330  c0 = _mm_and_ps(c0, c1);
1331  c0 = _mm_and_ps(c0, c2);
1332 
1333  int m = _mm_movemask_ps((__m128)c0);
1334  return (0x7 == (m & 0x7));
1335 
1336 #else
1337  return
1338  ( m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
1339  m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
1340  m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
1341 #endif
1342 }
1343 
1346 {
1348 };
1349 
1352 {
1354 };
1355 
1356 
1357 
1358 
1360 {
1361  for (int i=0;i<3;i++)
1362  m_el[i].serialize(dataOut.m_el[i]);
1363 }
1364 
1366 {
1367  for (int i=0;i<3;i++)
1368  m_el[i].serializeFloat(dataOut.m_el[i]);
1369 }
1370 
1371 
1373 {
1374  for (int i=0;i<3;i++)
1375  m_el[i].deSerialize(dataIn.m_el[i]);
1376 }
1377 
1379 {
1380  for (int i=0;i<3;i++)
1381  m_el[i].deSerializeFloat(dataIn.m_el[i]);
1382 }
1383 
1385 {
1386  for (int i=0;i<3;i++)
1387  m_el[i].deSerializeDouble(dataIn.m_el[i]);
1388 }
1389 
1390 #endif //BT_MATRIX3x3_H
1391 
const btScalar & x() const
Return the x value.
Definition: btQuadWord.h:116
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
Definition: btMatrix3x3.h:1378
#define SIMD_EPSILON
Definition: btScalar.h:495
for serialization
Definition: btMatrix3x3.h:1345
void serialize(struct btMatrix3x3Data &dataOut) const
Definition: btMatrix3x3.h:1359
btVector3DoubleData m_el[3]
Definition: btMatrix3x3.h:1353
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices It will test all elements are equal.
Definition: btMatrix3x3.h:1320
btScalar tdoty(const btVector3 &v) const
Definition: btMatrix3x3.h:641
btScalar tdotx(const btVector3 &v) const
Definition: btMatrix3x3.h:637
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1115
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Definition: btVector3.h:650
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector.
Definition: btMatrix3x3.h:616
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion.
Definition: btMatrix3x3.h:209
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
Definition: btMatrix3x3.h:1365
btScalar tdotz(const btVector3 &v) const
Definition: btMatrix3x3.h:645
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
Definition: btVector3.h:729
btScalar btSin(btScalar x)
Definition: btScalar.h:452
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector.
Definition: btMatrix3x3.h:142
btScalar btSqrt(btScalar y)
Definition: btScalar.h:419
#define btAssert(x)
Definition: btScalar.h:114
unsigned int uint32_t
#define SIMD_FORCE_INLINE
Definition: btScalar.h:64
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor.
Definition: btMatrix3x3.h:748
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:901
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator.
Definition: btMatrix3x3.h:122
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion.
Definition: btQuaternion.h:870
#define btFullAssert(x)
Definition: btScalar.h:117
#define SIMD_HALF_PI
Definition: btScalar.h:479
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix.
Definition: btMatrix3x3.h:51
btMatrix3x3 transpose() const
Return the transpose of the matrix.
Definition: btMatrix3x3.h:1001
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion.
Definition: btMatrix3x3.h:60
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors.
Definition: btVector3.h:929
btVector3 getColumn(int i) const
Get a column of the matrix as a vector.
Definition: btMatrix3x3.h:134
void deSerialize(const struct btMatrix3x3Data &dataIn)
Definition: btMatrix3x3.h:1372
#define SIMD_PI
Definition: btScalar.h:477
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
Definition: btMatrix3x3.h:1059
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
diagonalizes this matrix by the Jacobi method.
Definition: btMatrix3x3.h:660
const btScalar & x() const
Return the x value.
Definition: btVector3.h:585
#define btMatrix3x3Data
Definition: btMatrix3x3.h:42
btMatrix3x3 adjoint() const
Return the adjoint of the matrix.
Definition: btMatrix3x3.h:1038
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
Definition: btMatrix3x3.h:1384
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right.
Definition: btMatrix3x3.h:767
btScalar btAtan2(btScalar x, btScalar y)
Definition: btScalar.h:469
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w.
Definition: btQuadWord.h:152
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
Definition: btMatrix3x3.h:879
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix.
Definition: btMatrix3x3.h:590
const btScalar & y() const
Return the y value.
Definition: btVector3.h:587
const btScalar & z() const
Return the z value.
Definition: btVector3.h:589
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX.
Definition: btMatrix3x3.h:521
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major)
Definition: btMatrix3x3.h:198
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting.
Definition: btMatrix3x3.h:69
const btScalar & z() const
Return the z value.
Definition: btQuadWord.h:120
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR.
Definition: btMatrix3x3.h:492
btVector3 can be used to represent 3D points and vectors.
Definition: btVector3.h:83
#define ATTRIBUTE_ALIGNED16(a)
Definition: btScalar.h:65
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Substractss by the target matrix on the right.
Definition: btMatrix3x3.h:950
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively.
Definition: btMatrix3x3.h:284
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right.
Definition: btMatrix3x3.h:857
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Definition: btMatrix3x3.h:925
btScalar determinant() const
Return the determinant of the matrix.
Definition: btMatrix3x3.h:973
btMatrix3x3 absolute() const
Return the matrix with all values non negative.
Definition: btMatrix3x3.h:980
btMatrix3x3()
No initializaion constructor.
Definition: btMatrix3x3.h:55
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective.
Definition: btMatrix3x3.h:347
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:158
btScalar length2() const
Return the length squared of the quaternion.
Definition: btQuaternion.h:326
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector.
Definition: btMatrix3x3.h:150
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
Definition: btMatrix3x3.h:48
const btScalar & y() const
Return the y value.
Definition: btQuadWord.h:118
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions.
Definition: btQuaternion.h:848
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor.
Definition: btMatrix3x3.h:114
for serialization
Definition: btMatrix3x3.h:1351
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
Definition: btQuaternion.h:55
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix.
Definition: btMatrix3x3.h:181
btScalar btAsin(btScalar x)
Definition: btScalar.h:461
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors.
Definition: btVector3.h:899
btMatrix3x3 inverse() const
Return the inverse of the matrix.
Definition: btMatrix3x3.h:1046
const btScalar & w() const
Return the w value.
Definition: btQuadWord.h:122
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
Definition: btVector3.h:935
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion.
Definition: btMatrix3x3.h:400
void setIdentity()
Set the matrix to the identity.
Definition: btMatrix3x3.h:317
static const btMatrix3x3 & getIdentity()
Definition: btMatrix3x3.h:330
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
Definition: btScalar.h:279
btScalar btCos(btScalar x)
Definition: btScalar.h:451
btVector3FloatData m_el[3]
Definition: btMatrix3x3.h:1347
btScalar btFabs(btScalar x)
Definition: btScalar.h:450
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.
Definition: btMatrix3x3.h:298