16 #ifndef BT_MATRIX3x3_H    17 #define BT_MATRIX3x3_H    26 #define vMPPP (_mm_set_ps (+0.0f, +0.0f, +0.0f, -0.0f))    29 #if defined(BT_USE_SSE)    30 #define v1000 (_mm_set_ps(0.0f,0.0f,0.0f,1.0f))    31 #define v0100 (_mm_set_ps(0.0f,0.0f,1.0f,0.0f))    32 #define v0010 (_mm_set_ps(0.0f,1.0f,0.0f,0.0f))    33 #elif defined(BT_USE_NEON)    39 #ifdef BT_USE_DOUBLE_PRECISION    40 #define btMatrix3x3Data btMatrix3x3DoubleData     42 #define btMatrix3x3Data btMatrix3x3FloatData    43 #endif //BT_USE_DOUBLE_PRECISION    78 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)    96                 m_el[0].mVec128 = rhs.
m_el[0].mVec128;
    97                 m_el[1].mVec128 = rhs.
m_el[1].mVec128;
    98                 m_el[2].mVec128 = rhs.
m_el[2].mVec128;
   104                 m_el[0].mVec128 = m.
m_el[0].mVec128;
   105                 m_el[1].mVec128 = m.
m_el[1].mVec128;
   106                 m_el[2].mVec128 = m.
m_el[2].mVec128;
   116                 m_el[0] = other.
m_el[0];
   117                 m_el[1] = other.
m_el[1];
   118                 m_el[2] = other.
m_el[2];
   124                 m_el[0] = other.
m_el[0];
   125                 m_el[1] = other.
m_el[1];
   126                 m_el[2] = other.
m_el[2];
   136                 return btVector3(m_el[0][i],m_el[1][i],m_el[2][i]);
   215     #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)   216         __m128  vs, Q = q.get128();
   217                 __m128i Qi = btCastfTo128i(Q);
   220         __m128  V11, V21, V31;
   221         __m128  NQ = _mm_xor_ps(Q, btvMzeroMask);
   222                 __m128i NQi = btCastfTo128i(NQ);
   224         V1 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,2,3)));        
   225                 V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0,0,1,3));     
   226         V3 = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(2,1,0,3)));        
   227         V1 = _mm_xor_ps(V1, vMPPP);     
   229         V11     = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,1,0,3)));   
   230                 V21 = _mm_unpackhi_ps(Q, Q);                    
   231                 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0,2,0,3));       
   237         V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2,3,1,3));       
   239         V21 = _mm_xor_ps(V21, vMPPP);   
   240                 V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3,3,1,3));       
   241         V31 = _mm_xor_ps(V31, vMPPP);   
   242                 Y = btCastiTo128f(_mm_shuffle_epi32 (NQi, BT_SHUFFLE(3,2,0,3)));        
   243                 Z = btCastiTo128f(_mm_shuffle_epi32 (Qi, BT_SHUFFLE(1,0,1,3))); 
   245                 vs = _mm_load_ss(&s);
   253         vs = bt_splat3_ps(vs, 0);
   267                 btScalar xs = q.
x() * s,   ys = q.
y() * s,   zs = q.
z() * s;
   268                 btScalar wx = q.
w() * xs,  wy = q.
w() * ys,  wz = q.
w() * zs;
   269                 btScalar xx = q.
x() * xs,  xy = q.
x() * ys,  xz = q.
x() * zs;
   270                 btScalar yy = q.
y() * ys,  yz = q.
y() * zs,  zz = q.
z() * zs;
   272             btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
   273                         xy + wz, 
btScalar(1.0) - (xx + zz), yz - wx,
   274                         xz - wy, yz + wx, 
btScalar(1.0) - (xx + yy));
   286                 setEulerZYX(roll, pitch, yaw);
   311                 setValue(cj * ch, sj * sc - cs, sj * cc + ss,
   312                         cj * sh, sj * ss + cc, sj * cs - sc, 
   313                         -sj,      cj * si,      cj * ci);
   319 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)   332 #if (defined(BT_USE_SSE_IN_API)&& defined (BT_USE_SSE)) || defined(BT_USE_NEON)   334         identityMatrix(v1000, v0100, v0010);
   342                 return identityMatrix;
   349 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)   350         __m128 v0 = m_el[0].mVec128;
   351         __m128 v1 = m_el[1].mVec128;
   352         __m128 v2 = m_el[2].mVec128;    
   353         __m128 *vm = (__m128 *)m;
   356         v2 = _mm_and_ps(v2, btvFFF0fMask);  
   358         vT = _mm_unpackhi_ps(v0, v1);   
   359         v0 = _mm_unpacklo_ps(v0, v1);   
   361         v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );   
   362         v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );   
   363         v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  
   368 #elif defined(BT_USE_NEON)   370         static const uint32x2_t zMask = (
const uint32x2_t) {
static_cast<uint32_t>(-1), 0 };
   371         float32x4_t *vm = (float32x4_t *)m;
   372         float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  
   373         float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       
   374         float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
   375         float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
   376         float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
   377         float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       
   402 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   403         btScalar trace = m_el[0].
x() + m_el[1].
y() + m_el[2].
z();
   415             temp.f[0]=m_el[2].
y() - m_el[1].
z();
   416             temp.f[1]=m_el[0].
z() - m_el[2].
x();
   417             temp.f[2]=m_el[1].
x() - m_el[0].
y();
   424             if(m_el[0].x() < m_el[1].y()) 
   426                 if( m_el[1].y() < m_el[2].z() )
   427                     { i = 2; j = 0; k = 1; }
   429                     { i = 1; j = 2; k = 0; }
   433                 if( m_el[0].x() < m_el[2].z())
   434                     { i = 2; j = 0; k = 1; }
   436                     { i = 0; j = 1; k = 2; }
   439             x = m_el[i][i] - m_el[j][j] - m_el[k][k] + 
btScalar(1.0);
   441             temp.f[3] = (m_el[k][j] - m_el[j][k]);
   442             temp.f[j] = (m_el[j][i] + m_el[i][j]);
   443             temp.f[k] = (m_el[k][i] + m_el[i][k]);
   454                 btScalar trace = m_el[0].
x() + m_el[1].
y() + m_el[2].
z();
   464                         temp[0]=((m_el[2].
y() - m_el[1].
z()) * s);
   465                         temp[1]=((m_el[0].
z() - m_el[2].
x()) * s);
   466                         temp[2]=((m_el[1].
x() - m_el[0].
y()) * s);
   470                         int i = m_el[0].
x() < m_el[1].
y() ? 
   471                                 (m_el[1].
y() < m_el[2].
z() ? 2 : 1) :
   472                                 (m_el[0].x() < m_el[2].
z() ? 2 : 0); 
   480                         temp[3] = (m_el[k][j] - m_el[j][k]) * s;
   481                         temp[j] = (m_el[j][i] + m_el[i][j]) * s;
   482                         temp[k] = (m_el[k][i] + m_el[i][k]) * s;
   484                 q.
setValue(temp[0],temp[1],temp[2],temp[3]);
   535                 if (
btFabs(m_el[2].x()) >= 1)
   546                                 euler_out.roll = euler_out.pitch + delta;
   547                                 euler_out2.roll = euler_out.pitch + delta;
   553                                 euler_out.roll = -euler_out.pitch + delta;
   554                                 euler_out2.roll = -euler_out.pitch + delta;
   559                         euler_out.pitch = - 
btAsin(m_el[2].x());
   560                         euler_out2.pitch = 
SIMD_PI - euler_out.pitch;
   562                         euler_out.roll = 
btAtan2(m_el[2].y()/
btCos(euler_out.pitch), 
   563                                 m_el[2].
z()/
btCos(euler_out.pitch));
   564                         euler_out2.roll = 
btAtan2(m_el[2].y()/
btCos(euler_out2.pitch), 
   565                                 m_el[2].
z()/
btCos(euler_out2.pitch));
   567                         euler_out.yaw = 
btAtan2(m_el[1].x()/
btCos(euler_out.pitch), 
   568                                 m_el[0].
x()/
btCos(euler_out.pitch));
   569                         euler_out2.yaw = 
btAtan2(m_el[1].x()/
btCos(euler_out2.pitch), 
   570                                 m_el[0].
x()/
btCos(euler_out2.pitch));
   573                 if (solution_number == 1)
   576                         pitch = euler_out.pitch;
   577                         roll = euler_out.roll;
   581                         yaw = euler_out2.yaw; 
   582                         pitch = euler_out2.pitch;
   583                         roll = euler_out2.roll;
   592 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   593                 return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
   596             m_el[0].x() * s.
x(), m_el[0].
y() * s.y(), m_el[0].
z() * s.z(),
   597                         m_el[1].
x() * s.x(), m_el[1].
y() * s.y(), m_el[1].
z() * s.z(),
   598                         m_el[2].
x() * s.x(), m_el[2].
y() * s.y(), m_el[2].
z() * s.z());
   639                 return m_el[0].
x() * v.
x() + m_el[1].
x() * v.
y() + m_el[2].
x() * v.
z();
   643                 return m_el[0].
y() * v.
x() + m_el[1].
y() * v.
y() + m_el[2].
y() * v.
z();
   647                 return m_el[0].
z() * v.
x() + m_el[1].
z() * v.
y() + m_el[2].
z() * v.
z();
   663                 for (
int step = maxSteps; step > 0; step--)
   698                         btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
   704                                 t = (theta >= 0) ? 1 / (theta + 
btSqrt(1 + theta2))
   705                                         : 1 / (theta - 
btSqrt(1 + theta2));
   706                                 cos = 1 / 
btSqrt(1 + t * t);
   712                                 t = 1 / (theta * (2 + 
btScalar(0.5) / theta2));
   718                         m_el[p][q] = m_el[q][p] = 0;
   719                         m_el[p][p] -= t * mpq;
   720                         m_el[q][q] += t * mpq;
   723                         m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
   724                         m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;
   727                         for (
int i = 0; i < 3; i++)
   732                                 row[p] = cos * mrp - sin * mrq;
   733                                 row[q] = cos * mrq + sin * mrp;
   750                 return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
   769 #if defined BT_USE_SIMD_VECTOR3 && defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE)   770     __m128 rv00, rv01, rv02;
   771     __m128 rv10, rv11, rv12;
   772     __m128 rv20, rv21, rv22;
   773     __m128 mv0, mv1, mv2;
   775     rv02 = m_el[0].mVec128;
   776     rv12 = m_el[1].mVec128;
   777     rv22 = m_el[2].mVec128;
   779     mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask); 
   780     mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask); 
   781     mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask); 
   784     rv00 = bt_splat_ps(rv02, 0);
   785     rv01 = bt_splat_ps(rv02, 1);
   786     rv02 = bt_splat_ps(rv02, 2);
   788     rv00 = _mm_mul_ps(rv00, mv0);
   789     rv01 = _mm_mul_ps(rv01, mv1);
   790     rv02 = _mm_mul_ps(rv02, mv2);
   793     rv10 = bt_splat_ps(rv12, 0);
   794     rv11 = bt_splat_ps(rv12, 1);
   795     rv12 = bt_splat_ps(rv12, 2);
   797     rv10 = _mm_mul_ps(rv10, mv0);
   798     rv11 = _mm_mul_ps(rv11, mv1);
   799     rv12 = _mm_mul_ps(rv12, mv2);
   802     rv20 = bt_splat_ps(rv22, 0);
   803     rv21 = bt_splat_ps(rv22, 1);
   804     rv22 = bt_splat_ps(rv22, 2);
   806     rv20 = _mm_mul_ps(rv20, mv0);
   807     rv21 = _mm_mul_ps(rv21, mv1);
   808     rv22 = _mm_mul_ps(rv22, mv2);
   810     rv00 = _mm_add_ps(rv00, rv01);
   811     rv10 = _mm_add_ps(rv10, rv11);
   812     rv20 = _mm_add_ps(rv20, rv21);
   814     m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
   815     m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
   816     m_el[2].mVec128 = _mm_add_ps(rv20, rv22);
   818 #elif defined(BT_USE_NEON)   820     float32x4_t rv0, rv1, rv2;
   821     float32x4_t v0, v1, v2;
   822     float32x4_t mv0, mv1, mv2;
   824     v0 = m_el[0].mVec128;
   825     v1 = m_el[1].mVec128;
   826     v2 = m_el[2].mVec128;
   828     mv0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask); 
   829     mv1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask); 
   830     mv2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask); 
   832     rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
   833     rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
   834     rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
   836     rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
   837     rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
   838     rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
   840     rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
   841     rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
   842     rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
   844     m_el[0].mVec128 = rv0;
   845     m_el[1].mVec128 = rv1;
   846     m_el[2].mVec128 = rv2;
   859 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   860     m_el[0].mVec128 = m_el[0].mVec128 + m.
m_el[0].mVec128;
   861     m_el[1].mVec128 = m_el[1].mVec128 + m.
m_el[1].mVec128;
   862     m_el[2].mVec128 = m_el[2].mVec128 + m.
m_el[2].mVec128;
   865                 m_el[0][0]+m.
m_el[0][0], 
   866                 m_el[0][1]+m.
m_el[0][1],
   867                 m_el[0][2]+m.
m_el[0][2],
   868                 m_el[1][0]+m.
m_el[1][0], 
   869                 m_el[1][1]+m.
m_el[1][1],
   870                 m_el[1][2]+m.
m_el[1][2],
   871                 m_el[2][0]+m.
m_el[2][0], 
   872                 m_el[2][1]+m.
m_el[2][1],
   873                 m_el[2][2]+m.
m_el[2][2]);
   881 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))   882     __m128 vk = bt_splat_ps(_mm_load_ss((
float *)&k), 0x80);
   884                 _mm_mul_ps(m[0].mVec128, vk), 
   885                 _mm_mul_ps(m[1].mVec128, vk), 
   886                 _mm_mul_ps(m[2].mVec128, vk)); 
   887 #elif defined(BT_USE_NEON)   889                 vmulq_n_f32(m[0].mVec128, k),
   890                 vmulq_n_f32(m[1].mVec128, k),
   891                 vmulq_n_f32(m[2].mVec128, k)); 
   894                 m[0].x()*k,m[0].y()*k,m[0].z()*k,
   895                 m[1].x()*k,m[1].y()*k,m[1].z()*k,
   896                 m[2].x()*k,m[2].y()*k,m[2].z()*k);
   903 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   905         m1[0].mVec128 + m2[0].mVec128,
   906         m1[1].mVec128 + m2[1].mVec128,
   907         m1[2].mVec128 + m2[2].mVec128);
   927 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   929         m1[0].mVec128 - m2[0].mVec128,
   930         m1[1].mVec128 - m2[1].mVec128,
   931         m1[2].mVec128 - m2[2].mVec128);
   952 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)   953     m_el[0].mVec128 = m_el[0].mVec128 - m.
m_el[0].mVec128;
   954     m_el[1].mVec128 = m_el[1].mVec128 - m.
m_el[1].mVec128;
   955     m_el[2].mVec128 = m_el[2].mVec128 - m.
m_el[2].mVec128;
   958         m_el[0][0]-m.
m_el[0][0], 
   959         m_el[0][1]-m.
m_el[0][1],
   960         m_el[0][2]-m.
m_el[0][2],
   961         m_el[1][0]-m.
m_el[1][0], 
   962         m_el[1][1]-m.
m_el[1][1],
   963         m_el[1][2]-m.
m_el[1][2],
   964         m_el[2][0]-m.
m_el[2][0], 
   965         m_el[2][1]-m.
m_el[2][1],
   966         m_el[2][2]-m.
m_el[2][2]);
   975         return btTriple((*
this)[0], (*
this)[1], (*
this)[2]);
   982 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))   984             _mm_and_ps(m_el[0].mVec128, btvAbsfMask),
   985             _mm_and_ps(m_el[1].mVec128, btvAbsfMask),
   986             _mm_and_ps(m_el[2].mVec128, btvAbsfMask));
   987 #elif defined(BT_USE_NEON)   989             (float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
   990             (float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
   991             (float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
  1003 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1004     __m128 v0 = m_el[0].mVec128;
  1005     __m128 v1 = m_el[1].mVec128;
  1006     __m128 v2 = m_el[2].mVec128;    
  1009     v2 = _mm_and_ps(v2, btvFFF0fMask);  
  1011     vT = _mm_unpackhi_ps(v0, v1);       
  1012     v0 = _mm_unpacklo_ps(v0, v1);       
  1014     v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3) );       
  1015     v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3) );       
  1016     v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));      
  1020 #elif defined(BT_USE_NEON)  1022     static const uint32x2_t zMask = (
const uint32x2_t) {
static_cast<uint32_t>(-1), 0 };
  1023     float32x4x2_t top = vtrnq_f32( m_el[0].mVec128, m_el[1].mVec128 );  
  1024     float32x2x2_t bl = vtrn_f32( vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f) );       
  1025     float32x4_t v0 = vcombine_f32( vget_low_f32(top.val[0]), bl.val[0] );
  1026     float32x4_t v1 = vcombine_f32( vget_low_f32(top.val[1]), bl.val[1] );
  1027     float32x2_t q = (float32x2_t) vand_u32( (uint32x2_t) vget_high_f32( m_el[2].mVec128), zMask );
  1028     float32x4_t v2 = vcombine_f32( vget_high_f32(top.val[0]), q );       
  1031         return btMatrix3x3( m_el[0].x(), m_el[1].x(), m_el[2].x(),
  1032                         m_el[0].y(), m_el[1].y(), m_el[2].y(),
  1033                         m_el[0].z(), m_el[1].z(), m_el[2].z());
  1040         return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
  1041                 cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
  1042                 cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
  1048         btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
  1053         return btMatrix3x3(co.
x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
  1054                 co.
y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
  1055                 co.
z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
  1061 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1064     __m128 row = m_el[0].mVec128;
  1065     __m128 m0 = _mm_and_ps( m.
getRow(0).mVec128, btvFFF0fMask );
  1066     __m128 m1 = _mm_and_ps( m.
getRow(1).mVec128, btvFFF0fMask);
  1067     __m128 m2 = _mm_and_ps( m.
getRow(2).mVec128, btvFFF0fMask );
  1068     __m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
  1069     __m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
  1070     __m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
  1071     row = m_el[1].mVec128;
  1072     r0 = _mm_add_ps( r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
  1073     r1 = _mm_add_ps( r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
  1074     r2 = _mm_add_ps( r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
  1075     row = m_el[2].mVec128;
  1076     r0 = _mm_add_ps( r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
  1077     r1 = _mm_add_ps( r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
  1078     r2 = _mm_add_ps( r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
  1081 #elif defined BT_USE_NEON  1083     static const uint32x4_t xyzMask = (
const uint32x4_t){ 
static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 
static_cast<uint32_t>(-1), 0 };
  1084     float32x4_t m0 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(0).mVec128, xyzMask );
  1085     float32x4_t m1 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(1).mVec128, xyzMask );
  1086     float32x4_t m2 = (float32x4_t) vandq_u32( (uint32x4_t) m.
getRow(2).mVec128, xyzMask );
  1087     float32x4_t row = m_el[0].mVec128;
  1088     float32x4_t r0 = vmulq_lane_f32( m0, vget_low_f32(row), 0);
  1089     float32x4_t r1 = vmulq_lane_f32( m0, vget_low_f32(row), 1);
  1090     float32x4_t r2 = vmulq_lane_f32( m0, vget_high_f32(row), 0);
  1091     row = m_el[1].mVec128;
  1092     r0 = vmlaq_lane_f32( r0, m1, vget_low_f32(row), 0);
  1093     r1 = vmlaq_lane_f32( r1, m1, vget_low_f32(row), 1);
  1094     r2 = vmlaq_lane_f32( r2, m1, vget_high_f32(row), 0);
  1095     row = m_el[2].mVec128;
  1096     r0 = vmlaq_lane_f32( r0, m2, vget_low_f32(row), 0);
  1097     r1 = vmlaq_lane_f32( r1, m2, vget_low_f32(row), 1);
  1098     r2 = vmlaq_lane_f32( r2, m2, vget_high_f32(row), 0);
  1102                 m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
  1103                 m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
  1104                 m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
  1105                 m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
  1106                 m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
  1107                 m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
  1108                 m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
  1109                 m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
  1110                 m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
  1117 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1118     __m128 a0 = m_el[0].mVec128;
  1119     __m128 a1 = m_el[1].mVec128;
  1120     __m128 a2 = m_el[2].mVec128;
  1123     __m128 mx = mT[0].mVec128;
  1124     __m128 my = mT[1].mVec128;
  1125     __m128 mz = mT[2].mVec128;
  1127     __m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
  1128     __m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
  1129     __m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
  1130     r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
  1131     r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
  1132     r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
  1133     r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
  1134     r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
  1135     r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
  1138 #elif defined BT_USE_NEON  1139     float32x4_t a0 = m_el[0].mVec128;
  1140     float32x4_t a1 = m_el[1].mVec128;
  1141     float32x4_t a2 = m_el[2].mVec128;
  1144     float32x4_t mx = mT[0].mVec128;
  1145     float32x4_t my = mT[1].mVec128;
  1146     float32x4_t mz = mT[2].mVec128;
  1148     float32x4_t r0 = vmulq_lane_f32( mx, vget_low_f32(a0), 0);
  1149     float32x4_t r1 = vmulq_lane_f32( mx, vget_low_f32(a1), 0);
  1150     float32x4_t r2 = vmulq_lane_f32( mx, vget_low_f32(a2), 0);
  1151     r0 = vmlaq_lane_f32( r0, my, vget_low_f32(a0), 1);
  1152     r1 = vmlaq_lane_f32( r1, my, vget_low_f32(a1), 1);
  1153     r2 = vmlaq_lane_f32( r2, my, vget_low_f32(a2), 1);
  1154     r0 = vmlaq_lane_f32( r0, mz, vget_high_f32(a0), 0);
  1155     r1 = vmlaq_lane_f32( r1, mz, vget_high_f32(a1), 0);
  1156     r2 = vmlaq_lane_f32( r2, mz, vget_high_f32(a2), 0);
  1161                 m_el[0].
dot(m[0]), m_el[0].
dot(m[1]), m_el[0].
dot(m[2]),
  1162                 m_el[1].
dot(m[0]), m_el[1].
dot(m[1]), m_el[1].
dot(m[2]),
  1163                 m_el[2].
dot(m[0]), m_el[2].
dot(m[1]), m_el[2].
dot(m[2]));
  1170 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))|| defined (BT_USE_NEON)  1171     return v.
dot3(m[0], m[1], m[2]);
  1181 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1183     const __m128 vv = v.mVec128;
  1185     __m128 c0 = bt_splat_ps( vv, 0);
  1186     __m128 c1 = bt_splat_ps( vv, 1);
  1187     __m128 c2 = bt_splat_ps( vv, 2);
  1189     c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask) );
  1190     c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask) );
  1191     c0 = _mm_add_ps(c0, c1);
  1192     c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask) );
  1195 #elif defined(BT_USE_NEON)  1196     const float32x4_t vv = v.mVec128;
  1197     const float32x2_t vlo = vget_low_f32(vv);
  1198     const float32x2_t vhi = vget_high_f32(vv);
  1200     float32x4_t c0, c1, c2;
  1202     c0 = (float32x4_t) vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
  1203     c1 = (float32x4_t) vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
  1204     c2 = (float32x4_t) vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);
  1206     c0 = vmulq_lane_f32(c0, vlo, 0);
  1207     c1 = vmulq_lane_f32(c1, vlo, 1);
  1208     c2 = vmulq_lane_f32(c2, vhi, 0);
  1209     c0 = vaddq_f32(c0, c1);
  1210     c0 = vaddq_f32(c0, c2);
  1221 #if defined BT_USE_SIMD_VECTOR3 && (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1223     __m128 m10 = m1[0].mVec128;  
  1224     __m128 m11 = m1[1].mVec128;
  1225     __m128 m12 = m1[2].mVec128;
  1227     __m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);
  1229     __m128 c0 = bt_splat_ps( m10, 0);
  1230     __m128 c1 = bt_splat_ps( m11, 0);
  1231     __m128 c2 = bt_splat_ps( m12, 0);
  1233     c0 = _mm_mul_ps(c0, m2v);
  1234     c1 = _mm_mul_ps(c1, m2v);
  1235     c2 = _mm_mul_ps(c2, m2v);
  1237     m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);
  1239     __m128 c0_1 = bt_splat_ps( m10, 1);
  1240     __m128 c1_1 = bt_splat_ps( m11, 1);
  1241     __m128 c2_1 = bt_splat_ps( m12, 1);
  1243     c0_1 = _mm_mul_ps(c0_1, m2v);
  1244     c1_1 = _mm_mul_ps(c1_1, m2v);
  1245     c2_1 = _mm_mul_ps(c2_1, m2v);
  1247     m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);
  1249     c0 = _mm_add_ps(c0, c0_1);
  1250     c1 = _mm_add_ps(c1, c1_1);
  1251     c2 = _mm_add_ps(c2, c2_1);
  1253     m10 = bt_splat_ps( m10, 2);
  1254     m11 = bt_splat_ps( m11, 2);
  1255     m12 = bt_splat_ps( m12, 2);
  1257     m10 = _mm_mul_ps(m10, m2v);
  1258     m11 = _mm_mul_ps(m11, m2v);
  1259     m12 = _mm_mul_ps(m12, m2v);
  1261     c0 = _mm_add_ps(c0, m10);
  1262     c1 = _mm_add_ps(c1, m11);
  1263     c2 = _mm_add_ps(c2, m12);
  1267 #elif defined(BT_USE_NEON)  1269     float32x4_t rv0, rv1, rv2;
  1270     float32x4_t v0, v1, v2;
  1271     float32x4_t mv0, mv1, mv2;
  1277     mv0 = (float32x4_t) vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask); 
  1278     mv1 = (float32x4_t) vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask); 
  1279     mv2 = (float32x4_t) vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask); 
  1281     rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
  1282     rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
  1283     rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);
  1285     rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
  1286     rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
  1287     rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);
  1289     rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
  1290     rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
  1291     rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);
  1322 #if (defined (BT_USE_SSE_IN_API) && defined (BT_USE_SSE))  1326     c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
  1327     c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
  1328     c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);
  1330     c0 = _mm_and_ps(c0, c1);
  1331     c0 = _mm_and_ps(c0, c2);
  1333         int m = _mm_movemask_ps((__m128)c0);
  1334         return (0x7 == (m & 0x7));
  1338     (   m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
  1339                 m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
  1340                 m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2] );
  1361         for (
int i=0;i<3;i++)
  1362                 m_el[i].serialize(dataOut.m_el[i]);
  1367         for (
int i=0;i<3;i++)
  1368                 m_el[i].serializeFloat(dataOut.
m_el[i]);
  1374         for (
int i=0;i<3;i++)
  1375                 m_el[i].deSerialize(dataIn.m_el[i]);
  1380         for (
int i=0;i<3;i++)
  1381                 m_el[i].deSerializeFloat(dataIn.
m_el[i]);
  1386         for (
int i=0;i<3;i++)
  1387                 m_el[i].deSerializeDouble(dataIn.
m_el[i]);
  1390 #endif //BT_MATRIX3x3_H const btScalar & x() const
Return the x value. 
void deSerializeFloat(const struct btMatrix3x3FloatData &dataIn)
void serialize(struct btMatrix3x3Data &dataOut) const
btVector3DoubleData m_el[3]
bool operator==(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
Equality operator between two matrices. It will test all elements are equal. 
btScalar tdoty(const btVector3 &v) const
btScalar tdotx(const btVector3 &v) const
btMatrix3x3 timesTranspose(const btMatrix3x3 &m) const
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
btVector3 solve33(const btVector3 &b) const
Solve A * x = b, where b is a column vector. 
void setRotation(const btQuaternion &q)
Set the matrix from a quaternion. 
void serializeFloat(struct btMatrix3x3FloatData &dataOut) const
btScalar tdotz(const btVector3 &v) const
btVector3 dot3(const btVector3 &v0, const btVector3 &v1, const btVector3 &v2) const
btScalar btSin(btScalar x)
const btVector3 & getRow(int i) const
Get a row of the matrix as a vector. 
btScalar btSqrt(btScalar y)
#define SIMD_FORCE_INLINE
btScalar cofac(int r1, int c1, int r2, int c2) const
Calculate the matrix cofactor. 
btMatrix3x3 operator+(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
btMatrix3x3 & operator=(const btMatrix3x3 &other)
Assignment Operator. 
btQuaternion inverse(const btQuaternion &q)
Return the inverse of a quaternion. 
btVector3 m_el[3]
Data storage for the matrix, each vector is a row of the matrix. 
btMatrix3x3 transpose() const
Return the transpose of the matrix. 
btMatrix3x3(const btQuaternion &q)
Constructor from Quaternion. 
btVector3 btCross(const btVector3 &v1, const btVector3 &v2)
Return the cross product of two vectors. 
btVector3 getColumn(int i) const
Get a column of the matrix as a vector. 
void deSerialize(const struct btMatrix3x3Data &dataIn)
btMatrix3x3 transposeTimes(const btMatrix3x3 &m) const
void diagonalize(btMatrix3x3 &rot, btScalar threshold, int maxSteps)
Diagonalizes this matrix by the Jacobi method. 
const btScalar & x() const
Return the x value. 
btMatrix3x3 adjoint() const
Return the adjoint of the matrix. 
void deSerializeDouble(const struct btMatrix3x3DoubleData &dataIn)
btMatrix3x3 & operator*=(const btMatrix3x3 &m)
Multiply by the target matrix on the right. 
btScalar btAtan2(btScalar x, btScalar y)
void setValue(const btScalar &_x, const btScalar &_y, const btScalar &_z)
Set x,y,z and zero w. 
btMatrix3x3 operator*(const btMatrix3x3 &m, const btScalar &k)
btMatrix3x3 scaled(const btVector3 &s) const
Create a scaled copy of the matrix. 
const btScalar & y() const
Return the y value. 
const btScalar & z() const
Return the z value. 
void getEulerZYX(btScalar &yaw, btScalar &pitch, btScalar &roll, unsigned int solution_number=1) const
Get the matrix represented as euler angles around ZYX. 
void setValue(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Set the values of the matrix explicitly (row major) 
btMatrix3x3(const btScalar &xx, const btScalar &xy, const btScalar &xz, const btScalar &yx, const btScalar &yy, const btScalar &yz, const btScalar &zx, const btScalar &zy, const btScalar &zz)
Constructor with row major formatting. 
const btScalar & z() const
Return the z value. 
void getEulerYPR(btScalar &yaw, btScalar &pitch, btScalar &roll) const
Get the matrix represented as euler angles around YXZ, roundtrip with setEulerYPR. 
btVector3 can be used to represent 3D points and vectors. 
#define ATTRIBUTE_ALIGNED16(a)
btMatrix3x3 & operator-=(const btMatrix3x3 &m)
Subtracts the target matrix on the right. 
void setEulerYPR(const btScalar &yaw, const btScalar &pitch, const btScalar &roll)
Set the matrix from euler angles using YPR around YXZ respectively. 
btMatrix3x3 & operator+=(const btMatrix3x3 &m)
Adds by the target matrix on the right. 
btMatrix3x3 operator-(const btMatrix3x3 &m1, const btMatrix3x3 &m2)
btScalar determinant() const
Return the determinant of the matrix. 
btMatrix3x3 absolute() const
Return the matrix with all values non negative. 
btMatrix3x3()
No initialization constructor. 
void getOpenGLSubMatrix(btScalar *m) const
Fill the rotational part of an OpenGL matrix and clear the shear/perspective. 
const btVector3 & operator[](int i) const
Get a const reference to a row of the matrix as a vector. 
btScalar length2() const
Return the length squared of the quaternion. 
btVector3 & operator[](int i)
Get a mutable reference to a row of the matrix as a vector. 
The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra in combination with...
const btScalar & y() const
Return the y value. 
btScalar dot(const btQuaternion &q1, const btQuaternion &q2)
Calculate the dot product between two quaternions. 
btMatrix3x3(const btMatrix3x3 &other)
Copy constructor. 
The btQuaternion implements quaternion to perform linear algebra rotations in combination with btMatr...
void setFromOpenGLSubMatrix(const btScalar *m)
Set from the rotational part of a 4x4 OpenGL matrix. 
btScalar btAsin(btScalar x)
btScalar btDot(const btVector3 &v1, const btVector3 &v2)
Return the dot product between two vectors. 
btMatrix3x3 inverse() const
Return the inverse of the matrix. 
const btScalar & w() const
Return the w value. 
btScalar btTriple(const btVector3 &v1, const btVector3 &v2, const btVector3 &v3)
void getRotation(btQuaternion &q) const
Get the matrix represented as a quaternion. 
void setIdentity()
Set the matrix to the identity. 
static const btMatrix3x3 & getIdentity()
float btScalar
The btScalar type abstracts floating point numbers, to easily switch between double and single floati...
btScalar btCos(btScalar x)
btVector3FloatData m_el[3]
btScalar btFabs(btScalar x)
void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
Set the matrix from euler angles YPR around ZYX axes.