#ifndef BT_MATRIX3x3_H
#define BT_MATRIX3x3_H

#ifdef BT_USE_SSE
#define vMPPP (_mm_set_ps(+0.0f, +0.0f, +0.0f, -0.0f))
#endif

#if defined(BT_USE_SSE)
#define v1000 (_mm_set_ps(0.0f, 0.0f, 0.0f, 1.0f))
#define v0100 (_mm_set_ps(0.0f, 0.0f, 1.0f, 0.0f))
#define v0010 (_mm_set_ps(0.0f, 1.0f, 0.0f, 0.0f))
#elif defined(BT_USE_NEON)
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v1000) = {1.0f, 0.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0100) = {0.0f, 1.0f, 0.0f, 0.0f};
const btSimdFloat4 ATTRIBUTE_ALIGNED16(v0010) = {0.0f, 0.0f, 1.0f, 0.0f};
#endif

#ifdef BT_USE_DOUBLE_PRECISION
#define btMatrix3x3Data btMatrix3x3DoubleData
#else
#define btMatrix3x3Data btMatrix3x3FloatData
#endif  //BT_USE_DOUBLE_PRECISION

/** @brief The btMatrix3x3 class implements a 3x3 rotation matrix, to perform linear algebra
 *  in combination with btQuaternion, btTransform and btVector3.
 *  Make sure to only include a pure orthogonal matrix without scaling. */
ATTRIBUTE_ALIGNED16(class)
btMatrix3x3
{
	///Data storage for the matrix, each vector is a row of the matrix
	btVector3 m_el[3];

public:
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	/** @brief Copy constructor (SIMD path): copy the three row registers directly */
	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& rhs)
	{
		m_el[0].mVec128 = rhs.m_el[0].mVec128;
		m_el[1].mVec128 = rhs.m_el[1].mVec128;
		m_el[2].mVec128 = rhs.m_el[2].mVec128;
	}

	/** @brief Assignment Operator (SIMD path) */
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& m)
	{
		m_el[0].mVec128 = m.m_el[0].mVec128;
		m_el[1].mVec128 = m.m_el[1].mVec128;
		m_el[2].mVec128 = m.m_el[2].mVec128;
		return *this;
	}
#else
	/** @brief Copy constructor */
	SIMD_FORCE_INLINE btMatrix3x3(const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
	}

	/** @brief Assignment Operator */
	SIMD_FORCE_INLINE btMatrix3x3& operator=(const btMatrix3x3& other)
	{
		m_el[0] = other.m_el[0];
		m_el[1] = other.m_el[1];
		m_el[2] = other.m_el[2];
		return *this;
	}
#endif

	/** @brief Get a column of the matrix as a vector
	 *  @param i Column number 0 indexed */
	SIMD_FORCE_INLINE btVector3 getColumn(int i) const
	{
		return btVector3(m_el[0][i], m_el[1][i], m_el[2][i]);
	}
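	/* Usage sketch (added for illustration, not part of the original header):
	   m_el stores the matrix as three rows, so operator[] yields a row while
	   getColumn() gathers one component from each row.
	     btMatrix3x3 m = btMatrix3x3::getIdentity();
	     btVector3 row0 = m[0];            // (1, 0, 0)
	     btVector3 col0 = m.getColumn(0);  // also (1, 0, 0) for the identity
	*/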
	/** @brief Set the matrix from a quaternion
	 *  @param q The Quaternion to match */
	void setRotation(const btQuaternion& q)
	{
		btScalar d = q.length2();
		btFullAssert(d != btScalar(0.0));
		btScalar s = btScalar(2.0) / d;

#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vs, Q = q.get128();
		__m128i Qi = btCastfTo128i(Q);
		__m128 Y, Z;
		__m128 V1, V2, V3;
		__m128 V11, V21, V31;
		__m128 NQ = _mm_xor_ps(Q, btvMzeroMask);  // all four components negated
		__m128i NQi = btCastfTo128i(NQ);

		V1 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 2, 3)));  //  Y  X  Z  W
		V2 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(0, 0, 1, 3));                 // -X -X  Y  W
		V3 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(2, 1, 0, 3)));  //  Z  Y  X  W
		V1 = _mm_xor_ps(V1, vMPPP);                                         // change the sign of the first element

		V11 = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 1, 0, 3)));  //  Y  Y  X  W
		V21 = _mm_unpackhi_ps(Q, Q);                                         //  Z  Z  W  W
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(0, 2, 0, 3));                 //  X  Z -X -W

		V2 = _mm_mul_ps(V2, V1);
		V1 = _mm_mul_ps(V1, V11);
		V3 = _mm_mul_ps(V3, V31);

		V11 = _mm_shuffle_ps(NQ, Q, BT_SHUFFLE(2, 3, 1, 3));  // -Z -W  Y  W
		V11 = _mm_mul_ps(V11, V21);
		V21 = _mm_xor_ps(V21, vMPPP);                         // change the sign of the first element
		V31 = _mm_shuffle_ps(Q, NQ, BT_SHUFFLE(3, 3, 1, 3));  //  W  W -Y -W
		V31 = _mm_xor_ps(V31, vMPPP);                         // change the sign of the first element
		Y = btCastiTo128f(_mm_shuffle_epi32(NQi, BT_SHUFFLE(3, 2, 0, 3)));  // -W -Z -X -W
		Z = btCastiTo128f(_mm_shuffle_epi32(Qi, BT_SHUFFLE(1, 0, 1, 3)));   //  Y  X  Y  W

		vs = _mm_load_ss(&s);
		V21 = _mm_mul_ps(V21, Y);
		V31 = _mm_mul_ps(V31, Z);

		V1 = _mm_add_ps(V1, V11);
		V2 = _mm_add_ps(V2, V21);
		V3 = _mm_add_ps(V3, V31);

		vs = bt_splat3_ps(vs, 0);  // (s s s 0)

		// scale the three rows by s = 2/|q|^2 and add the identity
		V1 = _mm_add_ps(_mm_mul_ps(V1, vs), v1000);
		V2 = _mm_add_ps(_mm_mul_ps(V2, vs), v0100);
		V3 = _mm_add_ps(_mm_mul_ps(V3, vs), v0010);

		m_el[0] = btVector3(V1);
		m_el[1] = btVector3(V2);
		m_el[2] = btVector3(V3);
#else
		btScalar xs = q.x() * s, ys = q.y() * s, zs = q.z() * s;
		btScalar wx = q.w() * xs, wy = q.w() * ys, wz = q.w() * zs;
		btScalar xx = q.x() * xs, xy = q.x() * ys, xz = q.x() * zs;
		btScalar yy = q.y() * ys, yz = q.y() * zs, zz = q.z() * zs;
		setValue(
			btScalar(1.0) - (yy + zz), xy - wz, xz + wy,
			xy + wz, btScalar(1.0) - (xx + zz), yz - wx,
			xz - wy, yz + wx, btScalar(1.0) - (xx + yy));
#endif
	}
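	/* Usage sketch (added for illustration, not part of the original header):
	   build a rotation matrix from a quaternion; the scalar branch above is the
	   standard expansion with every product pre-scaled by s = 2/|q|^2.
	     btQuaternion q(btVector3(0, 0, 1), SIMD_HALF_PI);  // 90 degrees about Z
	     btMatrix3x3 m;
	     m.setRotation(q);  // m now maps (1,0,0) to (0,1,0)
	*/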
	/** @brief Set the matrix from euler angles using YPR around YXZ respectively
	 *  @param yaw Yaw about Y axis
	 *  @param pitch Pitch about X axis
	 *  @param roll Roll about Z axis */
	void setEulerYPR(const btScalar& yaw, const btScalar& pitch, const btScalar& roll)
	{
		setEulerZYX(roll, pitch, yaw);
	}
	/** @brief Set the matrix from euler angles YPR around ZYX axes
	 *  @param eulerX Roll about X axis
	 *  @param eulerY Pitch around Y axis
	 *  @param eulerZ Yaw about Z axis */
	void setEulerZYX(btScalar eulerX, btScalar eulerY, btScalar eulerZ)
	{
		btScalar ci(btCos(eulerX)), cj(btCos(eulerY)), ch(btCos(eulerZ));
		btScalar si(btSin(eulerX)), sj(btSin(eulerY)), sh(btSin(eulerZ));
		btScalar cc = ci * ch, cs = ci * sh, sc = si * ch, ss = si * sh;

		setValue(cj * ch, sj * sc - cs, sj * cc + ss,
				 cj * sh, sj * ss + cc, sj * cs - sc,
				 -sj, cj * si, cj * ci);
	}
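	/* Usage sketch (added for illustration, not part of the original header):
	   setEulerZYX / getEulerZYX round-trip, all angles in radians.
	     btMatrix3x3 m;
	     m.setEulerZYX(btScalar(0.1), btScalar(0.2), btScalar(0.3));  // roll, pitch, yaw
	     btScalar yaw, pitch, roll;
	     m.getEulerZYX(yaw, pitch, roll);  // recovers 0.3, 0.2, 0.1
	*/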
	/** @brief Set the matrix to the identity */
	void setIdentity()
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		m_el[0] = v1000;
		m_el[1] = v0100;
		m_el[2] = v0010;
#else
		setValue(btScalar(1.0), btScalar(0.0), btScalar(0.0),
				 btScalar(0.0), btScalar(1.0), btScalar(0.0),
				 btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
	}

	static const btMatrix3x3& getIdentity()
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		static const btMatrix3x3 identityMatrix(v1000, v0100, v0010);
#else
		static const btMatrix3x3 identityMatrix(
			btScalar(1.0), btScalar(0.0), btScalar(0.0),
			btScalar(0.0), btScalar(1.0), btScalar(0.0),
			btScalar(0.0), btScalar(0.0), btScalar(1.0));
#endif
		return identityMatrix;
	}
	/** @brief Fill the rotational part of an OpenGL matrix and clear the shear/perspective
	 *  @param m The array to be filled */
	void getOpenGLSubMatrix(btScalar * m) const
	{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 v0 = m_el[0].mVec128;
		__m128 v1 = m_el[1].mVec128;
		__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
		__m128* vm = (__m128*)m;
		__m128 vT;

		v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

		vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
		v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

		v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
		v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
		v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#elif defined(BT_USE_NEON)
		// note: zeros the w channel
		static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
		float32x4_t* vm = (float32x4_t*)m;
		float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
		float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
		float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
		float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
		float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
		float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0

		vm[0] = v0;
		vm[1] = v1;
		vm[2] = v2;
#else
		m[0] = btScalar(m_el[0].x()); m[1] = btScalar(m_el[1].x()); m[2] = btScalar(m_el[2].x()); m[3] = btScalar(0.0);
		m[4] = btScalar(m_el[0].y()); m[5] = btScalar(m_el[1].y()); m[6] = btScalar(m_el[2].y()); m[7] = btScalar(0.0);
		m[8] = btScalar(m_el[0].z()); m[9] = btScalar(m_el[1].z()); m[10] = btScalar(m_el[2].z()); m[11] = btScalar(0.0);
#endif
	}
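	/* Usage sketch (added for illustration, not part of the original header):
	   fills the first 12 entries of a column-major 4x4 OpenGL matrix and clears
	   the shear/perspective terms; the translation column is up to the caller.
	     btScalar gl[16];
	     m.getOpenGLSubMatrix(gl);
	     gl[12] = gl[13] = gl[14] = btScalar(0.0);  // translation
	     gl[15] = btScalar(1.0);
	*/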
	/** @brief Get the matrix represented as a quaternion
	 *  @param q The quaternion which will be set */
	void getRotation(btQuaternion& q) const
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();
		btScalar s, x;

		union {
			btSimdFloat4 vec;
			btScalar f[4];
		} temp;

		if (trace > btScalar(0.0))
		{
			x = trace + btScalar(1.0);

			temp.f[0] = m_el[2].y() - m_el[1].z();
			temp.f[1] = m_el[0].z() - m_el[2].x();
			temp.f[2] = m_el[1].x() - m_el[0].y();
			temp.f[3] = x;
		}
		else
		{
			int i, j, k;
			if (m_el[0].x() < m_el[1].y())
			{
				if (m_el[1].y() < m_el[2].z())
					{ i = 2; j = 0; k = 1; }
				else
					{ i = 1; j = 2; k = 0; }
			}
			else
			{
				if (m_el[0].x() < m_el[2].z())
					{ i = 2; j = 0; k = 1; }
				else
					{ i = 0; j = 1; k = 2; }
			}

			x = m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0);

			temp.f[3] = (m_el[k][j] - m_el[j][k]);
			temp.f[j] = (m_el[j][i] + m_el[i][j]);
			temp.f[k] = (m_el[k][i] + m_el[i][k]);
			temp.f[i] = x;
		}

		s = btSqrt(x);
		q.set128(temp.vec);
		s = btScalar(0.5) / s;
		q *= s;  // w becomes 0.5*sqrt(x); the other components are scaled by 0.5/sqrt(x)
#else
		btScalar trace = m_el[0].x() + m_el[1].y() + m_el[2].z();

		btScalar temp[4];

		if (trace > btScalar(0.0))
		{
			btScalar s = btSqrt(trace + btScalar(1.0));
			temp[3] = (s * btScalar(0.5));
			s = btScalar(0.5) / s;

			temp[0] = ((m_el[2].y() - m_el[1].z()) * s);
			temp[1] = ((m_el[0].z() - m_el[2].x()) * s);
			temp[2] = ((m_el[1].x() - m_el[0].y()) * s);
		}
		else
		{
			int i = m_el[0].x() < m_el[1].y()
						? (m_el[1].y() < m_el[2].z() ? 2 : 1)
						: (m_el[0].x() < m_el[2].z() ? 2 : 0);
			int j = (i + 1) % 3;
			int k = (i + 2) % 3;

			btScalar s = btSqrt(m_el[i][i] - m_el[j][j] - m_el[k][k] + btScalar(1.0));
			temp[i] = s * btScalar(0.5);
			s = btScalar(0.5) / s;

			temp[3] = (m_el[k][j] - m_el[j][k]) * s;
			temp[j] = (m_el[j][i] + m_el[i][j]) * s;
			temp[k] = (m_el[k][i] + m_el[i][k]) * s;
		}
		q.setValue(temp[0], temp[1], temp[2], temp[3]);
#endif
	}
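	/* Usage sketch (added for illustration, not part of the original header):
	   matrix -> quaternion -> matrix round-trip.
	     btQuaternion q;
	     m.getRotation(q);   // q is normalized if m is a pure rotation
	     btMatrix3x3 m2(q);  // reconstructs m up to floating-point error
	*/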
	/** @brief Get the matrix represented as euler angles around ZYX
	 *  @param yaw Yaw around Z axis
	 *  @param pitch Pitch around Y axis
	 *  @param roll around X axis
	 *  @param solution_number Which of the two possible solutions to return */
	void getEulerZYX(btScalar& yaw, btScalar& pitch, btScalar& roll, unsigned int solution_number = 1) const
	{
		struct Euler
		{
			btScalar yaw;
			btScalar pitch;
			btScalar roll;
		};

		Euler euler_out;
		Euler euler_out2;  // second solution

		// Check that pitch is not at a singularity
		if (btFabs(m_el[2].x()) >= 1)
		{
			euler_out.yaw = 0;
			euler_out2.yaw = 0;

			// From difference of angles formula
			btScalar delta = btAtan2(m_el[0].x(), m_el[0].z());
			if (m_el[2].x() > 0)  // gimbal locked up
			{
				euler_out.pitch = SIMD_PI / btScalar(2.0);
				euler_out2.pitch = SIMD_PI / btScalar(2.0);
				euler_out.roll = euler_out.pitch + delta;
				euler_out2.roll = euler_out.pitch + delta;
			}
			else  // gimbal locked down
			{
				euler_out.pitch = -SIMD_PI / btScalar(2.0);
				euler_out2.pitch = -SIMD_PI / btScalar(2.0);
				euler_out.roll = -euler_out.pitch + delta;
				euler_out2.roll = -euler_out.pitch + delta;
			}
		}
		else
		{
			euler_out.pitch = -btAsin(m_el[2].x());
			euler_out2.pitch = SIMD_PI - euler_out.pitch;

			euler_out.roll = btAtan2(m_el[2].y() / btCos(euler_out.pitch),
									 m_el[2].z() / btCos(euler_out.pitch));
			euler_out2.roll = btAtan2(m_el[2].y() / btCos(euler_out2.pitch),
									  m_el[2].z() / btCos(euler_out2.pitch));

			euler_out.yaw = btAtan2(m_el[1].x() / btCos(euler_out.pitch),
									m_el[0].x() / btCos(euler_out.pitch));
			euler_out2.yaw = btAtan2(m_el[1].x() / btCos(euler_out2.pitch),
									 m_el[0].x() / btCos(euler_out2.pitch));
		}

		if (solution_number == 1)
		{
			yaw = euler_out.yaw;
			pitch = euler_out.pitch;
			roll = euler_out.roll;
		}
		else
		{
			yaw = euler_out2.yaw;
			pitch = euler_out2.pitch;
			roll = euler_out2.roll;
		}
	}
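	/* Note (added for illustration, not part of the original header): away from
	   the pitch singularity every rotation matrix has two ZYX decompositions;
	   solution_number selects which one is returned.
	     btScalar y1, p1, r1, y2, p2, r2;
	     m.getEulerZYX(y1, p1, r1, 1);
	     m.getEulerZYX(y2, p2, r2, 2);  // both triples rebuild the same matrix
	*/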
	/** @brief Create a scaled copy of the matrix
	 *  @param s Scaling vector; the elements of the vector scale each column */
	btMatrix3x3 scaled(const btVector3& s) const
	{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		return btMatrix3x3(m_el[0] * s, m_el[1] * s, m_el[2] * s);
#else
		return btMatrix3x3(
			m_el[0].x() * s.x(), m_el[0].y() * s.y(), m_el[0].z() * s.z(),
			m_el[1].x() * s.x(), m_el[1].y() * s.y(), m_el[1].z() * s.z(),
			m_el[2].x() * s.x(), m_el[2].y() * s.y(), m_el[2].z() * s.z());
#endif
	}
	btScalar tdotx(const btVector3& v) const
	{
		return m_el[0].x() * v.x() + m_el[1].x() * v.y() + m_el[2].x() * v.z();
	}
	btScalar tdoty(const btVector3& v) const
	{
		return m_el[0].y() * v.x() + m_el[1].y() * v.y() + m_el[2].y() * v.z();
	}
	btScalar tdotz(const btVector3& v) const
	{
		return m_el[0].z() * v.x() + m_el[1].z() * v.y() + m_el[2].z() * v.z();
	}
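	/* Note (added for illustration, not part of the original header): tdotx/y/z
	   are "transposed dots": tdotx(v) is the dot product of v with the first
	   *column*, so they implement multiplication by the transpose without
	   actually transposing, e.g. m.tdotx(v) == m.getColumn(0).dot(v).
	*/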
	/** @brief diagonalizes this matrix by the Jacobi method.
	 *  @param rot stores the rotation from the coordinate system in which the matrix is diagonal
	 *  to the original coordinate system, i.e., old_this = rot * new_this * rot^T.
	 *  @param threshold See the iteration below.
	 *  @param maxSteps The iteration stops when all off-diagonal elements are less than the
	 *  threshold multiplied by the sum of the absolute values of the diagonal, or when maxSteps
	 *  have been executed. */
	void diagonalize(btMatrix3x3& rot, btScalar threshold, int maxSteps)
	{
		rot.setIdentity();
		for (int step = maxSteps; step > 0; step--)
		{
			// find off-diagonal element [p][q] with largest magnitude
			int p = 0, q = 1, r = 2;
			btScalar max = btFabs(m_el[0][1]);
			btScalar v = btFabs(m_el[0][2]);
			if (v > max) { q = 2; r = 1; max = v; }
			v = btFabs(m_el[1][2]);
			if (v > max) { p = 1; q = 2; r = 0; max = v; }

			btScalar t = threshold * (btFabs(m_el[0][0]) + btFabs(m_el[1][1]) + btFabs(m_el[2][2]));
			if (max <= t)
			{
				if (max <= SIMD_EPSILON * t)
					return;
				step = 1;
			}

			// compute Jacobi rotation J which leads to a zero for element [p][q]
			btScalar mpq = m_el[p][q];
			btScalar theta = (m_el[q][q] - m_el[p][p]) / (2 * mpq);
			btScalar theta2 = theta * theta;
			btScalar cos;
			btScalar sin;
			if (theta2 * theta2 < btScalar(10 / SIMD_EPSILON))
			{
				t = (theta >= 0) ? 1 / (theta + btSqrt(1 + theta2))
								 : 1 / (theta - btSqrt(1 + theta2));
				cos = 1 / btSqrt(1 + t * t);
				sin = cos * t;
			}
			else
			{
				// approximation for large theta2, avoiding overflow
				t = 1 / (theta * (2 + btScalar(0.5) / theta2));
				cos = 1 - btScalar(0.5) * t * t;
				sin = cos * t;
			}

			// apply rotation to matrix (this = J^T * this * J)
			m_el[p][q] = m_el[q][p] = 0;
			m_el[p][p] -= t * mpq;
			m_el[q][q] += t * mpq;
			btScalar mrp = m_el[r][p];
			btScalar mrq = m_el[r][q];
			m_el[r][p] = m_el[p][r] = cos * mrp - sin * mrq;
			m_el[r][q] = m_el[q][r] = cos * mrq + sin * mrp;

			// apply rotation to rot (rot = rot * J)
			for (int i = 0; i < 3; i++)
			{
				btVector3& row = rot[i];
				mrp = row[p];
				mrq = row[q];
				row[p] = cos * mrp - sin * mrq;
				row[q] = cos * mrq + sin * mrp;
			}
		}
	}

	/** @brief Calculate the matrix cofactor
	 *  @param r1 The first row to use for calculating the cofactor
	 *  @param c1 The first column to use for calculating the cofactor
	 *  @param r2 The second row to use for calculating the cofactor
	 *  @param c2 The second column to use for calculating the cofactor */
	btScalar cofac(int r1, int c1, int r2, int c2) const
	{
		return m_el[r1][c1] * m_el[r2][c2] - m_el[r1][c2] * m_el[r2][c1];
	}

	void serialize(struct btMatrix3x3Data & dataOut) const;
	void serializeFloat(struct btMatrix3x3FloatData & dataOut) const;
	void deSerialize(const struct btMatrix3x3Data& dataIn);
	void deSerializeFloat(const struct btMatrix3x3FloatData& dataIn);
	void deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn);
};
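/* Usage sketch (added for illustration, not part of the original header):
   diagonalizing a symmetric matrix such as an inertia tensor.
     btMatrix3x3 inertia(2, 1, 0, 1, 2, 0, 0, 0, 3);  // symmetric
     btMatrix3x3 rot;
     inertia.diagonalize(rot, btScalar(1.0e-9), 16);
     // inertia is now diagonal (its eigenvalues), and the original matrix
     // equals rot * inertia * rot.transpose()
*/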
/** @brief Multiply by the target matrix on the right */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator*=(const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 rv00, rv01, rv02;
	__m128 rv10, rv11, rv12;
	__m128 rv20, rv21, rv22;
	__m128 mv0, mv1, mv2;

	rv02 = m_el[0].mVec128;
	rv12 = m_el[1].mVec128;
	rv22 = m_el[2].mVec128;

	mv0 = _mm_and_ps(m[0].mVec128, btvFFF0fMask);
	mv1 = _mm_and_ps(m[1].mVec128, btvFFF0fMask);
	mv2 = _mm_and_ps(m[2].mVec128, btvFFF0fMask);

	// rv0
	rv00 = bt_splat_ps(rv02, 0);
	rv01 = bt_splat_ps(rv02, 1);
	rv02 = bt_splat_ps(rv02, 2);

	rv00 = _mm_mul_ps(rv00, mv0);
	rv01 = _mm_mul_ps(rv01, mv1);
	rv02 = _mm_mul_ps(rv02, mv2);

	// rv1
	rv10 = bt_splat_ps(rv12, 0);
	rv11 = bt_splat_ps(rv12, 1);
	rv12 = bt_splat_ps(rv12, 2);

	rv10 = _mm_mul_ps(rv10, mv0);
	rv11 = _mm_mul_ps(rv11, mv1);
	rv12 = _mm_mul_ps(rv12, mv2);

	// rv2
	rv20 = bt_splat_ps(rv22, 0);
	rv21 = bt_splat_ps(rv22, 1);
	rv22 = bt_splat_ps(rv22, 2);

	rv20 = _mm_mul_ps(rv20, mv0);
	rv21 = _mm_mul_ps(rv21, mv1);
	rv22 = _mm_mul_ps(rv22, mv2);

	rv00 = _mm_add_ps(rv00, rv01);
	rv10 = _mm_add_ps(rv10, rv11);
	rv20 = _mm_add_ps(rv20, rv21);

	m_el[0].mVec128 = _mm_add_ps(rv00, rv02);
	m_el[1].mVec128 = _mm_add_ps(rv10, rv12);
	m_el[2].mVec128 = _mm_add_ps(rv20, rv22);

#elif defined(BT_USE_NEON)

	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m_el[0].mVec128;
	v1 = m_el[1].mVec128;
	v2 = m_el[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	m_el[0].mVec128 = rv0;
	m_el[1].mVec128 = rv1;
	m_el[2].mVec128 = rv2;
#else
	setValue(
		m.tdotx(m_el[0]), m.tdoty(m_el[0]), m.tdotz(m_el[0]),
		m.tdotx(m_el[1]), m.tdoty(m_el[1]), m.tdotz(m_el[1]),
		m.tdotx(m_el[2]), m.tdoty(m_el[2]), m.tdotz(m_el[2]));
#endif
	return *this;
}
/** @brief Adds the target matrix on the right; equivalent to this = this + m */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator+=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 + m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 + m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 + m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0] + m.m_el[0][0], m_el[0][1] + m.m_el[0][1], m_el[0][2] + m.m_el[0][2],
		m_el[1][0] + m.m_el[1][0], m_el[1][1] + m.m_el[1][1], m_el[1][2] + m.m_el[1][2],
		m_el[2][0] + m.m_el[2][0], m_el[2][1] + m.m_el[2][1], m_el[2][2] + m.m_el[2][2]);
#endif
	return *this;
}
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m, const btScalar& k)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vk = bt_splat_ps(_mm_load_ss((float*)&k), 0x80);
	return btMatrix3x3(
		_mm_mul_ps(m[0].mVec128, vk),
		_mm_mul_ps(m[1].mVec128, vk),
		_mm_mul_ps(m[2].mVec128, vk));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		vmulq_n_f32(m[0].mVec128, k),
		vmulq_n_f32(m[1].mVec128, k),
		vmulq_n_f32(m[2].mVec128, k));
#else
	return btMatrix3x3(
		m[0].x() * k, m[0].y() * k, m[0].z() * k,
		m[1].x() * k, m[1].y() * k, m[1].z() * k,
		m[2].x() * k, m[2].y() * k, m[2].z() * k);
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
operator+(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 + m2[0].mVec128,
		m1[1].mVec128 + m2[1].mVec128,
		m1[2].mVec128 + m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] + m2[0][0], m1[0][1] + m2[0][1], m1[0][2] + m2[0][2],
		m1[1][0] + m2[1][0], m1[1][1] + m2[1][1], m1[1][2] + m2[1][2],
		m1[2][0] + m2[2][0], m1[2][1] + m2[2][1], m1[2][2] + m2[2][2]);
#endif
}
SIMD_FORCE_INLINE btMatrix3x3
operator-(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return btMatrix3x3(
		m1[0].mVec128 - m2[0].mVec128,
		m1[1].mVec128 - m2[1].mVec128,
		m1[2].mVec128 - m2[2].mVec128);
#else
	return btMatrix3x3(
		m1[0][0] - m2[0][0], m1[0][1] - m2[0][1], m1[0][2] - m2[0][2],
		m1[1][0] - m2[1][0], m1[1][1] - m2[1][1], m1[1][2] - m2[1][2],
		m1[2][0] - m2[2][0], m1[2][1] - m2[2][1], m1[2][2] - m2[2][2]);
#endif
}
/** @brief Subtracts the target matrix on the right; equivalent to this = this - m */
SIMD_FORCE_INLINE btMatrix3x3&
btMatrix3x3::operator-=(const btMatrix3x3& m)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	m_el[0].mVec128 = m_el[0].mVec128 - m.m_el[0].mVec128;
	m_el[1].mVec128 = m_el[1].mVec128 - m.m_el[1].mVec128;
	m_el[2].mVec128 = m_el[2].mVec128 - m.m_el[2].mVec128;
#else
	setValue(
		m_el[0][0] - m.m_el[0][0], m_el[0][1] - m.m_el[0][1], m_el[0][2] - m.m_el[0][2],
		m_el[1][0] - m.m_el[1][0], m_el[1][1] - m.m_el[1][1], m_el[1][2] - m.m_el[1][2],
		m_el[2][0] - m.m_el[2][0], m_el[2][1] - m.m_el[2][1], m_el[2][2] - m.m_el[2][2]);
#endif
	return *this;
}
/** @brief Return the determinant of the matrix */
SIMD_FORCE_INLINE btScalar btMatrix3x3::determinant() const
{
	return btTriple((*this)[0], (*this)[1], (*this)[2]);
}
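/* Note (added for illustration, not part of the original header): for a 3x3
   matrix the determinant is the scalar triple product of its rows,
   det = row0 . (row1 x row2); for a pure rotation matrix it is 1.
*/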
/** @brief Return the matrix with all values non negative */
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::absolute() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	return btMatrix3x3(
		_mm_and_ps(m_el[0].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[1].mVec128, btvAbsfMask),
		_mm_and_ps(m_el[2].mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
	return btMatrix3x3(
		(float32x4_t)vandq_s32((int32x4_t)m_el[0].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[1].mVec128, btv3AbsMask),
		(float32x4_t)vandq_s32((int32x4_t)m_el[2].mVec128, btv3AbsMask));
#else
	return btMatrix3x3(
		btFabs(m_el[0].x()), btFabs(m_el[0].y()), btFabs(m_el[0].z()),
		btFabs(m_el[1].x()), btFabs(m_el[1].y()), btFabs(m_el[1].z()),
		btFabs(m_el[2].x()), btFabs(m_el[2].y()), btFabs(m_el[2].z()));
#endif
}
/** @brief Return the transpose of the matrix */
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::transpose() const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 v0 = m_el[0].mVec128;
	__m128 v1 = m_el[1].mVec128;
	__m128 v2 = m_el[2].mVec128;  // x2 y2 z2 w2
	__m128 vT;

	v2 = _mm_and_ps(v2, btvFFF0fMask);  // x2 y2 z2 0

	vT = _mm_unpackhi_ps(v0, v1);  // z0 z1 * *
	v0 = _mm_unpacklo_ps(v0, v1);  // x0 x1 y0 y1

	v1 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(2, 3, 1, 3));                    // y0 y1 y2 0
	v0 = _mm_shuffle_ps(v0, v2, BT_SHUFFLE(0, 1, 0, 3));                    // x0 x1 x2 0
	v2 = btCastdTo128f(_mm_move_sd(btCastfTo128d(v2), btCastfTo128d(vT)));  // z0 z1 z2 0

	return btMatrix3x3(v0, v1, v2);
#elif defined(BT_USE_NEON)
	// note: zeros the w channel
	static const uint32x2_t zMask = (const uint32x2_t){static_cast<uint32_t>(-1), 0};
	float32x4x2_t top = vtrnq_f32(m_el[0].mVec128, m_el[1].mVec128);               // {x0 x1 z0 z1}, {y0 y1 w0 w1}
	float32x2x2_t bl = vtrn_f32(vget_low_f32(m_el[2].mVec128), vdup_n_f32(0.0f));  // {x2 0}, {y2 0}
	float32x4_t v0 = vcombine_f32(vget_low_f32(top.val[0]), bl.val[0]);
	float32x4_t v1 = vcombine_f32(vget_low_f32(top.val[1]), bl.val[1]);
	float32x2_t q = (float32x2_t)vand_u32((uint32x2_t)vget_high_f32(m_el[2].mVec128), zMask);
	float32x4_t v2 = vcombine_f32(vget_high_f32(top.val[0]), q);  // z0 z1 z2 0
	return btMatrix3x3(v0, v1, v2);
#else
	return btMatrix3x3(m_el[0].x(), m_el[1].x(), m_el[2].x(),
					   m_el[0].y(), m_el[1].y(), m_el[2].y(),
					   m_el[0].z(), m_el[1].z(), m_el[2].z());
#endif
}
/** @brief Return the adjoint of the matrix */
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::adjoint() const
{
	return btMatrix3x3(cofac(1, 1, 2, 2), cofac(0, 2, 2, 1), cofac(0, 1, 1, 2),
					   cofac(1, 2, 2, 0), cofac(0, 0, 2, 2), cofac(0, 2, 1, 0),
					   cofac(1, 0, 2, 1), cofac(0, 1, 2, 0), cofac(0, 0, 1, 1));
}
/** @brief Return the inverse of the matrix */
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::inverse() const
{
	btVector3 co(cofac(1, 1, 2, 2), cofac(1, 2, 2, 0), cofac(1, 0, 2, 1));
	btScalar det = (*this)[0].dot(co);
	btFullAssert(det != btScalar(0.0));
	btScalar s = btScalar(1.0) / det;
	return btMatrix3x3(co.x() * s, cofac(0, 2, 2, 1) * s, cofac(0, 1, 1, 2) * s,
					   co.y() * s, cofac(0, 0, 2, 2) * s, cofac(0, 2, 1, 0) * s,
					   co.z() * s, cofac(0, 1, 2, 0) * s, cofac(0, 0, 1, 1) * s);
}
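/* Note (added for illustration, not part of the original header): the inverse
   is the adjoint divided by the determinant; co holds the first column of
   cofactors, so (*this)[0].dot(co) expands the determinant along the first
   row. For pure rotation matrices transpose() is the cheaper inverse.
*/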
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::transposeTimes(const btMatrix3x3& m) const
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	// zero out the w channel of each row of m
	__m128 row = m_el[0].mVec128;
	__m128 m0 = _mm_and_ps(m.getRow(0).mVec128, btvFFF0fMask);
	__m128 m1 = _mm_and_ps(m.getRow(1).mVec128, btvFFF0fMask);
	__m128 m2 = _mm_and_ps(m.getRow(2).mVec128, btvFFF0fMask);
	__m128 r0 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0));
	__m128 r1 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0x55));
	__m128 r2 = _mm_mul_ps(m0, _mm_shuffle_ps(row, row, 0xaa));
	row = m_el[1].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m1, _mm_shuffle_ps(row, row, 0xaa)));
	row = m_el[2].mVec128;
	r0 = _mm_add_ps(r0, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(m2, _mm_shuffle_ps(row, row, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	// zero out the w channel of each row of m
	static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
	float32x4_t m0 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(0).mVec128, xyzMask);
	float32x4_t m1 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(1).mVec128, xyzMask);
	float32x4_t m2 = (float32x4_t)vandq_u32((uint32x4_t)m.getRow(2).mVec128, xyzMask);
	float32x4_t row = m_el[0].mVec128;
	float32x4_t r0 = vmulq_lane_f32(m0, vget_low_f32(row), 0);
	float32x4_t r1 = vmulq_lane_f32(m0, vget_low_f32(row), 1);
	float32x4_t r2 = vmulq_lane_f32(m0, vget_high_f32(row), 0);
	row = m_el[1].mVec128;
	r0 = vmlaq_lane_f32(r0, m1, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m1, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m1, vget_high_f32(row), 0);
	row = m_el[2].mVec128;
	r0 = vmlaq_lane_f32(r0, m2, vget_low_f32(row), 0);
	r1 = vmlaq_lane_f32(r1, m2, vget_low_f32(row), 1);
	r2 = vmlaq_lane_f32(r2, m2, vget_high_f32(row), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].x() * m[0].x() + m_el[1].x() * m[1].x() + m_el[2].x() * m[2].x(),
		m_el[0].x() * m[0].y() + m_el[1].x() * m[1].y() + m_el[2].x() * m[2].y(),
		m_el[0].x() * m[0].z() + m_el[1].x() * m[1].z() + m_el[2].x() * m[2].z(),
		m_el[0].y() * m[0].x() + m_el[1].y() * m[1].x() + m_el[2].y() * m[2].x(),
		m_el[0].y() * m[0].y() + m_el[1].y() * m[1].y() + m_el[2].y() * m[2].y(),
		m_el[0].y() * m[0].z() + m_el[1].y() * m[1].z() + m_el[2].y() * m[2].z(),
		m_el[0].z() * m[0].x() + m_el[1].z() * m[1].x() + m_el[2].z() * m[2].x(),
		m_el[0].z() * m[0].y() + m_el[1].z() * m[1].y() + m_el[2].z() * m[2].y(),
		m_el[0].z() * m[0].z() + m_el[1].z() * m[1].z() + m_el[2].z() * m[2].z());
#endif
}
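/* Note (added for illustration, not part of the original header):
   A.transposeTimes(B) computes A^T * B without materializing A^T; each result
   element is a dot product of a column of A with a column of B.
     btMatrix3x3 c = a.transposeTimes(b);  // same as a.transpose() * b
*/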
SIMD_FORCE_INLINE btMatrix3x3 btMatrix3x3::timesTranspose(const btMatrix3x3& m) const
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 a0 = m_el[0].mVec128;
	__m128 a1 = m_el[1].mVec128;
	__m128 a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // transpose() already zeros the w channel
	__m128 mx = mT[0].mVec128;
	__m128 my = mT[1].mVec128;
	__m128 mz = mT[2].mVec128;

	__m128 r0 = _mm_mul_ps(mx, _mm_shuffle_ps(a0, a0, 0x00));
	__m128 r1 = _mm_mul_ps(mx, _mm_shuffle_ps(a1, a1, 0x00));
	__m128 r2 = _mm_mul_ps(mx, _mm_shuffle_ps(a2, a2, 0x00));
	r0 = _mm_add_ps(r0, _mm_mul_ps(my, _mm_shuffle_ps(a0, a0, 0x55)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(my, _mm_shuffle_ps(a1, a1, 0x55)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(my, _mm_shuffle_ps(a2, a2, 0x55)));
	r0 = _mm_add_ps(r0, _mm_mul_ps(mz, _mm_shuffle_ps(a0, a0, 0xaa)));
	r1 = _mm_add_ps(r1, _mm_mul_ps(mz, _mm_shuffle_ps(a1, a1, 0xaa)));
	r2 = _mm_add_ps(r2, _mm_mul_ps(mz, _mm_shuffle_ps(a2, a2, 0xaa)));
	return btMatrix3x3(r0, r1, r2);
#elif defined BT_USE_NEON
	float32x4_t a0 = m_el[0].mVec128;
	float32x4_t a1 = m_el[1].mVec128;
	float32x4_t a2 = m_el[2].mVec128;

	btMatrix3x3 mT = m.transpose();  // transpose() already zeros the w channel
	float32x4_t mx = mT[0].mVec128;
	float32x4_t my = mT[1].mVec128;
	float32x4_t mz = mT[2].mVec128;

	float32x4_t r0 = vmulq_lane_f32(mx, vget_low_f32(a0), 0);
	float32x4_t r1 = vmulq_lane_f32(mx, vget_low_f32(a1), 0);
	float32x4_t r2 = vmulq_lane_f32(mx, vget_low_f32(a2), 0);
	r0 = vmlaq_lane_f32(r0, my, vget_low_f32(a0), 1);
	r1 = vmlaq_lane_f32(r1, my, vget_low_f32(a1), 1);
	r2 = vmlaq_lane_f32(r2, my, vget_low_f32(a2), 1);
	r0 = vmlaq_lane_f32(r0, mz, vget_high_f32(a0), 0);
	r1 = vmlaq_lane_f32(r1, mz, vget_high_f32(a1), 0);
	r2 = vmlaq_lane_f32(r2, mz, vget_high_f32(a2), 0);
	return btMatrix3x3(r0, r1, r2);
#else
	return btMatrix3x3(
		m_el[0].dot(m[0]), m_el[0].dot(m[1]), m_el[0].dot(m[2]),
		m_el[1].dot(m[0]), m_el[1].dot(m[1]), m_el[1].dot(m[2]),
		m_el[2].dot(m[0]), m_el[2].dot(m[1]), m_el[2].dot(m[2]));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btMatrix3x3& m, const btVector3& v)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
	return v.dot3(m[0], m[1], m[2]);
#else
	return btVector3(m[0].dot(v), m[1].dot(v), m[2].dot(v));
#endif
}
SIMD_FORCE_INLINE btVector3
operator*(const btVector3& v, const btMatrix3x3& m)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	const __m128 vv = v.mVec128;

	__m128 c0 = bt_splat_ps(vv, 0);
	__m128 c1 = bt_splat_ps(vv, 1);
	__m128 c2 = bt_splat_ps(vv, 2);

	c0 = _mm_mul_ps(c0, _mm_and_ps(m[0].mVec128, btvFFF0fMask));
	c1 = _mm_mul_ps(c1, _mm_and_ps(m[1].mVec128, btvFFF0fMask));
	c0 = _mm_add_ps(c0, c1);
	c2 = _mm_mul_ps(c2, _mm_and_ps(m[2].mVec128, btvFFF0fMask));

	return btVector3(_mm_add_ps(c0, c2));
#elif defined(BT_USE_NEON)
	const float32x4_t vv = v.mVec128;
	const float32x2_t vlo = vget_low_f32(vv);
	const float32x2_t vhi = vget_high_f32(vv);

	float32x4_t c0, c1, c2;

	c0 = (float32x4_t)vandq_s32((int32x4_t)m[0].mVec128, btvFFF0Mask);
	c1 = (float32x4_t)vandq_s32((int32x4_t)m[1].mVec128, btvFFF0Mask);
	c2 = (float32x4_t)vandq_s32((int32x4_t)m[2].mVec128, btvFFF0Mask);

	c0 = vmulq_lane_f32(c0, vlo, 0);
	c1 = vmulq_lane_f32(c1, vlo, 1);
	c2 = vmulq_lane_f32(c2, vhi, 0);
	c0 = vaddq_f32(c0, c1);
	c0 = vaddq_f32(c0, c2);

	return btVector3(c0);
#else
	return btVector3(m.tdotx(v), m.tdoty(v), m.tdotz(v));
#endif
}
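/* Note (added for illustration, not part of the original header): the
   row-vector product v * m equals m.transpose() * v, so transforming by an
   orthonormal matrix's inverse needs no explicit transpose:
     btVector3 local = world * rotation;  // == rotation.inverse() * world
*/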
SIMD_FORCE_INLINE btMatrix3x3
operator*(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 m10 = m1[0].mVec128;
	__m128 m11 = m1[1].mVec128;
	__m128 m12 = m1[2].mVec128;

	__m128 m2v = _mm_and_ps(m2[0].mVec128, btvFFF0fMask);

	__m128 c0 = bt_splat_ps(m10, 0);
	__m128 c1 = bt_splat_ps(m11, 0);
	__m128 c2 = bt_splat_ps(m12, 0);

	c0 = _mm_mul_ps(c0, m2v);
	c1 = _mm_mul_ps(c1, m2v);
	c2 = _mm_mul_ps(c2, m2v);

	m2v = _mm_and_ps(m2[1].mVec128, btvFFF0fMask);

	__m128 c0_1 = bt_splat_ps(m10, 1);
	__m128 c1_1 = bt_splat_ps(m11, 1);
	__m128 c2_1 = bt_splat_ps(m12, 1);

	c0_1 = _mm_mul_ps(c0_1, m2v);
	c1_1 = _mm_mul_ps(c1_1, m2v);
	c2_1 = _mm_mul_ps(c2_1, m2v);

	m2v = _mm_and_ps(m2[2].mVec128, btvFFF0fMask);

	c0 = _mm_add_ps(c0, c0_1);
	c1 = _mm_add_ps(c1, c1_1);
	c2 = _mm_add_ps(c2, c2_1);

	m10 = bt_splat_ps(m10, 2);
	m11 = bt_splat_ps(m11, 2);
	m12 = bt_splat_ps(m12, 2);

	m10 = _mm_mul_ps(m10, m2v);
	m11 = _mm_mul_ps(m11, m2v);
	m12 = _mm_mul_ps(m12, m2v);

	c0 = _mm_add_ps(c0, m10);
	c1 = _mm_add_ps(c1, m11);
	c2 = _mm_add_ps(c2, m12);

	return btMatrix3x3(c0, c1, c2);
#elif defined(BT_USE_NEON)
	float32x4_t rv0, rv1, rv2;
	float32x4_t v0, v1, v2;
	float32x4_t mv0, mv1, mv2;

	v0 = m1[0].mVec128;
	v1 = m1[1].mVec128;
	v2 = m1[2].mVec128;

	mv0 = (float32x4_t)vandq_s32((int32x4_t)m2[0].mVec128, btvFFF0Mask);
	mv1 = (float32x4_t)vandq_s32((int32x4_t)m2[1].mVec128, btvFFF0Mask);
	mv2 = (float32x4_t)vandq_s32((int32x4_t)m2[2].mVec128, btvFFF0Mask);

	rv0 = vmulq_lane_f32(mv0, vget_low_f32(v0), 0);
	rv1 = vmulq_lane_f32(mv0, vget_low_f32(v1), 0);
	rv2 = vmulq_lane_f32(mv0, vget_low_f32(v2), 0);

	rv0 = vmlaq_lane_f32(rv0, mv1, vget_low_f32(v0), 1);
	rv1 = vmlaq_lane_f32(rv1, mv1, vget_low_f32(v1), 1);
	rv2 = vmlaq_lane_f32(rv2, mv1, vget_low_f32(v2), 1);

	rv0 = vmlaq_lane_f32(rv0, mv2, vget_high_f32(v0), 0);
	rv1 = vmlaq_lane_f32(rv1, mv2, vget_high_f32(v1), 0);
	rv2 = vmlaq_lane_f32(rv2, mv2, vget_high_f32(v2), 0);

	return btMatrix3x3(rv0, rv1, rv2);
#else
	return btMatrix3x3(
		m2.tdotx(m1[0]), m2.tdoty(m1[0]), m2.tdotz(m1[0]),
		m2.tdotx(m1[1]), m2.tdoty(m1[1]), m2.tdotz(m1[1]),
		m2.tdotx(m1[2]), m2.tdoty(m1[2]), m2.tdotz(m1[2]));
#endif
}
/** @brief Equality operator between two matrices
 *  It will test all elements are equal. */
SIMD_FORCE_INLINE bool operator==(const btMatrix3x3& m1, const btMatrix3x3& m2)
{
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 c0, c1, c2;

	c0 = _mm_cmpeq_ps(m1[0].mVec128, m2[0].mVec128);
	c1 = _mm_cmpeq_ps(m1[1].mVec128, m2[1].mVec128);
	c2 = _mm_cmpeq_ps(m1[2].mVec128, m2[2].mVec128);

	c0 = _mm_and_ps(c0, c1);
	c0 = _mm_and_ps(c0, c2);

	// only the x, y and z lanes matter; mask off the w lane before comparing
	return (0x7 == (_mm_movemask_ps((__m128)c0) & 0x7));
#else
	return (m1[0][0] == m2[0][0] && m1[1][0] == m2[1][0] && m1[2][0] == m2[2][0] &&
			m1[0][1] == m2[0][1] && m1[1][1] == m2[1][1] && m1[2][1] == m2[2][1] &&
			m1[0][2] == m2[0][2] && m1[1][2] == m2[1][2] && m1[2][2] == m2[2][2]);
#endif
}
SIMD_FORCE_INLINE void btMatrix3x3::serialize(struct btMatrix3x3Data& dataOut) const
{
	for (int i = 0; i < 3; i++)
		m_el[i].serialize(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::serializeFloat(struct btMatrix3x3FloatData& dataOut) const
{
	for (int i = 0; i < 3; i++)
		m_el[i].serializeFloat(dataOut.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerialize(const struct btMatrix3x3Data& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerialize(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeFloat(const struct btMatrix3x3FloatData& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeFloat(dataIn.m_el[i]);
}

SIMD_FORCE_INLINE void btMatrix3x3::deSerializeDouble(const struct btMatrix3x3DoubleData& dataIn)
{
	for (int i = 0; i < 3; i++)
		m_el[i].deSerializeDouble(dataIn.m_el[i]);
}
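/* Usage sketch (added for illustration, not part of the original header):
   btMatrix3x3Data resolves to the float or double struct depending on
   BT_USE_DOUBLE_PRECISION (see the #define near the top of this file).
     btMatrix3x3Data data;
     m.serialize(data);     // write out row by row
     btMatrix3x3 m2;
     m2.deSerialize(data);  // read back
*/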
#endif  //BT_MATRIX3x3_H