#ifdef BT_USE_DOUBLE_PRECISION
#define btVector3Data btVector3DoubleData
#define btVector3DataName "btVector3DoubleData"
#else
#define btVector3Data btVector3FloatData
#define btVector3DataName "btVector3FloatData"
#endif  //BT_USE_DOUBLE_PRECISION
#if defined BT_USE_SSE

#pragma warning(disable : 4556)  // value of intrinsic immediate argument '4294967239' is out of range '0 - 255'

#define BT_SHUFFLE(x, y, z, w) ((w) << 6 | (z) << 4 | (y) << 2 | (x))
#define bt_pshufd_ps(_a, _mask) _mm_shuffle_ps((_a), (_a), (_mask))
#define bt_splat3_ps(_a, _i) bt_pshufd_ps((_a), BT_SHUFFLE(_i, _i, _i, 3))
#define bt_splat_ps(_a, _i) bt_pshufd_ps((_a), BT_SHUFFLE(_i, _i, _i, _i))
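// BT_SHUFFLE packs four 2-bit lane indices, destination x lane first (the
// reverse of _MM_SHUFFLE's argument order); e.g. BT_SHUFFLE(1, 2, 0, 3) == 0xC9
// selects (y, z, x, w). bt_splat3_ps broadcasts lane _i into x, y, z and
// keeps w, while bt_splat_ps broadcasts lane _i into all four lanes.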
#define btv3AbsiMask (_mm_set_epi32(0x00000000, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvAbsMask (_mm_set_epi32(0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF))
#define btvFFF0Mask (_mm_set_epi32(0x00000000, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF))
#define btv3AbsfMask btCastiTo128f(btv3AbsiMask)
#define btvFFF0fMask btCastiTo128f(btvFFF0Mask)
#define btvxyzMaskf btvFFF0fMask
#define btvAbsfMask btCastiTo128f(btvAbsMask)
#define btvMzeroMask (_mm_set_ps(-0.0f, -0.0f, -0.0f, -0.0f))
#define v1110 (_mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f))
#define vHalf (_mm_set_ps(0.5f, 0.5f, 0.5f, 0.5f))
#define v1_5 (_mm_set_ps(1.5f, 1.5f, 1.5f, 1.5f))
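// btvFFF0Mask keeps x, y, z and clears the unused w lane; btvMzeroMask has
// only sign bits set, so XORing with it negates a vector. v1110, vHalf and
// v1_5 feed the reciprocal and Newton-Raphson rsqrt paths further down.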
const float32x4_t ATTRIBUTE_ALIGNED16(btvMzeroMask) = (float32x4_t){-0.0f, -0.0f, -0.0f, -0.0f};
 
const int32x4_t ATTRIBUTE_ALIGNED16(btvFFF0Mask) = (int32x4_t){static_cast<int32_t>(0xFFFFFFFF),
                                                               static_cast<int32_t>(0xFFFFFFFF), static_cast<int32_t>(0xFFFFFFFF), 0x0};
 
const int32x4_t ATTRIBUTE_ALIGNED16(btvAbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF};
 
const int32x4_t ATTRIBUTE_ALIGNED16(btv3AbsMask) = (int32x4_t){0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x0};
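// NEON equivalents of the SSE masks above: there is no _mm_set_epi32-style
// shorthand here, so the masks are defined once as aligned global constants.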
 
#if defined(__SPU__) && defined(__CELLOS_LV2__)
		return *((const vec_float4*)&m_floats[0]);
#else                                            //__CELLOS_LV2__ __SPU__
#if defined(BT_USE_SSE) || defined(BT_USE_NEON)  // _WIN32 || ARM
#endif  //__CELLOS_LV2__ __SPU__
#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		mVec128 = rhs.mVec128;
#endif  // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_add_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vaddq_f32(mVec128, v.mVec128);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_sub_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vsubq_f32(mVec128, v.mVec128);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
		mVec128 = _mm_mul_ps(mVec128, vs);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_n_f32(mVec128, s);
 
#if 0  //defined(BT_USE_SSE_IN_API)
		__m128 vs = _mm_load_ss(&s);
		vs = _mm_div_ss(v1110, vs);   // 1.0 / s in the low lane
		vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)
		mVec128 = _mm_mul_ps(mVec128, vs);
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
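		// multiply lanewise, then sum x+y+z horizontally: movehl drops the z
		// product into lane 0 and the 0x55 shuffle broadcasts the y product,
		// so two _mm_add_ss calls leave the scalar dot product in lane 0.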
		__m128 vd = _mm_mul_ps(mVec128, v.mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);
		return _mm_cvtss_f32(vd);
 
#elif defined(BT_USE_NEON)
		float32x4_t vd = vmulq_f32(mVec128, v.mVec128);
		float32x2_t x = vpadd_f32(vget_low_f32(vd), vget_low_f32(vd));  // (x+y, x+y)
		x = vadd_f32(x, vget_high_f32(vd));  // lane 0 now holds x+y+z
		return vget_lane_f32(x, 0);
 
		return m_floats[0] * v.m_floats[0] +
		       m_floats[1] * v.m_floats[1] +
		       m_floats[2] * v.m_floats[2];
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
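		// squared length first (same horizontal-add pattern as dot()), then
		// scale by 1/sqrt. Two variants follow: an exact _mm_sqrt_ss + divide,
		// and an _mm_rsqrt_ss estimate refined with one Newton-Raphson step
		// (the preprocessor guards selecting between them are elided here).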
		__m128 vd = _mm_mul_ps(mVec128, mVec128);
		__m128 z = _mm_movehl_ps(vd, vd);
		__m128 y = _mm_shuffle_ps(vd, vd, 0x55);
		vd = _mm_add_ss(vd, y);
		vd = _mm_add_ss(vd, z);
 
		vd = _mm_sqrt_ss(vd);
		vd = _mm_div_ss(v1110, vd);  // 1.0 / length in lane 0
		vd = bt_splat_ps(vd, 0x80);  // splat 1/length into x, y, z
		mVec128 = _mm_mul_ps(mVec128, vd);
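
		// Newton-Raphson alternative: refine the _mm_rsqrt_ss estimate y0 with
		// one step, y1 = y0 * (1.5 - 0.5 * x * y0 * y0), roughly doubling the
		// ~12-bit accuracy of the hardware estimate.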
 
		y = _mm_rsqrt_ss(vd);  // y0, low-precision estimate of 1/sqrt(x)

		z = v1_5;
		vd = _mm_mul_ss(vd, vHalf);  // 0.5 * x
		vd = _mm_mul_ss(vd, y);      // 0.5 * x * y0
		vd = _mm_mul_ss(vd, y);      // 0.5 * x * y0 * y0
		z = _mm_sub_ss(z, vd);       // 1.5 - 0.5 * x * y0 * y0

		y = _mm_mul_ss(y, z);  // y1 = y0 * (1.5 - 0.5 * x * y0 * y0)

		y = bt_splat_ps(y, 0x80);  // splat the refined 1/length into x, y, z
		mVec128 = _mm_mul_ps(mVec128, y);
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return btVector3(_mm_and_ps(mVec128, btv3AbsfMask));  // clear the sign bits of x, y, z
#elif defined(BT_USE_NEON)
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
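		// cross(a, b) via two "yzx" shuffles: with T = a.yzx and V = b.yzx,
		// V*a - T*b holds the cross product rotated one lane, so a final yzx
		// shuffle puts (a.y*b.z - a.z*b.y, a.z*b.x - a.x*b.z, a.x*b.y - a.y*b.x)
		// back into x, y, z order.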
		__m128 T, V;

		T = bt_pshufd_ps(mVec128, BT_SHUFFLE(1, 2, 0, 3));    //	(Y Z X 0)
		V = bt_pshufd_ps(v.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)

		V = _mm_mul_ps(V, mVec128);
		T = _mm_mul_ps(T, v.mVec128);
		V = _mm_sub_ps(V, T);

		V = bt_pshufd_ps(V, BT_SHUFFLE(1, 2, 0, 3));
		return btVector3(V);
 
#elif defined(BT_USE_NEON)
		float32x4_t T, V;
		float32x2_t Tlow = vget_low_f32(mVec128);
		float32x2_t Vlow = vget_low_f32(v.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v.mVec128), 1), Vlow);

		V = vmulq_f32(V, mVec128);
		T = vmulq_f32(T, v.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);

		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);
		V = (float32x4_t)vandq_s32((int32x4_t)V, btvFFF0Mask);
		return btVector3(V);
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
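		// triple(v1, v2) = this . (v1 x v2): the cross product is formed
		// inline with the same yzx-shuffle trick as cross(), then reduced
		// against *this with the horizontal-add dot pattern.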
		__m128 T = _mm_shuffle_ps(v1.mVec128, v1.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)
		__m128 V = _mm_shuffle_ps(v2.mVec128, v2.mVec128, BT_SHUFFLE(1, 2, 0, 3));  //	(Y Z X 0)

		V = _mm_mul_ps(V, v1.mVec128);
		T = _mm_mul_ps(T, v2.mVec128);
		V = _mm_sub_ps(V, T);

		V = _mm_shuffle_ps(V, V, BT_SHUFFLE(1, 2, 0, 3));

		V = _mm_mul_ps(V, mVec128);
		__m128 z = _mm_movehl_ps(V, V);
		__m128 y = _mm_shuffle_ps(V, V, 0x55);
		V = _mm_add_ss(V, y);
		V = _mm_add_ss(V, z);
		return _mm_cvtss_f32(V);
 
#elif defined(BT_USE_NEON)
		float32x4_t T, V;
		float32x2_t Tlow = vget_low_f32(v1.mVec128);
		float32x2_t Vlow = vget_low_f32(v2.mVec128);
		T = vcombine_f32(vext_f32(Tlow, vget_high_f32(v1.mVec128), 1), Tlow);
		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(v2.mVec128), 1), Vlow);

		V = vmulq_f32(V, v1.mVec128);
		T = vmulq_f32(T, v2.mVec128);
		V = vsubq_f32(V, T);
		Vlow = vget_low_f32(V);

		V = vcombine_f32(vext_f32(Vlow, vget_high_f32(V), 1), Vlow);

		V = vmulq_f32(mVec128, V);
		float32x2_t x = vpadd_f32(vget_low_f32(V), vget_low_f32(V));
		x = vadd_f32(x, vget_high_f32(V));
		return vget_lane_f32(x, 0);
 
		return m_floats[0] < m_floats[1] ? (m_floats[0] < m_floats[2] ? 0 : 2) : (m_floats[1] < m_floats[2] ? 1 : 2);  // minAxis: index of the smallest component

		return m_floats[0] < m_floats[1] ? (m_floats[1] < m_floats[2] ? 2 : 1) : (m_floats[0] < m_floats[2] ? 2 : 0);  // maxAxis: index of the largest component

		return absolute().minAxis();  // furthestAxis: smallest absolute component

		return absolute().maxAxis();  // closestAxis: largest absolute component
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
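		// blend: broadcast s = 1 - rt and rt across the x, y, z lanes, scale
		// v0 and v1, and add; the NEON path below computes the equivalent
		// v0 + (v1 - v0) * rt instead.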
		__m128 vrt = _mm_load_ss(&rt);  //	(rt 0 0 0)
		btScalar s = btScalar(1.0) - rt;
		__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
		vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
		__m128 r0 = _mm_mul_ps(v0.mVec128, vs);
		vrt = bt_pshufd_ps(vrt, 0x80);  //	(rt rt rt 0.0)
		__m128 r1 = _mm_mul_ps(v1.mVec128, vrt);
		__m128 tmp3 = _mm_add_ps(r0, r1);
		mVec128 = tmp3;
 
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v1.mVec128, v0.mVec128);
		vl = vmulq_n_f32(vl, rt);
		mVec128 = vaddq_f32(vl, v0.mVec128);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		__m128 vt = _mm_load_ss(&t);  //	(t 0 0 0)
		vt = bt_pshufd_ps(vt, 0x80);  //	(t t t 0.0)
		__m128 vl = _mm_sub_ps(v.mVec128, mVec128);
		vl = _mm_mul_ps(vl, vt);
		vl = _mm_add_ps(vl, mVec128);

		return btVector3(vl);
#elif defined(BT_USE_NEON)
		float32x4_t vl = vsubq_f32(v.mVec128, mVec128);
		vl = vmulq_n_f32(vl, t);
		vl = vaddq_f32(vl, mVec128);

		return btVector3(vl);
 
#else
		return btVector3(m_floats[0] + (v.m_floats[0] - m_floats[0]) * t,
						 m_floats[1] + (v.m_floats[1] - m_floats[1]) * t,
						 m_floats[2] + (v.m_floats[2] - m_floats[2]) * t);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_mul_ps(mVec128, v.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmulq_f32(mVec128, v.mVec128);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return (0xf == _mm_movemask_ps((__m128)_mm_cmpeq_ps(mVec128, other.mVec128)));
#else
		return ((m_floats[3] == other.m_floats[3]) &&
				(m_floats[2] == other.m_floats[2]) &&
				(m_floats[1] == other.m_floats[1]) &&
				(m_floats[0] == other.m_floats[0]));
 
		return !(*this == other);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_max_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vmaxq_f32(mVec128, other.mVec128);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = _mm_min_ps(mVec128, other.mVec128);
#elif defined(BT_USE_NEON)
		mVec128 = vminq_f32(mVec128, other.mVec128);
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
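		// getSkewSymmetricMatrix: build the rows of
		//   [  0  -z   y ]
		//   [  z   0  -x ]
		//   [ -y   x   0 ]
		// from the vector V and its negation V0, one shuffle per row.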
		__m128 V = _mm_and_ps(mVec128, btvFFF0fMask);
		__m128 V0 = _mm_xor_ps(btvMzeroMask, V);
		__m128 V2 = _mm_movelh_ps(V0, V);

		__m128 V1 = _mm_shuffle_ps(V, V0, 0xCE);

		V0 = _mm_shuffle_ps(V0, V, 0xDB);
		V2 = _mm_shuffle_ps(V2, V, 0xF9);
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		mVec128 = (__m128)_mm_xor_ps(mVec128, mVec128);
#elif defined(BT_USE_NEON)
		int32x4_t vi = vdupq_n_s32(0);
		mVec128 = vreinterpretq_f32_s32(vi);
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
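		// dot3: three dot products at once, (this . v0, this . v1, this . v2).
		// The lanewise products a0, a1, a2 are transposed with unpack/movelh
		// so matching lanes line up, then summed column-wise.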
		__m128 a0 = _mm_mul_ps(v0.mVec128, this->mVec128);
		__m128 a1 = _mm_mul_ps(v1.mVec128, this->mVec128);
		__m128 a2 = _mm_mul_ps(v2.mVec128, this->mVec128);
		__m128 b0 = _mm_unpacklo_ps(a0, a1);
		__m128 b1 = _mm_unpackhi_ps(a0, a1);
		__m128 b2 = _mm_unpacklo_ps(a2, _mm_setzero_ps());
		__m128 r = _mm_movelh_ps(b0, b2);
		r = _mm_add_ps(r, _mm_movehl_ps(b2, b0));
		a2 = _mm_and_ps(a2, btvxyzMaskf);
		r = _mm_add_ps(r, btCastdTo128f(_mm_move_sd(btCastfTo128d(a2), btCastfTo128d(b1))));
		return btVector3(r);
 
#elif defined(BT_USE_NEON)
		static const uint32x4_t xyzMask = (const uint32x4_t){static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), static_cast<uint32_t>(-1), 0};
		float32x4_t a0 = vmulq_f32(v0.mVec128, this->mVec128);
		float32x4_t a1 = vmulq_f32(v1.mVec128, this->mVec128);
		float32x4_t a2 = vmulq_f32(v2.mVec128, this->mVec128);
		float32x2x2_t zLo = vtrn_f32(vget_high_f32(a0), vget_high_f32(a1));
		a2 = (float32x4_t)vandq_u32((uint32x4_t)a2, xyzMask);
		float32x2_t b0 = vadd_f32(vpadd_f32(vget_low_f32(a0), vget_low_f32(a1)), zLo.val[0]);
		float32x2_t b1 = vpadd_f32(vpadd_f32(vget_low_f32(a2), vget_high_f32(a2)), vdup_n_f32(0.0f));
		return btVector3(vcombine_f32(b0, b1));
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	return btVector3(_mm_add_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vaddq_f32(v1.mVec128, v2.mVec128));
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	return btVector3(_mm_mul_ps(v1.mVec128, v2.mVec128));
#elif defined(BT_USE_NEON)
	return btVector3(vmulq_f32(v1.mVec128, v2.mVec128));
 
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 r = _mm_sub_ps(v1.mVec128, v2.mVec128);
	return btVector3(_mm_and_ps(r, btvFFF0fMask));  // keep w = 0
#elif defined(BT_USE_NEON)
	float32x4_t r = vsubq_f32(v1.mVec128, v2.mVec128);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
 
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 r = _mm_xor_ps(v.mVec128, btvMzeroMask);  // flip the sign bits
	return btVector3(_mm_and_ps(r, btvFFF0fMask));
#elif defined(BT_USE_NEON)
	return btVector3((btSimdFloat4)veorq_s32((int32x4_t)v.mVec128, (int32x4_t)btvMzeroMask));
 
#if defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
	__m128 vs = _mm_load_ss(&s);  //	(S 0 0 0)
	vs = bt_pshufd_ps(vs, 0x80);  //	(S S S 0.0)
	return btVector3(_mm_mul_ps(v.mVec128, vs));
#elif defined(BT_USE_NEON)
	float32x4_t r = vmulq_n_f32(v.mVec128, s);
	return btVector3((float32x4_t)vandq_s32((int32x4_t)r, btvFFF0Mask));
 
#if 0  //defined(BT_USE_SSE_IN_API)
	__m128 vs = _mm_load_ss(&s);
	vs = _mm_div_ss(v1110, vs);   // 1.0 / s in the low lane
	vs = bt_pshufd_ps(vs, 0x00);  //	(S S S S)
	return btVector3(_mm_mul_ps(v.mVec128, vs));
 
#if defined BT_USE_SIMD_VECTOR3 && (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE))
	__m128 vec = _mm_div_ps(v1.mVec128, v2.mVec128);
	vec = _mm_and_ps(vec, btvFFF0fMask);  // keep w = 0
	return btVector3(vec);
 
#elif defined(BT_USE_NEON)
	float32x4_t x, y, v, m;

	x = v1.mVec128;
	y = v2.mVec128;

	v = vrecpeq_f32(y);     // initial estimate v0 ~= 1/y
	m = vrecpsq_f32(y, v);  // m = 2 - y*v0
	v = vmulq_f32(v, m);    // first Newton-Raphson step: v1 = v0 * (2 - y*v0)
	m = vrecpsq_f32(y, v);  // m = 2 - y*v1
	v = vmulq_f32(v, m);    // second Newton-Raphson step: v2 = v1 * (2 - y*v1)
 
	return v1.lerp(v2, t);

	return (v - *this).length2();

	return (v - *this).length();
 
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
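	// Rodrigues' rotation about the unit-length axis w:
	//   v' = w*(w . v) + cos(_angle) * (v - w*(w . v)) + sin(_angle) * (w x v)
	// O accumulates w*(w . v); C holds the cross-product term w x v.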
	__m128 O = _mm_mul_ps(wAxis.mVec128, mVec128);
	btScalar ssin = btSin(_angle);
	__m128 C = wAxis.cross(mVec128).mVec128;
	O = _mm_and_ps(O, btvFFF0fMask);
	btScalar scos = btCos(_angle);

	__m128 vsin = _mm_load_ss(&ssin);  //	(S 0 0 0)
	__m128 vcos = _mm_load_ss(&scos);  //	(S 0 0 0)

	__m128 Y = bt_pshufd_ps(O, 0xC9);  //	(Y Z X 0)
	__m128 Z = bt_pshufd_ps(O, 0xD2);  //	(Z X Y 0)
	O = _mm_add_ps(O, Y);
	vsin = bt_pshufd_ps(vsin, 0x80);  //	(S S S 0)
	O = _mm_add_ps(O, Z);
	vcos = bt_pshufd_ps(vcos, 0x80);  //	(S S S 0)

	vsin = vsin * C;        // sin(_angle) * (w x v)
	O = O * wAxis.mVec128;  // w * (w . v)
	__m128 X = mVec128 - O;

	O = O + vsin;
	vcos = vcos * X;
	O = O + vcos;

	return btVector3(O);
 
	_y = wAxis.cross(*this);

	return (o + _x * btCos(_angle) + _y * btSin(_angle));
 
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#if defined _WIN32 || defined(BT_USE_SSE)
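	// arrays shorter than scalar_cutoff are handled by the plain loop below;
	// longer ones are passed to the out-of-line SIMD kernel _maxdot_large.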
	const long scalar_cutoff = 10;
	long _maxdot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
 
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_maxdot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
 
#endif

	if (array_count < scalar_cutoff)

		for (i = 0; i < array_count; i++)
 
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
	return _maxdot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
 
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
#if defined BT_USE_SSE
	const long scalar_cutoff = 10;
	long _mindot_large(const float* array, const float* vec, unsigned long array_count, float* dotOut);
 
#elif defined BT_USE_NEON
	const long scalar_cutoff = 4;
	extern long (*_mindot_large)(const float* array, const float* vec, unsigned long array_count, float* dotOut);
 
#else
#error unhandled arch!
#endif

	if (array_count < scalar_cutoff)

		for (i = 0; i < array_count; i++)
 
#if (defined BT_USE_SSE && defined BT_USE_SIMD_VECTOR3 && defined BT_USE_SSE_IN_API) || defined(BT_USE_NEON)
	return _mindot_large((float*)array, (float*)&m_floats[0], array_count, &dotOut);
 
#endif  //BT_USE_SIMD_VECTOR3

#if (defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)) || defined(BT_USE_NEON)
		mVec128 = rhs.mVec128;

		mVec128 = v.mVec128;

#endif  // #if defined (BT_USE_SSE_IN_API) || defined (BT_USE_NEON)
#if defined BT_USE_SIMD_VECTOR3 && defined(BT_USE_SSE_IN_API) && defined(BT_USE_SSE)
		return btVector4(_mm_and_ps(mVec128, btvAbsfMask));
#elif defined(BT_USE_NEON)
#ifdef BT_USE_DOUBLE_PRECISION
	unsigned char* dest = (unsigned char*)&destVal;
	const unsigned char* src = (const unsigned char*)&sourceVal;
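	// btSwapScalarEndian reverses a scalar's byte order by copying through
	// unsigned char pointers: 8 bytes per scalar in double precision, 4 in
	// single precision (the swap statements themselves are elided here).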
 
	unsigned char* dest = (unsigned char*)&destVal;
	const unsigned char* src = (const unsigned char*)&sourceVal;
 
#endif  //BT_USE_DOUBLE_PRECISION
	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	vector = swappedVec;
 
		// choose p in y-z plane
		btScalar a = n[1] * n[1] + n[2] * n[2];

		// set q = n x p
		q[1] = -n[0] * p[2];

		// choose p in x-y plane
		btScalar a = n[0] * n[0] + n[1] * n[1];

		// set q = n x p
		q[0] = -n[2] * p[1];
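		// p and q complete the unit normal n to an orthonormal basis; the
		// enclosing branch (elided here) picks which coordinate plane to
		// build p in, based on which components of n dominate, so that
		// q = n x p is well-conditioned.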
 
	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)

	for (int i = 0; i < 4; i++)
 
#endif  //BT_VECTOR3_H