16 #if defined(_WIN32) || defined(__i386__) 
   17 #define BT_USE_SSE_IN_API 
   30 #if defined(__CELLOS_LV2__) && defined(__SPU__) 
   31 #include <spu_intrinsics.h> 
   // vec_dot3: 3-component dot product of two SPU quadword vectors.
   // spu_mul forms the lane-wise products; the two spu_madd calls then add the
   // products of the inputs rotated left by 4 and 8 bytes (one and two float
   // lanes), so lane 0 of the returned vector accumulates
   // vec0[0]*vec1[0] + vec0[1]*vec1[1] + vec0[2]*vec1[2].
   // The other lanes hold rotated partial sums; visible callers only compare
   // and extract lane 0.
   // NOTE(review): the declaration of `result` and the function braces are not
   // present in this listing (embedded numbering skips 33-34 and 38).
   32 static inline vec_float4 vec_dot3(vec_float4 vec0, vec_float4 vec1)
 
   35         result = spu_mul(vec0, vec1);
 
   36         result = spu_madd(spu_rlqwbyte(vec0, 4), spu_rlqwbyte(vec1, 4), result);
 
   37         return spu_madd(spu_rlqwbyte(vec0, 8), spu_rlqwbyte(vec1, 8), result);
 
   // Fragment of a support-point search: finds the entry of `points` with the
   // largest dot product against a scaled direction and returns it scaled by
   // localScaling.
   // NOTE(review): the enclosing function signature, the braces, the
   // declarations of `localDir`, `v` and `maxDot`, and the #else/#endif lines
   // are not present in this listing.
   // The direction is multiplied by localScaling first (presumably
   // component-wise; confirm against btVector3::operator*).
   72         btVector3 vec = localDirOrg * localScaling;
 
   // Cell SPU path: hand-vectorised arg-max. Only lane 0 of each vector is
   // meaningful (see spu_extract(..., 0) below).
   74 #if defined(__CELLOS_LV2__) && defined(__SPU__) 
   // Running maximum starts at -FLT_MAX with a sentinel index of -999.
   // NOTE(review): if neither loop below processes a point (e.g. numPoints <= 0),
   // the sentinel reaches points[ptIndex] at line 115; confirm callers
   // guarantee at least one point.
   78         vec_float4 v_distMax = {-FLT_MAX, 0, 0, 0};
 
   79         vec_int4 v_idxMax = {-999, 0, 0, 0};
 
   81         int numverts = numPoints;
 
   // Main loop: four points per iteration. The strict `< numverts - 4` bound
   // leaves the trailing points to the scalar loop at line 106.
   83         for (; v < (int)numverts - 4; v += 4)
 
   // Dot product of each of the four points with the search direction.
   85                 vec_float4 p0 = vec_dot3(points[v].get128(), localDir.get128());
 
   86                 vec_float4 p1 = vec_dot3(points[v + 1].get128(), localDir.get128());
 
   87                 vec_float4 p2 = vec_dot3(points[v + 2].get128(), localDir.get128());
 
   88                 vec_float4 p3 = vec_dot3(points[v + 3].get128(), localDir.get128());
 
   // Matching point indices, carried in lane 0 so they can be selected with
   // the same masks as the distances.
   89                 const vec_int4 i0 = {v, 0, 0, 0};
 
   90                 const vec_int4 i1 = {v + 1, 0, 0, 0};
 
   91                 const vec_int4 i2 = {v + 2, 0, 0, 0};
 
   92                 const vec_int4 i3 = {v + 3, 0, 0, 0};
 
   // Pairwise tournament: the compare mask picks the larger distance and, with
   // the same mask, its index. The compare is strict, so on a tie the second
   // operand (higher index) is kept.
   93                 vec_uint4 retGt01 = spu_cmpgt(p0, p1);
 
   94                 vec_float4 pmax01 = spu_sel(p1, p0, retGt01);
 
   95                 vec_int4 imax01 = spu_sel(i1, i0, retGt01);
 
   96                 vec_uint4 retGt23 = spu_cmpgt(p2, p3);
 
   97                 vec_float4 pmax23 = spu_sel(p3, p2, retGt23);
 
   98                 vec_int4 imax23 = spu_sel(i3, i2, retGt23);
 
   99                 vec_uint4 retGt0123 = spu_cmpgt(pmax01, pmax23);
 
  100                 vec_float4 pmax0123 = spu_sel(pmax23, pmax01, retGt0123);
 
  101                 vec_int4 imax0123 = spu_sel(imax23, imax01, retGt0123);
 
   // Fold the batch winner into the running maximum: keep the old value only
   // when it is strictly greater than the candidate.
  102                 vec_uint4 retGtMax = spu_cmpgt(v_distMax, pmax0123);
 
  103                 v_distMax = spu_sel(pmax0123, v_distMax, retGtMax);
 
  104                 v_idxMax = spu_sel(imax0123, v_idxMax, retGtMax);
 
   // Scalar tail: same compare/select update, one point at a time.
  106         for (; v < (int)numverts; v++)
 
  108                 vec_float4 p = vec_dot3(points[v].get128(), localDir.get128());
 
  109                 const vec_int4 i = {v, 0, 0, 0};
 
  110                 vec_uint4 retGtMax = spu_cmpgt(v_distMax, p);
 
  111                 v_distMax = spu_sel(p, v_distMax, retGtMax);
 
  112                 v_idxMax = spu_sel(i, v_idxMax, retGtMax);
 
   // Winning index lives in lane 0.
  114         int ptIndex = spu_extract(v_idxMax, 0);
 
   // Const reference bound to the temporary product; the temporary's lifetime
   // is extended to that of the reference.
  115         const btVector3& supVec = points[ptIndex] * localScaling;
 
   // Second declaration of ptIndex: presumably the non-SPU branch (the #else
   // is not visible here). The value returned by maxDot is used as an index
   // into `points`; the third argument presumably receives the maximum dot
   // value (confirm against btVector3::maxDot).
  120         long ptIndex = vec.
maxDot(points, numPoints, maxDot);
 
  126         btVector3 supVec = points[ptIndex] * localScaling;
 
  // Fragment: builds a vector whose components are +/-halfExtents with the
  // sign taken from localDir.
  // NOTE(review): the enclosing function/case, the #else before #error, the
  // #endif lines and the first argument of the scalar constructor call (line
  // 153) are not present in this listing.
  144 #if defined(__APPLE__) && (defined(BT_USE_SSE) || defined(BT_USE_NEON)) 
  145 #if defined(BT_USE_SSE) 
  // SSE: -0.0f has only the sign bit set, so the AND isolates the sign bits of
  // localDir and the XOR flips the sign of each halfExtents lane whose
  // localDir lane is negative (all four lanes are processed).
  146                         return btVector3(_mm_xor_ps(_mm_and_ps(localDir.mVec128, (__m128){-0.0f, -0.0f, -0.0f, -0.0f}), halfExtents.mVec128));
 
  147 #elif defined(BT_USE_NEON) 
  // NEON: same sign-bit trick using an explicit 0x80000000 integer mask.
  148                         return btVector3((float32x4_t)(((uint32x4_t)localDir.mVec128 & (uint32x4_t){0x80000000, 0x80000000, 0x80000000, 0x80000000}) ^ (uint32x4_t)halfExtents.mVec128));
 
  150 #error unknown vector arch 
  // Scalar fallback, y and z components: btFsels presumably returns its second
  // argument when the first is >= 0 and its third otherwise (confirm against
  // the btFsels definition).
  154                                                          btFsels(localDir.
y(), halfExtents.
y(), -halfExtents.
y()),
 
  155                                                          btFsels(localDir.
z(), halfExtents.
z(), -halfExtents.
z()));
 
  // Fragment: evaluates dir against three vertices in one call; `dots`
  // presumably holds dir.dot(vertices[k]) in component k (confirm against
  // btVector3::dot3). How `dots` is used is not visible in this listing.
  163                         btVector3 dots = dir.
dot3(vertices[0], vertices[1], vertices[2]);
 
  // Fragment of a cylinder support computation.
  // NOTE(review): the switch cases, the declarations of `v`, `tmp` and
  // `halfExtents`, and the branches around lines 216/223 are not present in
  // this listing.
  174                         int cylinderUpAxis = cylShape->
getUpAxis();
 
  // Axis index mapping with defaults XX=1, YY=0, ZZ=2; presumably reassigned
  // per up axis by the switch below (cases not visible).
  175                         int XX(1), YY(0), ZZ(2);
 
  177                         switch (cylinderUpAxis)
 
  // Half height is the half extent along the up axis.
  206                         btScalar halfHeight = halfExtents[cylinderUpAxis];
 
  // The YY component of the result is +/-halfHeight, negative only when
  // v[YY] < 0. The same assignment appears at two listing lines, presumably
  // one per branch of a conditional that is not visible here.
  216                                 tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
 
  223                                 tmp[YY] = v[YY] < 0.0 ? -halfHeight : halfHeight;
 
  // Fragment of a capsule support computation: `pos` is placed at +halfHeight
  // and then -halfHeight along the capsule's up axis, and a dot product
  // against `vec` is taken for each.
  // NOTE(review): the declarations of `pos`, `vtx`, `vec`, `halfHeight` and
  // `newDot`, and the code that derives `vtx` from `pos` and compares
  // `newDot`, are not present in this listing.
  234                         int capsuleUpAxis = capsuleShape->getUpAxis();
 
  // Candidate at the positive end of the up axis.
  255                                 pos[capsuleUpAxis] = halfHeight;
 
  258                                 newDot = vec.
dot(vtx);
 
  // Candidate at the negative end of the up axis.
  268                                 pos[capsuleUpAxis] = -halfHeight;
 
  271                                 newDot = vec.
dot(vtx);
 
  // Fragment: axis-aligned bounds extending `margin` from `center` along every
  // axis. `center` and `margin` are computed in lines not present in this
  // listing.
  384                         btVector3 extent(margin, margin, margin);
 
  385                         aabbMin = center - extent;
 
  386                         aabbMax = center + extent;
 
  // Fragment: bounds from half extents. The half extents are first grown by
  // `margin` on every axis, then combined with abs_b[0..2] via dot3 to get the
  // extent around `center`.
  // NOTE(review): abs_b is presumably the component-wise absolute value of the
  // transform's rotation basis; its definition, and those of `center` and
  // `margin`, are not present in this listing.
  396                         halfExtents += 
btVector3(margin, margin, margin);
 
  399                         btVector3 extent = halfExtents.
dot3(abs_b[0], abs_b[1], abs_b[2]);
 
  401                         aabbMin = center - extent;
 
  402                         aabbMax = center + extent;
 
  // Fragment: per-axis bounds. For each of the three axes the max bound is
  // tmp[i] + margin and the min bound is tmp[i] - margin.
  // NOTE(review): `tmp` is computed in lines not present in this listing
  // (embedded numbering skips 410-416 and 418-419); it is presumably
  // recomputed between the two assignments, so the two tmp[i] values need not
  // be equal.
  409                         for (
int i = 0; i < 3; i++)
 
  417                                 aabbMax[i] = tmp[i] + margin;
 
  420                                 aabbMin[i] = tmp[i] - margin;
 
  // Fragment of a capsule bounds computation: the extent around `center` is
  // halfExtents combined with abs_b[0..2] via dot3.
  // NOTE(review): `m_upAxis` is a local variable here despite the member-style
  // prefix. The code that builds `halfExtents`, `abs_b` and `center` is not
  // present in this listing.
  428                         int m_upAxis = capsuleShape->
getUpAxis();
 
  432                         btVector3 extent = halfExtents.
dot3(abs_b[0], abs_b[1], abs_b[2]);
 
  433                         aabbMin = center - extent;
 
  434                         aabbMax = center + extent;
 
  // Fragment: hands the bounds computation to this object's own getAabb for
  // transform `t`, writing into aabbMin/aabbMax. Presumably the fallback for
  // shape types without a dedicated branch; the surrounding switch is not
  // visible in this listing.
  447                         this->
getAabb(t, aabbMin, aabbMax);