102 #include <emmintrin.h> 
  // Broadcasts lane `e` (0..3) of the packed-float vector `x` into all four
  // lanes of the result. `e` must be a compile-time constant because it is
  // forwarded to _MM_SHUFFLE.
  #define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e))

  // Three-component dot product of two packed-float vectors.
  //
  // Only lanes 0, 1 and 2 of the inputs take part; lane 3 is ignored.
  // The scalar result vec0[0]*vec1[0] + vec0[1]*vec1[1] + vec0[2]*vec1[2]
  // is returned replicated in every lane of the output vector.
  static inline __m128 btSimdDot3(__m128 vec0, __m128 vec1)
  {
      // Lane-wise products; lane 3 is computed but never read below.
      __m128 result = _mm_mul_ps(vec0, vec1);

      // Sum the three splatted products as lane0 + (lane1 + lane2).
      return _mm_add_ps(btVecSplat(result, 0), _mm_add_ps(btVecSplat(result, 1), btVecSplat(result, 2)));
  }
 
  111 #if defined(BT_ALLOW_SSE4) 
  115 #define USE_FMA3_INSTEAD_FMA4 1 
  116 #define USE_SSE4_DOT 1 
  118 #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f) 
  119 #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f)) 
  122 #define DOT_PRODUCT(a, b) SSE4_DP(a, b) 
  124 #define DOT_PRODUCT(a, b) btSimdDot3(a, b) 
  128 #if USE_FMA3_INSTEAD_FMA4 
  130 #define FMADD(a, b, c) _mm_fmadd_ps(a, b, c) 
  132 #define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c) 
  135 #define FMADD(a, b, c) _mm_macc_ps(a, b, c) 
  137 #define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c) 
  141 #define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b)) 
  143 #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b)) 
  156         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  157         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  160         resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
 
  161         resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
 
  162         __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
 
  163         deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
 
  164         c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, 
sum));
 
  165         __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
 
  166         deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
 
  170         __m128 impulseMagnitude = deltaImpulse;
 
  181 #if defined(BT_ALLOW_SSE4) 
  188         deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
 
  189         deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
 
  191         const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
 
  192         const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
 
  193         deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.
m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
 
  194         c.
m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
 
  202         return gResolveSingleConstraintRowGeneric_sse2(bodyA, bodyB, c);
 
  214         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  215         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  218         resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
 
  219         resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
 
  220         __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
 
  221         deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
 
  222         c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, 
sum));
 
  225         __m128 impulseMagnitude = deltaImpulse;
 
  242         deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
 
  243         deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
 
  245         const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
 
  246         deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), deltaImpulse, mask);
 
  255         return gResolveSingleConstraintRowLowerLimit_sse2(bodyA, bodyB, c);
 
  256 #endif  //BT_ALLOW_SSE4 
  328         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  329         deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
 
  332         resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
 
  333         resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
 
  334         __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
 
  335         deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
 
  336         c.
m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess, 
sum));
 
  339         __m128 impulseMagnitude = deltaImpulse;
 
  378 #endif  //BT_ALLOW_SSE4 
  400         return gResolveSingleConstraintRowGeneric_sse2;
 
  404         return gResolveSingleConstraintRowLowerLimit_sse2;
 
  409         return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
 
  413         return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
 
  415 #endif  //BT_ALLOW_SSE4 
  428         const unsigned long un = static_cast<unsigned long>(n);
 
  433         if (un <= 0x00010000UL)
 
  436                 if (un <= 0x00000100UL)
 
  439                         if (un <= 0x00000010UL)
 
  442                                 if (un <= 0x00000004UL)
 
  445                                         if (un <= 0x00000002UL)
 
  454         return (
int)(r % un);
 
  495         if (
btFabs(rel_vel) < velocityThreshold)
 
  498         btScalar rest = restitution * -rel_vel;
 
  510                 loc_lateral *= friction_scaling;
 
  516 void btSequentialImpulseConstraintSolver::setupFrictionConstraint(
btSolverConstraint& solverConstraint, 
const btVector3& normalAxis, 
int solverBodyIdA, 
int solverBodyIdB, 
btManifoldPoint& cp, 
const btVector3& rel_pos1, 
const btVector3& rel_pos2, 
btCollisionObject* colObj0, 
btCollisionObject* colObj1, 
btScalar relaxation, 
const btContactSolverInfo& infoGlobal, 
btScalar desiredVelocity, 
btScalar cfmSlip)
 
  575                 btScalar denom = relaxation / (denom0 + denom1);
 
  584                 rel_vel = vel1Dotn + vel2Dotn;
 
  588                 btScalar velocityError = desiredVelocity - rel_vel;
 
  597                         penetrationImpulse = positionalError * solverConstraint.
m_jacDiagABInv;
 
  600                 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
 
  602                 solverConstraint.
m_cfm = cfmSlip;
 
  608 btSolverConstraint& 
btSequentialImpulseConstraintSolver::addFrictionConstraint(
const btVector3& normalAxis, 
int solverBodyIdA, 
int solverBodyIdB, 
int frictionIndex, 
btManifoldPoint& cp, 
const btVector3& rel_pos1, 
const btVector3& rel_pos2, 
btCollisionObject* colObj0, 
btCollisionObject* colObj1, 
btScalar relaxation, 
const btContactSolverInfo& infoGlobal, 
btScalar desiredVelocity, 
btScalar cfmSlip)
 
  613                                                         colObj0, colObj1, relaxation, infoGlobal, desiredVelocity, cfmSlip);
 
  614         return solverConstraint;
 
  636         solverConstraint.
m_friction = combinedTorsionalFriction;
 
  667                 rel_vel = vel1Dotn + vel2Dotn;
 
  673                 solverConstraint.
m_rhs = velocityImpulse;
 
  674                 solverConstraint.
m_cfm = cfmSlip;
 
  680 btSolverConstraint& 
btSequentialImpulseConstraintSolver::addTorsionalFrictionConstraint(
const btVector3& normalAxis, 
int solverBodyIdA, 
int solverBodyIdB, 
int frictionIndex, 
btManifoldPoint& cp, 
btScalar combinedTorsionalFriction, 
const btVector3& rel_pos1, 
const btVector3& rel_pos2, 
btCollisionObject* colObj0, 
btCollisionObject* colObj1, 
btScalar relaxation, 
btScalar desiredVelocity, 
btScalar cfmSlip)
 
  685                                                                          colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
 
  686         return solverConstraint;
 
  692         int solverBodyId = -1;
 
  699                 if (solverBodyId < 0)
 
  717                 const int INVALID_SOLVER_BODY_ID = -1;
 
  724                 if (solverBodyId == INVALID_SOLVER_BODY_ID)
 
  737                 if (!isMultiBodyType)
 
  753 #else   // BT_THREADSAFE 
  755         int solverBodyIdA = -1;
 
  787         return solverBodyIdA;
 
  788 #endif  // BT_THREADSAFE 
  793                                                                                                                                  int solverBodyIdA, 
int solverBodyIdB,
 
  812         relaxation = infoGlobal.
m_sor;
 
  850 #ifdef COMPUTE_IMPULSE_DENOM 
  867 #endif  //COMPUTE_IMPULSE_DENOM 
  869                 btScalar denom = relaxation / (denom0 + denom1 + cfm);
 
  940                 btScalar rel_vel = vel1Dotn + vel2Dotn;
 
  943                 btScalar velocityError = restitution - rel_vel;  
 
  949                         velocityError -= penetration * invTimeStep;
 
  953                         positionalError = -penetration * erp * invTimeStep;
 
  962                         solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;  
 
  968                         solverConstraint.
m_rhs = velocityImpulse;
 
  978                                                                                                                                            int solverBodyIdA, 
int solverBodyIdB,
 
 1041         int rollingFriction = 1;
 
 1063                         rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
 
 1074                         setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, relaxation, rel_pos1, rel_pos2);
 
 1083                                         addTorsionalFrictionConstraint(cp.
m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, cp.
m_combinedSpinningFriction, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
 
 1093                                         if (axis0.
length() > 0.001)
 
 1096                                         if (axis1.
length() > 0.001)
 
 1127                                         addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
 
 1135                                                 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
 
 1144                                         addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
 
 1150                                                 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
 
 1161                                 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion1, cp.
m_frictionCFM);
 
 1164                                         addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion2, cp.
m_frictionCFM);
 
 1177         for (i = 0; i < numManifolds; i++)
 
 1179                 manifold = manifoldPtr[i];
 
 1233         info2.erp = infoGlobal.
m_erp;
 
 1241         info2.m_constraintError = ¤tConstraintRow->
m_rhs;
 
 1244         info2.cfm = ¤tConstraintRow->
m_cfm;
 
 1245         info2.m_lowerLimit = ¤tConstraintRow->
m_lowerLimit;
 
 1246         info2.m_upperLimit = ¤tConstraintRow->
m_upperLimit;
 
 1304                         rel_vel = vel1Dotn + vel2Dotn;
 
 1307                         btScalar velocityError = restitution - rel_vel * info2.m_damping;
 
 1310                         solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
 
 1319         for (
int j = 0; j < numConstraints; j++)
 
 1326         int totalNumRows = 0;
 
 1330         for (
int i = 0; i < numConstraints; i++)
 
 1342                 if (constraints[i]->isEnabled())
 
 1358         for (
int i = 0; i < numConstraints; i++)
 
 1364                         btAssert(currentRow < totalNumRows);
 
 1374                         convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
 
 1383         for (
int i = 0; i < numBodies; i++)
 
 1389 #endif  // BT_THREADSAFE 
 1397         for (
int i = 0; i < numBodies; i++)
 
 1441 #ifdef BT_ADDITIONAL_DEBUG 
 1443         for (
int i = 0; i < numConstraints; i++)
 
 1451                                 for (
int b = 0; b < numBodies; b++)
 
 1464                                 for (
int b = 0; b < numBodies; b++)
 
 1477         for (
int i = 0; i < numManifolds; i++)
 
 1479                 if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
 
 1482                         for (
int b = 0; b < numBodies; b++)
 
 1484                                 if (manifoldPtr[i]->getBody0() == bodies[b])
 
 1492                 if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
 
 1495                         for (
int b = 0; b < numBodies; b++)
 
 1497                                 if (manifoldPtr[i]->getBody1() == bodies[b])
 
 1506 #endif  //BT_ADDITIONAL_DEBUG 
 1531                 for (i = 0; i < numNonContactPool; i++)
 
 1535                 for (i = 0; i < numConstraintPool; i++)
 
 1539                 for (i = 0; i < numFrictionPool; i++)
 
 1551         btScalar leastSquaresResidual = 0.f;
 
 1561                         for (
int j = 0; j < numNonContactPool; ++j)
 
 1572                                 for (
int j = 0; j < numConstraintPool; ++j)
 
 1580                                 for (
int j = 0; j < numFrictionPool; ++j)
 
 1598                         leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1604                 for (
int j = 0; j < numConstraints; j++)
 
 1606                         if (constraints[j]->isEnabled())
 
 1622                         for (
int c = 0; c < numPoolConstraints; c++)
 
 1629                                         leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1633                                 bool applyFriction = 
true;
 
 1645                                                         leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1659                                                         leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1671                         for (j = 0; j < numPoolConstraints; j++)
 
 1675                                 leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1681                         for (j = 0; j < numFrictionPoolConstraints; j++)
 
 1692                                         leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1698                 for (
int j = 0; j < numRollingFrictionPoolConstraints; j++)
 
 1704                                 btScalar rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction * totalImpulse;
 
 1705                                 if (rollingFrictionMagnitude > rollingFrictionConstraint.
m_friction)
 
 1706                                         rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction;
 
 1708                                 rollingFrictionConstraint.
m_lowerLimit = -rollingFrictionMagnitude;
 
 1709                                 rollingFrictionConstraint.
m_upperLimit = rollingFrictionMagnitude;
 
 1712                                 leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1716         return leastSquaresResidual;
 
 1721         BT_PROFILE(
"solveGroupCacheFriendlySplitImpulseIterations");
 
 1726                         for (iteration = 0; iteration < infoGlobal.
m_numIterations; iteration++)
 
 1728                                 btScalar leastSquaresResidual = 0.f;
 
 1732                                         for (j = 0; j < numPoolConstraints; j++)
 
 1737                                                 leastSquaresResidual = 
btMax(leastSquaresResidual, residual * residual);
 
 1740                                 if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.
m_numIterations - 1))
 
 1742 #ifdef VERBOSE_RESIDUAL_PRINTF 
 1743                                         printf(
"residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
 
 1754         BT_PROFILE(
"solveGroupCacheFriendlyIterations");
 
 1762                 for (
int iteration = 0; iteration < 
maxIterations; iteration++)
 
 1769 #ifdef VERBOSE_RESIDUAL_PRINTF 
 1781         for (
int j = iBegin; j < iEnd; j++)
 
 1801         for (
int j = iBegin; j < iEnd; j++)
 
 1824         for (
int i = iBegin; i < iEnd; i++)