18 # define BT_ADDITIONAL_DEBUG
104 #include <emmintrin.h>
// Broadcasts lane `e` of `x` into all four lanes of the result.
106 #define btVecSplat(x, e) _mm_shuffle_ps(x, x, _MM_SHUFFLE(e, e, e, e))
// SSE2 three-component dot product. The inputs are multiplied lane by lane,
// then the splats of lanes 0, 1 and 2 of the product are added together, so
// every lane of the returned vector holds the same three-term sum.
// Lane 3 of the product is computed but never added in.
107 static inline __m128 btSimdDot3(__m128 vec0, __m128 vec1)
109 __m128 result = _mm_mul_ps(vec0, vec1);
110 return _mm_add_ps(btVecSplat(result, 0), _mm_add_ps(btVecSplat(result, 1), btVecSplat(result, 2)));
// SSE4.1 / FMA configuration macros.
// NOTE(review): the #else / #endif directives that separate the alternative
// definitions below are not visible in this extract.
113 #if defined(BT_ALLOW_SSE4)
117 #define USE_FMA3_INSTEAD_FMA4 1
118 #define USE_SSE4_DOT 1
// Mask 0x7f: the high nibble (0x7) multiplies lanes 0..2 only, the low nibble
// (0xf) writes the resulting sum into all four lanes.
120 #define SSE4_DP(a, b) _mm_dp_ps(a, b, 0x7f)
// Same dot product, extracted as a scalar float from lane 0.
121 #define SSE4_DP_FP(a, b) _mm_cvtss_f32(_mm_dp_ps(a, b, 0x7f))
// DOT_PRODUCT maps either to the SSE4.1 instruction or to the SSE2 helper;
// presumably selected by USE_SSE4_DOT -- the guarding directive is not shown.
124 #define DOT_PRODUCT(a, b) SSE4_DP(a, b)
126 #define DOT_PRODUCT(a, b) btSimdDot3(a, b)
130 #if USE_FMA3_INSTEAD_FMA4
// FMA3 intrinsics.
132 #define FMADD(a, b, c) _mm_fmadd_ps(a, b, c)
134 #define FMNADD(a, b, c) _mm_fnmadd_ps(a, b, c)
// FMA4 intrinsics.
137 #define FMADD(a, b, c) _mm_macc_ps(a, b, c)
139 #define FMNADD(a, b, c) _mm_nmacc_ps(a, b, c)
// Non-fused fallback: FMADD is c + a*b, FMNADD is c - a*b.
143 #define FMADD(a, b, c) _mm_add_ps(c, _mm_mul_ps(a, b))
145 #define FMNADD(a, b, c) _mm_sub_ps(c, _mm_mul_ps(a, b))
// Fragment of an SSE2 impulse-clamping routine with a lower and an upper
// limit. Its signature and several intermediate statements (including the
// definition of `sum`) are not visible in this extract.
// Subtract both velocity projections, each scaled by c.m_jacDiagABInv.
158 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
159 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
// Per-lane masks: all bits set where sum is below the respective limit.
162 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
163 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
// Where sum < lowerLimit1, deltaImpulse becomes (lowerLimit1 - cpAppliedImp);
// other lanes keep their value. The and/andnot/or triple is a bitwise select.
164 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
165 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
// Store lowerLimit1 where the lower limit was violated, otherwise store sum.
166 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
// Where sum is NOT below upperLimit1, deltaImpulse becomes
// (upperLimit1 - cpAppliedImp); other lanes keep their value.
167 __m128 upperMinApplied = _mm_sub_ps(upperLimit1, cpAppliedImp);
168 deltaImpulse = _mm_or_ps(_mm_and_ps(resultUpperLess, deltaImpulse), _mm_andnot_ps(resultUpperLess, upperMinApplied));
// Copy of the clamped delta; its later use is not shown here.
172 __m128 impulseMagnitude = deltaImpulse;
// Fragment of the SSE4.1 variant of the two-sided clamp. The statements that
// assign `tmp` are not visible in this extract.
183 #if defined(BT_ALLOW_SSE4)
// deltaImpulse -= deltaVelNDotn * tmp, for both velocity projections.
190 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
191 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
// maskLower is set where tmp is above the lower limit, maskUpper where tmp is
// below the upper limit.
193 const __m128 maskLower = _mm_cmpgt_ps(tmp, lowerLimit);
194 const __m128 maskUpper = _mm_cmpgt_ps(upperLimit, tmp);
// _mm_blendv_ps(a, b, mask) takes b where the mask is set and a elsewhere, so
// per lane: lower limit violated -> lowerLimit - applied; otherwise upper
// limit violated -> upperLimit - applied; otherwise deltaImpulse unchanged.
195 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), _mm_blendv_ps(_mm_sub_ps(upperLimit, c.
m_appliedImpulse), deltaImpulse, maskUpper), maskLower);
// Same selection for the stored impulse: lowerLimit, upperLimit or tmp.
196 c.
m_appliedImpulse = _mm_blendv_ps(lowerLimit, _mm_blendv_ps(upperLimit, tmp, maskUpper), maskLower);
// Delegates to the SSE2 implementation; presumably the branch compiled when
// BT_ALLOW_SSE4 is not defined (the #else is not shown) -- TODO confirm.
204 return gResolveSingleConstraintRowGeneric_sse2(bodyA, bodyB, c);
// Fragment of an SSE2 impulse-clamping routine that applies only a lower
// limit in the lines visible here. Its signature and the definition of `sum`
// are not shown in this extract.
// Subtract both velocity projections, each scaled by c.m_jacDiagABInv.
216 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
217 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
// Per-lane masks: all bits set where sum is below the respective limit.
220 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
// NOTE(review): no line visible in this extract consumes resultUpperLess
// after this point -- confirm whether it is dead in this routine.
221 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
// Where sum < lowerLimit1, deltaImpulse becomes (lowerLimit1 - cpAppliedImp);
// other lanes keep their value.
222 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
223 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
// Store lowerLimit1 where the lower limit was violated, otherwise store sum.
224 c.
m_appliedImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
// Copy of the clamped delta; its later use is not shown here.
227 __m128 impulseMagnitude = deltaImpulse;
// Fragment of the SSE4.1 variant of the lower-limit clamp. The statements
// that assign `tmp` are not visible in this extract.
// deltaImpulse -= deltaVelNDotn * tmp, for both velocity projections.
244 deltaImpulse = FMNADD(deltaVel1Dotn, tmp, deltaImpulse);
245 deltaImpulse = FMNADD(deltaVel2Dotn, tmp, deltaImpulse);
// mask is set where tmp is above the lower limit.
247 const __m128 mask = _mm_cmpgt_ps(tmp, lowerLimit);
// Keep deltaImpulse where the limit holds; elsewhere use
// (lowerLimit - c.m_appliedImpulse).
248 deltaImpulse = _mm_blendv_ps(_mm_sub_ps(lowerLimit, c.
m_appliedImpulse), deltaImpulse, mask);
// Delegates to the SSE2 implementation; presumably the branch compiled when
// BT_ALLOW_SSE4 is not defined (the #else is not shown) -- TODO confirm.
257 return gResolveSingleConstraintRowLowerLimit_sse2(bodyA, bodyB, c);
258 #endif //BT_ALLOW_SSE4
// Fragment of an SSE2 lower-limit clamp whose result is stored in
// c.m_appliedPushImpulse rather than c.m_appliedImpulse. Its signature and
// the definition of `sum` are not visible in this extract.
// Subtract both velocity projections, each scaled by c.m_jacDiagABInv.
330 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel1Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
331 deltaImpulse = _mm_sub_ps(deltaImpulse, _mm_mul_ps(deltaVel2Dotn, _mm_set1_ps(c.
m_jacDiagABInv)));
// Per-lane masks: all bits set where sum is below the respective limit.
334 resultLowerLess = _mm_cmplt_ps(
sum, lowerLimit1);
// NOTE(review): no line visible in this extract consumes resultUpperLess
// after this point -- confirm whether it is dead in this routine.
335 resultUpperLess = _mm_cmplt_ps(
sum, upperLimit1);
// Where sum < lowerLimit1, deltaImpulse becomes (lowerLimit1 - cpAppliedImp);
// other lanes keep their value.
336 __m128 lowMinApplied = _mm_sub_ps(lowerLimit1, cpAppliedImp);
337 deltaImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowMinApplied), _mm_andnot_ps(resultLowerLess, deltaImpulse));
// Store lowerLimit1 where the lower limit was violated, otherwise store sum.
338 c.
m_appliedPushImpulse = _mm_or_ps(_mm_and_ps(resultLowerLess, lowerLimit1), _mm_andnot_ps(resultLowerLess,
sum));
// Copy of the clamped delta; its later use is not shown here.
341 __m128 impulseMagnitude = deltaImpulse;
380 #endif //BT_ALLOW_SSE4
// Return statements of accessor functions whose signatures are not visible
// in this extract. Each one hands back a row-solver function by name (no
// call), i.e. a pointer to that function.
// SSE2 variants.
402 return gResolveSingleConstraintRowGeneric_sse2;
406 return gResolveSingleConstraintRowLowerLimit_sse2;
// SSE4.1 / FMA3 variants; the closing #endif shows they sit inside a
// BT_ALLOW_SSE4 conditional.
411 return gResolveSingleConstraintRowGeneric_sse4_1_fma3;
415 return gResolveSingleConstraintRowLowerLimit_sse4_1_fma3;
417 #endif //BT_ALLOW_SSE4
// Fragment of a bounded random-integer helper; its signature, the origin of
// `r` and the bodies of the range checks are not visible in this extract.
// Work on the bound as an unsigned value.
430 const unsigned long un =
static_cast<unsigned long>(n);
// Cascade of checks against successively smaller bounds: 65536, 256, 16, 4
// and 2. What each branch does is not shown.
435 if (un <= 0x00010000UL)
438 if (un <= 0x00000100UL)
441 if (un <= 0x00000010UL)
444 if (un <= 0x00000004UL)
447 if (un <= 0x00000002UL)
// Reduce r modulo the bound and narrow to int. The result lies in [0, un)
// provided r is unsigned -- r's declaration is not shown, TODO confirm.
456 return (
int)(r % un);
// Isolated statements; the enclosing definitions are not visible in this
// extract.
// Guard on the magnitude of rel_vel being below velocityThreshold (the branch
// body is not shown).
497 if (
btFabs(rel_vel) < velocityThreshold)
// rest is the restitution factor times the negated relative velocity.
500 btScalar rest = restitution * -rel_vel;
// Scales loc_lateral by friction_scaling in place. NOTE(review): whether this
// operator*= is component-wise depends on the operand types, not shown here.
512 loc_lateral *= friction_scaling;
// Fills `solverConstraint` as a friction row. Only part of the body is
// visible in this extract; the notes below cover the visible statements only.
//   solverConstraint  row that is written (m_rhs and m_cfm are set below)
//   desiredVelocity   target relative velocity used for the velocity error
//   cfmSlip           value stored in solverConstraint.m_cfm
// The remaining parameters are not used in any line visible here.
518 void btSequentialImpulseConstraintSolver::setupFrictionConstraint(
btSolverConstraint& solverConstraint,
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
// relaxation divided by the sum of the two denominator terms.
577 btScalar denom = relaxation / (denom0 + denom1);
// Relative velocity is the sum of both bodies' projections.
586 rel_vel = vel1Dotn + vel2Dotn;
// Error between the requested and the current relative velocity.
590 btScalar velocityError = desiredVelocity - rel_vel;
599 penetrationImpulse = positionalError * solverConstraint.
m_jacDiagABInv;
// Right-hand side combines the positional and the velocity impulse.
602 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
604 solverConstraint.
m_cfm = cfmSlip;
// Returns a reference to a friction row named `solverConstraint`. How that
// row is obtained is not visible in this extract. The first visible body line
// is the tail of a call that forwards colObj0, colObj1, relaxation,
// infoGlobal, desiredVelocity and cfmSlip; the callee name is on a line that
// is not shown.
610 btSolverConstraint&
btSequentialImpulseConstraintSolver::addFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
const btContactSolverInfo& infoGlobal,
btScalar desiredVelocity,
btScalar cfmSlip)
615 colObj0, colObj1, relaxation, infoGlobal, desiredVelocity, cfmSlip);
616 return solverConstraint;
// Fragment of a torsional friction row setup; the enclosing signature is not
// visible in this extract.
// The row's friction coefficient is the combined torsional friction.
638 solverConstraint.
m_friction = combinedTorsionalFriction;
// Relative velocity is the sum of both bodies' projections.
669 rel_vel = vel1Dotn + vel2Dotn;
// Right-hand side is the velocity impulse alone in the lines visible here.
675 solverConstraint.
m_rhs = velocityImpulse;
676 solverConstraint.
m_cfm = cfmSlip;
// Returns a reference to a torsional friction row named `solverConstraint`.
// How that row is obtained is not visible in this extract. The first visible
// body line is the tail of a call that forwards colObj0, colObj1, relaxation,
// desiredVelocity and cfmSlip; the callee name is on a line that is not
// shown. This signature takes no btContactSolverInfo parameter.
682 btSolverConstraint&
btSequentialImpulseConstraintSolver::addTorsionalFrictionConstraint(
const btVector3& normalAxis,
int solverBodyIdA,
int solverBodyIdB,
int frictionIndex,
btManifoldPoint& cp,
btScalar combinedTorsionalFriction,
const btVector3& rel_pos1,
const btVector3& rel_pos2,
btCollisionObject* colObj0,
btCollisionObject* colObj1,
btScalar relaxation,
btScalar desiredVelocity,
btScalar cfmSlip)
687 colObj0, colObj1, relaxation, desiredVelocity, cfmSlip);
688 return solverConstraint;
// Fragment of a solver-body lookup with two preprocessor variants. The
// "#else // BT_THREADSAFE" marker shows that the first part belongs to the
// BT_THREADSAFE build and the part after it to the other build. Branch bodies
// are not visible in this extract.
// -1 means "no solver body assigned yet".
694 int solverBodyId = -1;
// Rigid bodies that are neither static nor kinematic.
698 if (isRigidBodyType && !isStaticOrKinematic)
703 if (solverBodyId < 0)
// Kinematic rigid bodies.
711 else if (isRigidBodyType && isKinematic)
721 const int INVALID_SOLVER_BODY_ID = -1;
728 if (solverBodyId == INVALID_SOLVER_BODY_ID)
741 if (!isMultiBodyType)
757 #else // BT_THREADSAFE
759 int solverBodyIdA = -1;
791 return solverBodyIdA;
792 #endif // BT_THREADSAFE
// Fragment of a contact row setup: two parameters from its signature followed
// by scattered body statements. Branch structure is not visible in this
// extract.
797 int solverBodyIdA,
int solverBodyIdB,
// Relaxation factor is read from infoGlobal.m_sor.
816 relaxation = infoGlobal.
m_sor;
854 #ifdef COMPUTE_IMPULSE_DENOM
871 #endif //COMPUTE_IMPULSE_DENOM
// relaxation divided by the two denominator terms plus cfm.
873 btScalar denom = relaxation / (denom0 + denom1 + cfm);
944 btScalar rel_vel = vel1Dotn + vel2Dotn;
// Start from the restitution target minus the current relative velocity.
947 btScalar velocityError = restitution - rel_vel;
// NOTE(review): the next two statements are presumably alternative branches
// selected on the sign of `penetration`; the condition is not shown.
953 velocityError -= penetration * invTimeStep;
957 positionalError = -penetration * erp * invTimeStep;
// Two assignments to m_rhs; presumably alternative branches, one including
// the penetration impulse and one with the velocity impulse only.
966 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
972 solverConstraint.
m_rhs = velocityImpulse;
// Fragment of a contact conversion routine: two parameters from its signature
// followed by scattered body statements. The conditions selecting between the
// friction calls below are not visible in this extract.
982 int solverBodyIdA,
int solverBodyIdB,
1019 int rollingFriction = 1;
// Contact position on the second object, relative to that object's origin.
1041 rel_pos2 = pos2 - colObj1->getWorldTransform().getOrigin();
1052 setupContactConstraint(solverConstraint, solverBodyIdA, solverBodyIdB, cp, infoGlobal, relaxation, rel_pos1, rel_pos2);
// Torsional friction about the contact normal, using the combined spinning
// friction. The call stops at `relaxation`, so the two trailing parameters
// must take defaults from a declaration that is not shown here.
1061 addTorsionalFrictionConstraint(cp.
m_normalWorldOnB, solverBodyIdA, solverBodyIdB, frictionIndex, cp, cp.
m_combinedSpinningFriction, rel_pos1, rel_pos2, colObj0, colObj1, relaxation);
// Each axis is checked against a length threshold of 0.001 (branch bodies not
// shown).
1071 if (axis0.
length() > 0.001)
1074 if (axis1.
length() > 0.001)
// Friction rows along the two lateral directions stored on the contact point.
// These calls stop at `infoGlobal`, so the two trailing parameters must take
// defaults from a declaration that is not shown here.
1105 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1113 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1122 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
1128 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal);
// Same two directions, but passing the contact point's motion targets and
// friction CFM in the desiredVelocity and cfmSlip positions.
1139 addFrictionConstraint(cp.
m_lateralFrictionDir1, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion1, cp.
m_frictionCFM);
1142 addFrictionConstraint(cp.
m_lateralFrictionDir2, solverBodyIdA, solverBodyIdB, frictionIndex, cp, rel_pos1, rel_pos2, colObj0, colObj1, relaxation, infoGlobal, cp.
m_contactMotion2, cp.
m_frictionCFM);
// Visits each manifold in manifoldPtr by index; the rest of the loop body is
// not visible in this extract.
1155 for (i = 0; i < numManifolds; i++)
1157 manifold = manifoldPtr[i];
// Point info2.cfm at this row's m_cfm field. The address-of expression had
// been corrupted by an encoding error: the leading "&curren" of
// "&currentConstraintRow" was decoded as the HTML currency-sign entity,
// leaving a stray character and an undeclared identifier.
1222 info2.cfm = &currentConstraintRow->m_cfm;
// Scattered statements from joint and body conversion code; enclosing
// signatures and loop bodies are not visible in this extract.
// Relative velocity is the sum of both bodies' projections.
1282 rel_vel = vel1Dotn + vel2Dotn;
1288 solverConstraint.
m_rhs = penetrationImpulse + velocityImpulse;
1297 for (
int j = 0; j < numConstraints; j++)
// Row counter starts at zero; where it is incremented is not shown.
1304 int totalNumRows = 0;
1308 for (
int i = 0; i < numConstraints; i++)
// Only enabled constraints take this branch (body not shown).
1320 if (constraints[i]->isEnabled())
1336 for (
int i = 0; i < numConstraints; i++)
// Debug check that the row cursor stays below the total row count.
1342 btAssert(currentRow < totalNumRows);
1352 convertJoint(currentConstraintRow, constraint, info1, solverBodyIdA, solverBodyIdB, infoGlobal);
1361 for (
int i = 0; i < numBodies; i++)
1367 #endif // BT_THREADSAFE
1375 for (
int i = 0; i < numBodies; i++)
// Extra consistency checks, compiled only when BT_ADDITIONAL_DEBUG is
// defined. Loop bodies and nesting are not visible in this extract.
1419 #ifdef BT_ADDITIONAL_DEBUG
1421 for (
int i = 0; i < numConstraints; i++)
1429 for (
int b = 0; b < numBodies; b++)
1442 for (
int b = 0; b < numBodies; b++)
// For every manifold, each body that is not static or kinematic is compared
// by pointer against the entries of bodies[]; the inner loops are presumably
// nested under the preceding if. What happens on a match is not shown.
1455 for (
int i = 0; i < numManifolds; i++)
1457 if (!manifoldPtr[i]->getBody0()->isStaticOrKinematicObject())
1460 for (
int b = 0; b < numBodies; b++)
1462 if (manifoldPtr[i]->getBody0() == bodies[b])
1470 if (!manifoldPtr[i]->getBody1()->isStaticOrKinematicObject())
1473 for (
int b = 0; b < numBodies; b++)
1475 if (manifoldPtr[i]->getBody1() == bodies[b])
1484 #endif //BT_ADDITIONAL_DEBUG
// One index loop per constraint pool (non-contact, contact, friction). The
// loop bodies are not visible in this extract.
1509 for (i = 0; i < numNonContactPool; i++)
1513 for (i = 0; i < numConstraintPool; i++)
1517 for (i = 0; i < numFrictionPool; i++)
// Fragment of a single solver iteration. Assuming these lines share one
// function scope: leastSquaresResidual starts at zero, is raised to the
// maximum of residual * residual at every update site visible below, and is
// returned at the end. Loop bodies and the code producing `residual` are not
// visible in this extract.
1529 btScalar leastSquaresResidual = 0.f;
1539 for (
int j = 0; j < numNonContactPool; ++j)
1550 for (
int j = 0; j < numConstraintPool; ++j)
1558 for (
int j = 0; j < numFrictionPool; ++j)
1576 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1582 for (
int j = 0; j < numConstraints; j++)
// Only enabled constraints take this branch (body not shown).
1584 if (constraints[j]->isEnabled())
1600 for (
int c = 0; c < numPoolConstraints; c++)
1607 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1611 bool applyFriction =
true;
1623 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1637 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1649 for (j = 0; j < numPoolConstraints; j++)
1653 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1659 for (j = 0; j < numFrictionPoolConstraints; j++)
1670 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1676 for (
int j = 0; j < numRollingFrictionPoolConstraints; j++)
// Rolling friction limit: the row's friction times totalImpulse, capped at
// the row's friction value, then applied symmetrically as lower and upper
// limits.
1682 btScalar rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction * totalImpulse;
1683 if (rollingFrictionMagnitude > rollingFrictionConstraint.
m_friction)
1684 rollingFrictionMagnitude = rollingFrictionConstraint.
m_friction;
1686 rollingFrictionConstraint.
m_lowerLimit = -rollingFrictionMagnitude;
1687 rollingFrictionConstraint.
m_upperLimit = rollingFrictionMagnitude;
1690 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
1694 return leastSquaresResidual;
// Fragment of the split impulse iteration loop; the enclosing signature and
// most loop bodies are not visible in this extract.
1699 BT_PROFILE(
"solveGroupCacheFriendlySplitImpulseIterations");
// At most infoGlobal.m_numIterations passes.
1704 for (iteration = 0; iteration < infoGlobal.
m_numIterations; iteration++)
// Per-pass running maximum of squared residuals.
1706 btScalar leastSquaresResidual = 0.f;
1710 for (j = 0; j < numPoolConstraints; j++)
1715 leastSquaresResidual =
btMax(leastSquaresResidual, residual * residual);
// True once the residual is at or below the configured threshold, or on the
// final pass. The action taken is not shown; presumably the loop ends here.
1718 if (leastSquaresResidual <= infoGlobal.m_leastSquaresResidualThreshold || iteration >= (infoGlobal.
m_numIterations - 1))
// Optional diagnostic output, compiled only with VERBOSE_RESIDUAL_PRINTF.
1720 #ifdef VERBOSE_RESIDUAL_PRINTF
1721 printf(
"residual = %f at iteration #%d\n", leastSquaresResidual, iteration);
1732 BT_PROFILE(
"solveGroupCacheFriendlyIterations");
1740 for (
int iteration = 0; iteration < maxIterations; iteration++)
1747 #ifdef VERBOSE_RESIDUAL_PRINTF
1767 for (
int j = iBegin; j < iEnd; j++)
1787 for (
int j = iBegin; j < iEnd; j++)
1810 for (
int i = iBegin; i < iEnd; i++)