120 #define btLCP_FAST  // use the fast btLCP object; the region this guards is closed by the `#endif  // btLCP_FAST` further down
  124 #define BTATYPE btScalar **  // expands to `btScalar **` (pointer to pointer to btScalar); presumably the type used for the matrix A -- confirm at the use sites
  125 #define BTAROW(i) (m_A[i])  // expands to m_A[i], so it only works where a name m_A is in scope; used as a row pointer (rows are copied out of it with memcpy)
  132 #define BTNUB_OPTIMIZATIONS  // when defined, the `#ifdef BTNUB_OPTIMIZATIONS` branches in this file are compiled in
  147         btScalar Z11, m11, Z21, m21, p1, q1, p2, *ex;
 
  151         for (i = 0; i < n; i += 2)
 
  157                 ell = L + i * lskip1;
 
  160                 for (j = i - 2; j >= 0; j -= 2)
 
  174                         p2 = ell[1 + lskip1];
 
  203                 Z21 = ex[1] - Z21 - p1 * Z11;
 
  222         btScalar Z11, m11, Z12, m12, Z21, m21, Z22, m22, p1, q1, p2, q2, *ex;
 
  226         for (i = 0; i < n; i += 2)
 
  234                 ell = L + i * lskip1;
 
  237                 for (j = i - 2; j >= 0; j -= 2)
 
  258                         p2 = ell[1 + lskip1];
 
  294                 Z12 = ex[lskip1] - Z12;
 
  297                 Z21 = ex[1] - Z21 - p1 * Z11;
 
  299                 Z22 = ex[1 + lskip1] - Z22 - p1 * Z12;
 
  300                 ex[1 + lskip1] = Z22;
 
  308         btScalar sum, *ell, *dee, dd, p1, p2, q1, q2, Z11, m11, Z21, m21, Z22, m22;
 
  311         for (i = 0; i <= n - 2; i += 2)
 
  320                 ell = A + i * nskip1;
 
  322                 for (j = i - 6; j >= 0; j -= 6)
 
  338                         p2 = ell[1 + nskip1];
 
  343                         ell[1 + nskip1] = q2;
 
  351                         p2 = ell[2 + nskip1];
 
  356                         ell[2 + nskip1] = q2;
 
  364                         p2 = ell[3 + nskip1];
 
  369                         ell[3 + nskip1] = q2;
 
  377                         p2 = ell[4 + nskip1];
 
  382                         ell[4 + nskip1] = q2;
 
  390                         p2 = ell[5 + nskip1];
 
  395                         ell[5 + nskip1] = q2;
 
  427                 Z21 = ell[nskip1] - Z21;
 
  428                 Z22 = ell[1 + nskip1] - Z22;
 
  454                         ell = A + i * nskip1;
 
  456                         for (j = i - 6; j >= 0; j -= 6)
 
  536         btScalar Z11, Z21, Z31, Z41, p1, q1, p2, p3, p4, *ex;
 
  538         int lskip2, lskip3, i, j;
 
  543         for (i = 0; i <= n - 4; i += 4)
 
  551                 ell = L + i * lskip1;
 
  554                 for (j = i - 12; j >= 0; j -= 12)
 
  570                         p2 = ell[1 + lskip1];
 
  571                         p3 = ell[1 + lskip2];
 
  572                         p4 = ell[1 + lskip3];
 
  581                         p2 = ell[2 + lskip1];
 
  582                         p3 = ell[2 + lskip2];
 
  583                         p4 = ell[2 + lskip3];
 
  592                         p2 = ell[3 + lskip1];
 
  593                         p3 = ell[3 + lskip2];
 
  594                         p4 = ell[3 + lskip3];
 
  603                         p2 = ell[4 + lskip1];
 
  604                         p3 = ell[4 + lskip2];
 
  605                         p4 = ell[4 + lskip3];
 
  614                         p2 = ell[5 + lskip1];
 
  615                         p3 = ell[5 + lskip2];
 
  616                         p4 = ell[5 + lskip3];
 
  625                         p2 = ell[6 + lskip1];
 
  626                         p3 = ell[6 + lskip2];
 
  627                         p4 = ell[6 + lskip3];
 
  636                         p2 = ell[7 + lskip1];
 
  637                         p3 = ell[7 + lskip2];
 
  638                         p4 = ell[7 + lskip3];
 
  647                         p2 = ell[8 + lskip1];
 
  648                         p3 = ell[8 + lskip2];
 
  649                         p4 = ell[8 + lskip3];
 
  658                         p2 = ell[9 + lskip1];
 
  659                         p3 = ell[9 + lskip2];
 
  660                         p4 = ell[9 + lskip3];
 
  669                         p2 = ell[10 + lskip1];
 
  670                         p3 = ell[10 + lskip2];
 
  671                         p4 = ell[10 + lskip3];
 
  680                         p2 = ell[11 + lskip1];
 
  681                         p3 = ell[11 + lskip2];
 
  682                         p4 = ell[11 + lskip3];
 
  716                 Z21 = ex[1] - Z21 - p1 * Z11;
 
  719                 p2 = ell[1 + lskip2];
 
  720                 Z31 = ex[2] - Z31 - p1 * Z11 - p2 * Z21;
 
  723                 p2 = ell[1 + lskip3];
 
  724                 p3 = ell[2 + lskip3];
 
  725                 Z41 = ex[3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
 
  735                 ell = L + i * lskip1;
 
  738                 for (j = i - 12; j >= 0; j -= 12)
 
  835         btScalar Z11, m11, Z21, m21, Z31, m31, Z41, m41, p1, q1, p2, p3, p4, *ex;
 
  840         L = L + (n - 1) * (lskip1 + 1);
 
  847         for (i = 0; i <= n - 4; i += 4)
 
  858                 for (j = i - 4; j >= 0; j -= 4)
 
  953                 Z21 = ex[-1] - Z21 - p1 * Z11;
 
  956                 p2 = ell[-2 + lskip1];
 
  957                 Z31 = ex[-2] - Z31 - p1 * Z11 - p2 * Z21;
 
  960                 p2 = ell[-3 + lskip1];
 
  961                 p3 = ell[-3 + lskip2];
 
  962                 Z41 = ex[-3] - Z41 - p1 * Z11 - p2 * Z21 - p3 * Z31;
 
  975                 for (j = i - 4; j >= 0; j -= 4)
 
 1030         for (
int i = 0; i < n; i++)
 
 1038         btAssert(L && d && b && n > 0 && nskip >= n);
 
 1053                                                           int do_fast_row_swaps)
 
 1055         btAssert(A && n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n &&
 
 1056                          nskip >= n && i1 < i2);
 
 1061         for (
int i = i1 + 1; i < i2; ++i)
 
 1067         A_i1[i2] = A_i1[i1];
 
 1068         A_i1[i1] = A_i2[i1];
 
 1069         A_i2[i1] = A_i2[i2];
 
 1071         if (do_fast_row_swaps)
 
 1079                 for (
int k = 0; k <= i2; ++k)
 
 1087         for (
int j = i2 + 1; j < n; ++j)
 
 1097         for (
int k = 0; k < i1; ++k)
 
 1104         for (
int i = i1 + 1; i < i2; A_i += nskip, ++i)
 
 1112                 A_i1[i1] = A_i2[i2];
 
 1116         for (
int j = i2 + 1; j < n; A_j += nskip, ++j)
 
 1128                                                   btScalar *hi, 
int *p, 
bool *state, 
int *findex,
 
 1129                                                   int n, 
int i1, 
int i2, 
int nskip,
 
 1130                                                   int do_fast_row_swaps)
 
 1135         btAssert(n > 0 && i1 >= 0 && i2 >= 0 && i1 < n && i2 < n && nskip >= n && i1 <= i2);
 
 1136         if (i1 == i2) 
return;
 
 1165         state[i1] = state[i2];
 
 1171                 findex[i1] = findex[i2];
 
 1226                   bool *_state, 
int *_findex, 
int *p, 
int *c, 
btScalar **Arows);
 
 1243         void solve1(
btScalar *a, 
int i, 
int dir = 1, 
int only_transfer = 0);
 
 1250                          bool *_state, 
int *_findex, 
int *p, 
int *c, 
btScalar **Arows) : m_n(_n), m_nskip(_nskip), m_nub(_nub), m_nC(0), m_nN(0),
 
 1281                 for (
int k = 0; k < n; aptr += nskip, ++k) A[k] = aptr;
 
 1288                 for (
int k = 0; k < n; ++k) p[k] = k;  
 
 1323                 for (
int k = 
m_nub; k < n; ++k)
 
 1325                         if (findex && findex[k] >= 0) 
continue;
 
 1328                                 btSwapProblem(
m_A, 
m_x, 
m_b, 
m_w, lo, hi, 
m_p, 
m_state, findex, n, 
m_nub, k, 
m_nskip, 0);
 
 1338                 const int nub = 
m_nub;
 
 1342                         for (
int j = 0; j < nub; Lrow += nskip, ++j) memcpy(Lrow, 
BTAROW(j), (j + 1) * 
sizeof(
btScalar));
 
 1350                         for (
int k = 0; k < nub; ++k) C[k] = k;
 
 1358                 const int nub = 
m_nub;
 
 1361                 for (
int k = 
m_n - 1; k >= nub; k--)
 
 1365                                 btSwapProblem(
m_A, 
m_x, 
m_b, 
m_w, 
m_lo, 
m_hi, 
m_p, 
m_state, findex, 
m_n, k, 
m_n - 1 - num_at_end, 
m_nskip, 1);
 
 1393                                 const int nC = 
m_nC;
 
 1395                                 for (
int j = 0; j < nC; ++j) Ltgt[j] = ell[j];
 
 1397                         const int nC = 
m_nC;
 
 1405                 btSwapProblem(
m_A, 
m_x, 
m_b, 
m_w, 
m_lo, 
m_hi, 
m_p, 
m_state, 
m_findex, 
m_n, 
m_nC, i, 
m_nskip, 1);
 
 1407                 const int nC = 
m_nC;
 
 1422 #ifdef BTNUB_OPTIMIZATIONS 
 1424                                 const int nub = 
m_nub;
 
 1426                                 for (; j < nub; ++j) Dell[j] = aptr[j];
 
 1427                                 const int nC = 
m_nC;
 
 1428                                 for (; j < nC; ++j) Dell[j] = aptr[C[j]];
 
 1430                                 const int nC = 
m_nC;
 
 1431                                 for (
int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
 
 1436                                 const int nC = 
m_nC;
 
 1439                                 for (
int j = 0; j < nC; ++j) Ltgt[j] = ell[j] = Dell[j] * d[j];
 
 1441                         const int nC = 
m_nC;
 
 1449                 btSwapProblem(
m_A, 
m_x, 
m_b, 
m_w, 
m_lo, 
m_hi, 
m_p, 
m_state, 
m_findex, 
m_n, 
m_nC, i, 
m_nskip, 1);
 
 1451                 const int nC = 
m_nC;
 
 1466         btAssert(A && n > 0 && nskip >= n && r >= 0 && r < n);
 
 1467         if (r >= n - 1) 
return;
 
 1471                         const size_t move_size = (n - r - 1) * 
sizeof(
btScalar);
 
 1473                         for (
int i = 0; i < r; Adst += nskip, ++i)
 
 1476                                 memmove(Adst, Asrc, move_size);
 
 1480                         const size_t cpy_size = r * 
sizeof(
btScalar);
 
 1482                         for (
int i = r; i < (n - 1); ++i)
 
 1485                                 memcpy(Adst, Asrc, cpy_size);
 
 1491                 const size_t cpy_size = (n - r - 1) * 
sizeof(
btScalar);
 
 1492                 btScalar *Adst = A + r * (nskip + 1);
 
 1493                 for (
int i = r; i < (n - 1); ++i)
 
 1495                         btScalar *Asrc = Adst + (nskip + 1);
 
 1496                         memcpy(Adst, Asrc, cpy_size);
 
 1504         btAssert(L && d && a && n > 0 && nskip >= n);
 
 1507         scratch.
resize(2 * nskip);
 
 1514         for (
int j = 1; j < n; ++j)
 
 1526                 btScalar alphanew = alpha1 + (W11 * W11) * dee;
 
 1532                 alphanew = alpha2 - (W21 * W21) * dee;
 
 1537                 btScalar k2 = W21 * gamma1 * W11 - W21;
 
 1539                 for (
int p = 1; p < n; ll += nskip, ++p)
 
 1543                         W1[p] = Wp - W11 * ell;
 
 1544                         W2[p] = k1 * Wp + k2 * ell;
 
 1549         for (
int j = 1; j < n; ll += nskip + 1, ++j)
 
 1555                 btScalar alphanew = alpha1 + (k1 * k1) * dee;
 
 1561                 alphanew = alpha2 - (k2 * k2) * dee;
 
 1569                 for (
int p = j + 1; p < n; l += nskip, ++p)
 
 1575                         Wp = W2[p] - k2 * ell;
 
 /*
  * Element accessors for a row-pointer matrix named `A`, which must be in
  * scope wherever these macros are expanded.
  *
  * _BTGETA(i, j)  reads A[i][j] directly.
  * BTGETA(i, j)   reads the entry on or below the diagonal: A[i][j] when
  *                i > j, otherwise A[j][i]. Callers can therefore pass the
  *                indices in either order and only that triangle is touched.
  *
  * The arguments of the comparison are parenthesized so that an argument
  * containing an operator of lower precedence than `>` (e.g. `c ? 1 : 2`,
  * `x & 3`) is compared as a whole instead of being torn apart.
  *
  * NOTE: each argument is still evaluated twice (once in the comparison and
  * once in the selected access), so do not pass expressions with side
  * effects such as `k++`.
  */
 #define _BTGETA(i, j) (A[i][j])
 #define BTGETA(i, j) (((i) > (j)) ? _BTGETA(i, j) : _BTGETA(j, i))
 1589         return nskip * 2 * 
sizeof(
btScalar);
 
 1595         btAssert(A && p && L && d && n1 > 0 && n2 > 0 && r >= 0 && r < n2 &&
 
 1596                          n1 >= n2 && nskip >= n1);
 
 1598         for (
int i = 0; i < n2; ++i)
 
 1610                 scratch.
resize(nskip * 2 + n2);
 
 1615                         const int p_0 = p[0];
 
 1616                         for (
int i = 0; i < n2; ++i)
 
 1618                                 a[i] = -
BTGETA(p[i], p_0);
 
 1628                                 for (
int i = 0; i < r; ++Lcurr, ++i)
 
 1631                                         t[i] = *Lcurr / d[i];
 
 1637                                 const int *pp_r = p + r, p_r = *pp_r;
 
 1638                                 const int n2_minus_r = n2 - r;
 
 1639                                 for (
int i = 0; i < n2_minus_r; Lcurr += nskip, ++i)
 
 1645                         btLDLTAddTL(L + r * nskip + r, d + r, a, n2 - r, nskip, scratch);
 
 1651         if (r < (n2 - 1)) memmove(d + r, d + r + 1, (n2 - r - 1) * 
sizeof(
btScalar));
 
 1661                 const int nC = 
m_nC;
 
 1675                                         for (k = j + 1; k < nC; ++k)
 
 1689                                 if (j < (nC - 1)) memmove(C + j, C + j + 1, (nC - j - 1) * 
sizeof(
int));
 
 1695                 btSwapProblem(
m_A, 
m_x, 
m_b, 
m_w, 
m_lo, 
m_hi, 
m_p, 
m_state, 
m_findex, 
m_n, i, nC - 1, 
m_nskip, 1);
 
 1709         const int nC = 
m_nC;
 
 1711         const int nN = 
m_nN;
 
 1712         for (
int i = 0; i < nN; ++i)
 
 1720         const int nC = 
m_nC;
 
 1725                 const int nN = 
m_nN;
 
 1726                 for (
int j = 0; j < nN; ++j) ptgt[j] += aptr[j];
 
 1730                 const int nN = 
m_nN;
 
 1731                 for (
int j = 0; j < nN; ++j) ptgt[j] -= aptr[j];
 
 1737         const int nC = 
m_nC;
 
 1738         for (
int i = 0; i < nC; ++i)
 
 1746         const int nC = 
m_nC;
 
 1747         btScalar *ptgt = p + nC, *qsrc = q + nC;
 
 1748         const int nN = 
m_nN;
 
 1749         for (
int i = 0; i < nN; ++i)
 
 1751                 ptgt[i] += s * qsrc[i];
 
 1769 #ifdef BTNUB_OPTIMIZATIONS 
 1771                         const int nub = 
m_nub;
 
 1773                         for (; j < nub; ++j) Dell[j] = aptr[j];
 
 1774                         const int nC = 
m_nC;
 
 1775                         for (; j < nC; ++j) Dell[j] = aptr[C[j]];
 
 1777                         const int nC = 
m_nC;
 
 1778                         for (
int j = 0; j < nC; ++j) Dell[j] = aptr[C[j]];
 
 1784                         const int nC = 
m_nC;
 
 1785                         for (
int j = 0; j < nC; ++j) ell[j] = Dell[j] * d[j];
 
 1792                                 const int nC = 
m_nC;
 
 1793                                 for (
int j = 0; j < nC; ++j) tmp[j] = ell[j];
 
 1800                                 const int nC = 
m_nC;
 
 1801                                 for (
int j = 0; j < nC; ++j) a[C[j]] = -tmp[j];
 
 1807                                 const int nC = 
m_nC;
 
 1808                                 for (
int j = 0; j < nC; ++j) a[C[j]] = tmp[j];
 
 1822                 for (
int j = 0; j < n; ++j) x[p[j]] = tmp[j];
 
 1829                 for (
int j = 0; j < n; ++j) w[p[j]] = tmp[j];
 
 1833 #endif  // btLCP_FAST 
 1844         btAssert(n > 0 && A && x && b && lo && hi && nub >= 0 && nub <= n);
 
 1850                 for (
int k = 0; k < n; ++k)
 
 1851                         btAssert(lo[k] <= 0 && hi[k] >= 0);
 
 1862                 memcpy(x, b, n * 
sizeof(
btScalar));
 
 1867         const int nskip = (n);
 
 1868         scratchMem.
L.
resize(n * nskip);
 
 1886         btLCP lcp(n, nskip, nub, A, x, b, w, lo, hi, &scratchMem.
L[0], &scratchMem.
d[0], &scratchMem.
Dell[0], &scratchMem.
ell[0], &scratchMem.
delta_w[0], &scratchMem.
state[0], findex, &scratchMem.
p[0], &scratchMem.
C[0], &scratchMem.
Arows[0]);
 
 1887         int adj_nub = lcp.
getNub();
 
 1898         bool hit_first_friction_index = 
false;
 
 1899         for (
int i = adj_nub; i < n; ++i)
 
 1915                 if (!hit_first_friction_index && findex && findex[i] >= 0)
 
 1918                         for (
int j = 0; j < n; ++j) scratchMem.
delta_w[scratchMem.
p[j]] = x[j];
 
 1921                         for (
int k = i; k < n; ++k)
 
 1931                                         hi[k] = 
btFabs(hi[k] * wfk);
 
 1935                         hit_first_friction_index = 
true;
 
 1953                 if (lo[i] == 0 && w[i] >= 0)
 
 1956                         scratchMem.
state[i] = 
false;
 
 1958                 else if (hi[i] == 0 && w[i] <= 0)
 
 1961                         scratchMem.
state[i] = 
true;
 
 2015                                                 btScalar s2 = (hi[i] - x[i]) * dirf;  
 
 2027                                                 btScalar s2 = (lo[i] - x[i]) * dirf;  
 
 2037                                         const int numN = lcp.
numN();
 
 2038                                         for (
int k = 0; k < numN; ++k)
 
 2040                                                 const int indexN_k = lcp.
indexN(k);
 
 2041                                                 if (!scratchMem.
state[indexN_k] ? scratchMem.
delta_w[indexN_k] < 0 : scratchMem.
delta_w[indexN_k] > 0)
 
 2044                                                         if (lo[indexN_k] == 0 && hi[indexN_k] == 0) 
continue;
 
 2057                                         const int numC = lcp.
numC();
 
 2058                                         for (
int k = adj_nub; k < numC; ++k)
 
 2060                                                 const int indexC_k = lcp.
indexC(k);
 
 2063                                                         btScalar s2 = (lo[indexC_k] - x[indexC_k]) / scratchMem.
delta_x[indexC_k];
 
 2073                                                         btScalar s2 = (hi[indexC_k] - x[indexC_k]) / scratchMem.
delta_x[indexC_k];
 
 2109                                 w[i] += s * scratchMem.
delta_w[i];
 
 2121                                                 scratchMem.
state[i] = 
false;
 
 2126                                                 scratchMem.
state[i] = 
true;
 
 2135                                                 scratchMem.
state[si] = 
false;
 
 2140                                                 scratchMem.
state[si] = 
true;
 
 2145                                 if (cmd <= 3) 
break;