#if BT_USE_OPENMP && BT_THREADSAFE
// stuff for OpenMP
#include <omp.h>
#endif  // #if BT_USE_OPENMP && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
// stuff for the Microsoft Parallel Patterns Library
#include <ppl.h>
#include <concrtrm.h>
#endif  // #if BT_USE_PPL && BT_THREADSAFE

#if BT_USE_TBB && BT_THREADSAFE
// stuff for Intel Threading Building Blocks
#define __TBB_NO_IMPLICIT_LINKAGE 1
#include <tbb/task_scheduler_init.h>
#include <tbb/parallel_for.h>
#include <tbb/blocked_range.h>
#endif  // #if BT_USE_TBB && BT_THREADSAFE
#if __cplusplus >= 201103L
// for anything claiming full C++11 compliance, use C++11 atomics
#define USE_CPP11_ATOMICS 1
#elif defined(_MSC_VER)
// on MSVC, use the Interlocked* intrinsics instead
#define USE_MSVC_INTRINSICS 1
#elif defined(__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 7))
// the __atomic builtins are available since GCC 4.7 (and in some versions of clang)
#define USE_GCC_BUILTIN_ATOMICS 1
#elif defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ >= 1)
// the older __sync builtins are available since GCC 4.1
#define USE_GCC_BUILTIN_ATOMICS_OLD 1
#endif
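
// Exactly one backend is selected above: C++11 <atomic> when the compiler
// claims full C++11 support, MSVC Interlocked* intrinsics, the GCC __atomic
// builtins, or the legacy __sync builtins. Each backend must provide the
// btSpinMutex primitives and a THREAD_LOCAL_STATIC macro for declaring
// thread-local statics.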
#if USE_CPP11_ATOMICS

#include <atomic>

#define THREAD_LOCAL_STATIC thread_local static
bool btSpinMutex::tryLock()
{
        std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
        int expected = 0;
        return std::atomic_compare_exchange_weak_explicit(aDest, &expected, int(1), std::memory_order_acq_rel, std::memory_order_acquire);
}
void btSpinMutex::unlock()
{
        std::atomic<int>* aDest = reinterpret_cast<std::atomic<int>*>(&mLock);
        std::atomic_store_explicit(aDest, int(0), std::memory_order_release);
}
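
// Illustrative usage sketch (not part of this file; the counter below is
// hypothetical). btSpinMutex is a busy-wait lock: tryLock publishes 1 with
// acquire/release semantics and unlock stores 0 with a release store, so
// writes made while holding the lock are visible to the next locker. Callers
// normally use the btMutexLock/btMutexUnlock wrappers declared in btThreads.h:
//
//   static btSpinMutex sCounterMutex;
//   static int sSharedCounter = 0;
//
//   void incrementSharedCounter()
//   {
//       btMutexLock(&sCounterMutex);    // spins (never sleeps) until acquired
//       ++sSharedCounter;
//       btMutexUnlock(&sCounterMutex);  // release store publishes the update
//   }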
#elif USE_MSVC_INTRINSICS

#define WIN32_LEAN_AND_MEAN

#include <windows.h>
#include <intrin.h>

#define THREAD_LOCAL_STATIC __declspec(thread) static
bool btSpinMutex::tryLock()
{
        volatile long* aDest = reinterpret_cast<long*>(&mLock);
        return (0 == _InterlockedCompareExchange(aDest, 1, 0));
}
void btSpinMutex::unlock()
{
        volatile long* aDest = reinterpret_cast<long*>(&mLock);
        _InterlockedExchange(aDest, 0);
}
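
// Note: the Interlocked* intrinsics act as full memory barriers, which is
// stronger than the acquire/release orderings used by the C++11 path above,
// so this backend is correct (if slightly conservative) for the same protocol.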
#elif USE_GCC_BUILTIN_ATOMICS

#define THREAD_LOCAL_STATIC static __thread
bool btSpinMutex::tryLock()
{
        int expected = 0;
        bool weak = false;
        const int memOrderSuccess = __ATOMIC_ACQ_REL;
        const int memOrderFail = __ATOMIC_ACQUIRE;
        return __atomic_compare_exchange_n(&mLock, &expected, int(1), weak, memOrderSuccess, memOrderFail);
}
void btSpinMutex::unlock()
{
        __atomic_store_n(&mLock, int(0), __ATOMIC_RELEASE);
}
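
// The orderings mirror the C++11 path: __ATOMIC_ACQ_REL on a successful
// exchange (publish the lock word and synchronize with the previous unlock),
// and the cheaper __ATOMIC_ACQUIRE on failure, where nothing was written and
// we only need to observe the current owner's stores.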
#elif USE_GCC_BUILTIN_ATOMICS_OLD

#define THREAD_LOCAL_STATIC static __thread
bool btSpinMutex::tryLock()
{
        return __sync_bool_compare_and_swap(&mLock, int(0), int(1));
}
void btSpinMutex::unlock()
{
        // write 0, with a full barrier
        __sync_fetch_and_and(&mLock, int(0));
}
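
// The legacy __sync builtins take no ordering arguments; both calls above imply
// a full barrier, again stronger than strictly required. __sync_fetch_and_and
// with 0 doubles as an atomic "store 0", since the __sync family offers no
// plain atomic store.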
#else  //#elif USE_GCC_BUILTIN_ATOMICS_OLD

#error "no threading primitives defined -- unknown platform"

#endif  //#else //#elif USE_GCC_BUILTIN_ATOMICS_OLD
#else  //#if BT_THREADSAFE
// These should never be called
void btSpinMutex::lock()
{
        btAssert(!"unimplemented btSpinMutex::lock() called");
}

void btSpinMutex::unlock()
{
        btAssert(!"unimplemented btSpinMutex::unlock() called");
}

bool btSpinMutex::tryLock()
{
        btAssert(!"unimplemented btSpinMutex::tryLock() called");
        return true;
}
#define THREAD_LOCAL_STATIC static

#endif  // #else //#if BT_THREADSAFE
                        btAssert(!"thread counter exceeded");
// for debugging: set to 1 to detect when 2 threads are given the same thread index
#define BT_DETECT_BAD_THREAD_INDEX 0

#if BT_DETECT_BAD_THREAD_INDEX

typedef DWORD ThreadId_t;
const static ThreadId_t kInvalidThreadId = 0;
ThreadId_t gDebugThreadIds[BT_MAX_THREAD_COUNT];
static ThreadId_t getDebugThreadId()
{
        return GetCurrentThreadId();
}

#endif  // #if BT_DETECT_BAD_THREAD_INDEX
        const unsigned int kNullIndex = ~0U;
        THREAD_LOCAL_STATIC unsigned int sThreadIndex = kNullIndex;
        if (sThreadIndex == kNullIndex)
        {
                // this thread has no index yet, so grab the next one
                sThreadIndex = gThreadCounter.getNext();
        }
#if BT_DETECT_BAD_THREAD_INDEX
        if (btThreadsAreRunning())
        {
                ThreadId_t tid = getDebugThreadId();
                // if this thread index has not been claimed by a thread-id yet, claim it
                if (gDebugThreadIds[sThreadIndex] == kInvalidThreadId)
                {
                        gDebugThreadIds[sThreadIndex] = tid;
                }
                else
                {
                        if (gDebugThreadIds[sThreadIndex] != tid)
                        {
                                // the task scheduler is breaking our assumptions about how threads are managed
                                btAssert(!"there are 2 or more threads with the same thread-index!");
                        }
                }
        }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX
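
// Illustrative usage sketch (hypothetical caller code): the per-thread index
// enables lock-free per-thread storage, e.g. one accumulator slot per worker,
// sized by BT_MAX_THREAD_COUNT from btThreads.h:
//
//   static btScalar sPerThreadTotals[BT_MAX_THREAD_COUNT];
//
//   void addSample(btScalar x)
//   {
//       sPerThreadTotals[btGetCurrentThreadIndex()] += x;  // no lock needed
//   }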
                btAssert(!"btSetTaskScheduler must be called from the main thread!");
#if BT_DETECT_BAD_THREAD_INDEX
        // threads are not running yet: clear out the recorded thread-ids
        if (!btThreadsAreRunning())
        {
                for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
                        gDebugThreadIds[i] = kInvalidThreadId;
        }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX
#else  // #if BT_THREADSAFE

        // non-parallel version of btParallelFor
        btAssert(!"called btParallelFor in non-threadsafe build. enable BT_THREADSAFE");
        body.forLoop(iBegin, iEnd);

#endif  // #if BT_THREADSAFE
#if BT_DETECT_BAD_THREAD_INDEX
        // threads are not running yet: clear out the recorded thread-ids
        if (!btThreadsAreRunning())
        {
                for (int i = 0; i < BT_MAX_THREAD_COUNT; ++i)
                        gDebugThreadIds[i] = kInvalidThreadId;
        }
#endif  // #if BT_DETECT_BAD_THREAD_INDEX
#else  // #if BT_THREADSAFE

        // non-parallel version of btParallelSum
        btAssert(!"called btParallelSum in non-threadsafe build. enable BT_THREADSAFE");
        return body.sumLoop(iBegin, iEnd);

#endif  //#else // #if BT_THREADSAFE
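
// Illustrative caller-side sketch (not part of this file; MyZeroBody and the
// positions/count variables are hypothetical). Work is described by deriving
// from btIParallelForBody and is handed to btParallelFor together with a grain
// size, the smallest index range worth dispatching as a single task:
//
//   struct MyZeroBody : public btIParallelForBody
//   {
//       btVector3* mArray;
//       virtual void forLoop(int iBegin, int iEnd) const BT_OVERRIDE
//       {
//           for (int i = iBegin; i < iEnd; ++i)
//               mArray[i].setZero();
//       }
//   };
//
//   MyZeroBody zeroBody;
//   zeroBody.mArray = positions;
//   btParallelFor(0, count, 64, zeroBody);  // grainSize 64: don't split below ~64 indices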
                // sequential scheduler: execute the body directly on the calling thread
                body.forLoop(iBegin, iEnd);

                return body.sumLoop(iBegin, iEnd);
#if BT_USE_OPENMP && BT_THREADSAFE
                return omp_get_max_threads();
                omp_set_num_threads(1);  // hopefully this destroys all previous worker threads
                omp_set_num_threads(m_numThreads);
                m_savedThreadCounter = 0;
#pragma omp parallel for schedule(static, 1)
                for (int i = iBegin; i < iEnd; i += grainSize)
                {
                        body.forLoop(i, (std::min)(i + grainSize, iEnd));
                }
                btScalar sum = btScalar(0);
#pragma omp parallel for schedule(static, 1) reduction(+ : sum)
                for (int i = iBegin; i < iEnd; i += grainSize)
                {
                        sum += body.sumLoop(i, (std::min)(i + grainSize, iEnd));
                }
                return sum;
#endif  // #if BT_USE_OPENMP && BT_THREADSAFE
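
// With schedule(static, 1), each iteration of the chunked loops above (one
// grain-sized chunk) is handed out round-robin: for iBegin=0, iEnd=1000,
// grainSize=100 the loop runs 10 times, and thread k of N processes chunks
// k, k+N, k+2N, ... Chunk assignment is thus deterministic for a given thread
// count, at the cost of no dynamic load balancing.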
#if BT_USE_TBB && BT_THREADSAFE

        tbb::task_scheduler_init* m_tbbSchedulerInit;

                m_tbbSchedulerInit = NULL;
        ~btTaskSchedulerTBB()
        {
                if (m_tbbSchedulerInit)
                {
                        delete m_tbbSchedulerInit;
                        m_tbbSchedulerInit = NULL;
                }
        }
                return tbb::task_scheduler_init::default_num_threads();
                if (m_tbbSchedulerInit)
                {
                        // destroys all previous threads
                        delete m_tbbSchedulerInit;
                        m_tbbSchedulerInit = NULL;
                }
                m_tbbSchedulerInit = new tbb::task_scheduler_init(m_numThreads);
                m_savedThreadCounter = 0;
        struct ForBodyAdapter
        {
                const btIParallelForBody* mBody;

                ForBodyAdapter(const btIParallelForBody* body) : mBody(body) {}
                void operator()(const tbb::blocked_range<int>& range) const
                {
                        mBody->forLoop(range.begin(), range.end());
                }
        };
                ForBodyAdapter tbbBody(&body);
                tbb::parallel_for(tbb::blocked_range<int>(iBegin, iEnd, grainSize),
                                  tbbBody,
                                  tbb::simple_partitioner());
        struct SumBodyAdapter
        {
                const btIParallelSumBody* mBody;
                btScalar mSum;

                SumBodyAdapter(const btIParallelSumBody* body) : mBody(body), mSum(btScalar(0)) {}
                SumBodyAdapter(const SumBodyAdapter& src, tbb::split) : mBody(src.mBody), mSum(btScalar(0)) {}
                void join(const SumBodyAdapter& src) { mSum += src.mSum; }
                void operator()(const tbb::blocked_range<int>& range)
                {
                        mSum += mBody->sumLoop(range.begin(), range.end());
                }
        };
                SumBodyAdapter tbbBody(&body);
                tbb::parallel_deterministic_reduce(tbb::blocked_range<int>(iBegin, iEnd, grainSize), tbbBody);
                return tbbBody.mSum;
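
        // parallel_deterministic_reduce (rather than tbb::parallel_reduce) fixes
        // how ranges are split and joined, so the floating-point sum is associated
        // the same way on every run, making btScalar results reproducible even
        // though floating-point addition is not associative.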
#endif  // #if BT_USE_TBB && BT_THREADSAFE

#if BT_USE_PPL && BT_THREADSAFE
        concurrency::combinable<btScalar> m_sum;  // for parallelSum
                return concurrency::GetProcessorCount();
                // cap the thread count for PPL due to a thread-index issue
                const int maxThreadCount = (std::min)(int(BT_MAX_THREAD_COUNT), 31);
                m_numThreads = (std::max)(1, (std::min)(maxThreadCount, numThreads));
                using namespace concurrency;
                if (CurrentScheduler::Id() != -1)
                {
                        CurrentScheduler::Detach();
                }
                SchedulerPolicy policy;
                {
                        // PPL seems to destroy threads when the threadpool is shrunk, but keeps
                        // reusing old threads; force it to destroy the old threads
                        policy.SetConcurrencyLimits(1, 1);
                        CurrentScheduler::Create(policy);
                        CurrentScheduler::Detach();
                }
                policy.SetConcurrencyLimits(m_numThreads, m_numThreads);
                CurrentScheduler::Create(policy);
                m_savedThreadCounter = 0;
        struct ForBodyAdapter
        {
                const btIParallelForBody* mBody;
                int mGrainSize;
                int mIndexEnd;

                ForBodyAdapter(const btIParallelForBody* body, int grainSize, int end) : mBody(body), mGrainSize(grainSize), mIndexEnd(end) {}
                void operator()(int i) const
                {
                        mBody->forLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
                }
        };
                ForBodyAdapter pplBody(&body, grainSize, iEnd);
                // note: MSVC 2010 doesn't support partitioner args, so avoid them
                concurrency::parallel_for(iBegin,
                                          iEnd,
                                          grainSize,
                                          pplBody);
        struct SumBodyAdapter
        {
                const btIParallelSumBody* mBody;
                concurrency::combinable<btScalar>* mSum;
                int mGrainSize;
                int mIndexEnd;

                SumBodyAdapter(const btIParallelSumBody* body, concurrency::combinable<btScalar>* sum, int grainSize, int end) : mBody(body), mSum(sum), mGrainSize(grainSize), mIndexEnd(end) {}
                void operator()(int i) const
                {
                        mSum->local() += mBody->sumLoop(i, (std::min)(i + mGrainSize, mIndexEnd));
                }
        };
        static btScalar sumFunc(btScalar a, btScalar b) { return a + b; }
                m_sum.clear();
                SumBodyAdapter pplBody(&body, &m_sum, grainSize, iEnd);
                concurrency::parallel_for(iBegin,
                                          iEnd,
                                          grainSize,
                                          pplBody);
                return m_sum.combine(sumFunc);
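
        // concurrency::combinable gives each PPL worker its own btScalar
        // accumulator (mSum->local() above), so sumLoop results are added without
        // locks; combine() then folds the per-thread partials together with
        // sumFunc into the final total.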
#endif  // #if BT_USE_PPL && BT_THREADSAFE

        static btTaskSchedulerSequential sTaskScheduler;
        return &sTaskScheduler;
#if BT_USE_OPENMP && BT_THREADSAFE
        static btTaskSchedulerOpenMP sTaskScheduler;
        return &sTaskScheduler;
#if BT_USE_TBB && BT_THREADSAFE
        static btTaskSchedulerTBB sTaskScheduler;
        return &sTaskScheduler;
#if BT_USE_PPL && BT_THREADSAFE
        static btTaskSchedulerPPL sTaskScheduler;
        return &sTaskScheduler;