PBRT
/home/felix/UBC/projects/AdaptiveLightfieldSampling/pbrt_v2/src/core/parallel.h
00001 
00002 /*
00003     pbrt source code Copyright(c) 1998-2012 Matt Pharr and Greg Humphreys.
00004 
00005     This file is part of pbrt.
00006 
00007     Redistribution and use in source and binary forms, with or without
00008     modification, are permitted provided that the following conditions are
00009     met:
00010 
00011     - Redistributions of source code must retain the above copyright
00012       notice, this list of conditions and the following disclaimer.
00013 
00014     - Redistributions in binary form must reproduce the above copyright
00015       notice, this list of conditions and the following disclaimer in the
00016       documentation and/or other materials provided with the distribution.
00017 
00018     THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
00019     IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
00020     TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
00021     PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
00022     HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
00023     SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
00024     LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
00025     DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
00026     THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
00027     (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
00028     OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
00029 
00030  */
00031 
00032 #if defined(_MSC_VER)
00033 #pragma once
00034 #endif
00035 
00036 #ifndef PBRT_CORE_PARALLEL_H
00037 #define PBRT_CORE_PARALLEL_H
00038 
00039 // core/parallel.h*
00040 #include "pbrt.h"
00041 #if defined(PBRT_IS_APPLE_PPC)
00042 #include <libkern/OSAtomic.h>
00043 #endif // PBRT_IS_APPLE_PPC
00044 
00045 #if defined(PBRT_IS_WINDOWS)
00046 #include <windows.h>
00047 #else
00048 #include <pthread.h>
00049 #include <semaphore.h>
00050 #endif
00051 #include "core/probes.h"
00052 
// Parallel Declarations
//
// AtomicInt32 (and AtomicInt64, when PBRT_HAS_64_BIT_ATOMICS is defined) are
// the volatile integer types operated on by the atomic helpers in this header.
#if defined(PBRT_IS_WINDOWS)
  // _ReadWriteBarrier() is declared as an intrinsic for _MSC_VER >= 1300 and
  // expands to nothing for older compilers.
  #if _MSC_VER >= 1300
    extern "C" void _ReadWriteBarrier();
    #pragma intrinsic(_ReadWriteBarrier)
  #else
    #define _ReadWriteBarrier()
  #endif

  typedef volatile LONG AtomicInt32;
  #ifdef PBRT_HAS_64_BIT_ATOMICS
    typedef volatile LONGLONG AtomicInt64;
  #endif // PBRT_HAS_64_BIT_ATOMICS
#else
  typedef volatile int32_t AtomicInt32;
  #ifdef PBRT_HAS_64_BIT_ATOMICS
    typedef volatile int64_t AtomicInt64;
  #endif // PBRT_HAS_64_BIT_ATOMICS
#endif // !PBRT_IS_WINDOWS
00073 inline int32_t AtomicAdd(AtomicInt32 *v, int32_t delta) {
00074     PBRT_ATOMIC_MEMORY_OP();
00075 #if defined(PBRT_IS_WINDOWS)
00076     // Do atomic add with MSVC inline assembly
00077 #if (PBRT_POINTER_SIZE == 8)
00078     return InterlockedAdd(v, delta);
00079 #else
00080     int32_t result;
00081     _ReadWriteBarrier();
00082     __asm {
00083         __asm mov edx, v
00084         __asm mov eax, delta
00085         __asm lock xadd [edx], eax
00086         __asm mov result, eax
00087     }
00088     _ReadWriteBarrier();
00089     return result + delta;
00090 #endif
00091 #elif defined(PBRT_IS_APPLE_PPC)
00092     return OSAtomicAdd32Barrier(delta, v);
00093 #else
00094     // Do atomic add with gcc x86 inline assembly
00095     int32_t origValue;
00096     __asm__ __volatile__("lock\n"
00097                          "xaddl %0,%1"
00098                          : "=r"(origValue), "=m"(*v) : "0"(delta)
00099                          : "memory");
00100     return origValue + delta;
00101 #endif
00102 }
00103 
00104 
00105 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue,
00106                                     int32_t oldValue);
00107 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue, int32_t oldValue) {
00108     PBRT_ATOMIC_MEMORY_OP();
00109 #if defined(PBRT_IS_WINDOWS)
00110     return InterlockedCompareExchange(v, newValue, oldValue);
00111 #elif defined(PBRT_IS_APPLE_PPC)
00112     return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v);
00113 #else
00114     int32_t result;
00115     __asm__ __volatile__("lock\ncmpxchgl %2,%1"
00116                           : "=a"(result), "=m"(*v)
00117                           : "q"(newValue), "0"(oldValue)
00118                           : "memory");
00119     return result;
00120 #endif
00121 }
00122 
00123 
00124 template <typename T>
00125 inline T *AtomicCompareAndSwapPointer(T **v, T *newValue, T *oldValue) {
00126     PBRT_ATOMIC_MEMORY_OP();
00127 #if defined(PBRT_IS_WINDOWS)
00128     return InterlockedCompareExchange(v, newValue, oldValue);
00129 #elif defined(PBRT_IS_APPLE_PPC)
00130   #ifdef PBRT_HAS_64_BIT_ATOMICS
00131     return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v);
00132   #else
00133     return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v);
00134   #endif
00135 #else
00136     T *result;
00137     __asm__ __volatile__("lock\ncmpxchg"
00138 #ifdef PBRT_HAS_64_BIT_ATOMICS
00139                                        "q"
00140 #else
00141                                        "l"
00142 #endif // 64 bit atomics
00143                                         " %2,%1"
00144                           : "=a"(result), "=m"(*v)
00145                           : "q"(newValue), "0"(oldValue)
00146                           : "memory");
00147     return result;
00148 #endif
00149 }
00150 
00151 
00152 #ifdef PBRT_HAS_64_BIT_ATOMICS
00153 inline int64_t AtomicAdd(AtomicInt64 *v, int64_t delta) {
00154     PBRT_ATOMIC_MEMORY_OP();
00155 #ifdef PBRT_IS_WINDOWS
00156     return InterlockedAdd64(v, delta);
00157 #elif defined(PBRT_IS_APPLE_PPC)
00158     return OSAtomicAdd64Barrier(delta, v);
00159 #else
00160     int64_t result;
00161     __asm__ __volatile__("lock\nxaddq %0,%1"
00162                           : "=r"(result), "=m"(*v)
00163                           : "0"(delta)
00164                           : "memory");
00165    return result + delta;
00166 #endif
00167 }
00168 
00169 
00170 
00171 inline int64_t AtomicCompareAndSwap(AtomicInt64 *v, int64_t newValue, int64_t oldValue) {
00172     PBRT_ATOMIC_MEMORY_OP();
00173 #if defined(PBRT_IS_WINDOWS)
00174     return InterlockedCompareExchange64(v, newValue, oldValue);
00175 #elif defined(PBRT_IS_APPLE_PPC)
00176     return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v);
00177 #else
00178     int64_t result;
00179     __asm__ __volatile__("lock\ncmpxchgq %2,%1"
00180                           : "=a"(result), "=m"(*v)
00181                           : "q"(newValue), "0"(oldValue)
00182                           : "memory");
00183     return result;
00184 #endif
00185 }
00186 
00187 
00188 #endif // PBRT_HAS_64_BIT_ATOMICS
00189 inline float AtomicAdd(volatile float *val, float delta) {
00190     PBRT_ATOMIC_MEMORY_OP();
00191     union bits { float f; int32_t i; };
00192     bits oldVal, newVal;
00193     do {
00194         // On IA32/x64, adding a PAUSE instruction in compare/exchange loops
00195         // is recommended to improve performance.  (And it does!)
00196 #if (defined(__i386__) || defined(__amd64__))
00197         __asm__ __volatile__ ("pause\n");
00198 #endif
00199         oldVal.f = *val;
00200         newVal.f = oldVal.f + delta;
00201     } while (AtomicCompareAndSwap(((AtomicInt32 *)val),
00202                                   newVal.i, oldVal.i) != oldVal.i);
00203     return newVal.f;
00204 }
00205 
00206 
00207 struct MutexLock;
00208 class Mutex {
00209 public:
00210     static Mutex *Create();
00211     static void Destroy(Mutex *m);
00212 private:
00213     // Mutex Private Methods
00214     Mutex();
00215     ~Mutex();
00216     friend struct MutexLock;
00217     Mutex(Mutex &);
00218     Mutex &operator=(const Mutex &);
00219 
00220     // System-dependent mutex implementation
00221 #if defined(PBRT_IS_WINDOWS)
00222     CRITICAL_SECTION criticalSection;
00223 #else
00224     pthread_mutex_t mutex;
00225 #endif
00226 };
00227 
00228 
00229 struct MutexLock {
00230     MutexLock(Mutex &m);
00231     ~MutexLock();
00232 private:
00233     Mutex &mutex;
00234     MutexLock(const MutexLock &);
00235     MutexLock &operator=(const MutexLock &);
00236 };
00237 
00238 
00239 class RWMutex {
00240 public:
00241     static RWMutex *Create();
00242     static void Destroy(RWMutex *m);
00243 private:
00244     // RWMutex Private Methods
00245     RWMutex();
00246     ~RWMutex();
00247     friend struct RWMutexLock;
00248     RWMutex(RWMutex &);
00249     RWMutex &operator=(const RWMutex &);
00250 
00251     // System-dependent rw mutex implementation
00252 #if defined(PBRT_IS_WINDOWS)
00253     void AcquireRead();
00254     void ReleaseRead();
00255     void AcquireWrite();
00256     void ReleaseWrite();
00257     
00258     LONG numWritersWaiting;
00259     LONG numReadersWaiting;
00260     
00261     // HIWORD is writer active flag;
00262     // LOWORD is readers active count;
00263     DWORD activeWriterReaders;
00264     
00265     HANDLE hReadyToRead;
00266     HANDLE hReadyToWrite;
00267     CRITICAL_SECTION cs;
00268 #else
00269     pthread_rwlock_t mutex;
00270 #endif
00271 };
00272 
00273 
00274 enum RWMutexLockType { READ, WRITE };
00275 struct RWMutexLock {
00276     RWMutexLock(RWMutex &m, RWMutexLockType t);
00277     ~RWMutexLock();
00278     void UpgradeToWrite();
00279     void DowngradeToRead();
00280 private:
00281     RWMutexLockType type;
00282     RWMutex &mutex;
00283     RWMutexLock(const RWMutexLock &);
00284     RWMutexLock &operator=(const RWMutexLock &);
00285 };
00286 
00287 
00288 class Semaphore {
00289 public:
00290     // Semaphore Public Methods
00291     Semaphore();
00292     ~Semaphore();
00293     void Post(int count = 1);
00294     void Wait();
00295     bool TryWait();
00296 private:
00297     // Semaphore Private Data
00298 #if defined(PBRT_IS_WINDOWS)
00299     HANDLE handle;
00300 #else
00301     sem_t *sem;
00302     static int count;
00303 #endif
00304 };
00305 
00306 
00307 class ConditionVariable {
00308 public:
00309     // ConditionVariable Public Methods
00310     ConditionVariable();
00311     ~ConditionVariable();
00312     void Lock();
00313     void Unlock();
00314     void Wait();
00315     void Signal();
00316 private:
00317     // ConditionVariable Private Data
00318 #if !defined(PBRT_IS_WINDOWS)
00319     pthread_mutex_t mutex;
00320     pthread_cond_t cond;
00321 #else
00322     // Count of the number of waiters.
00323     uint32_t waitersCount;
00324     // Serialize access to <waitersCount>.
00325     CRITICAL_SECTION waitersCountMutex, conditionMutex;
00326     // Signal and broadcast event HANDLEs.
00327     enum { SIGNAL = 0, BROADCAST=1, NUM_EVENTS=2 };
00328     HANDLE events[NUM_EVENTS];
00329 #endif
00330 };
00331 
00332 
// Task system start-up and shutdown entry points (defined outside this
// header).
void TasksInit();
void TasksCleanup();

// Task is the abstract unit of work handed to EnqueueTasks(): subclasses
// must implement Run().  The destructor is virtual so that derived tasks
// can be destroyed through a Task pointer.
class Task {
public:
    // Task Interface
    virtual ~Task();
    virtual void Run() = 0;
};
00340 
00341 
00342 void EnqueueTasks(const vector<Task *> &tasks);
00343 void WaitForAllTasks();
00344 int NumSystemCores();
00345 
00346 #endif // PBRT_CORE_PARALLEL_H