PBRT
|
00001 00002 /* 00003 pbrt source code Copyright(c) 1998-2012 Matt Pharr and Greg Humphreys. 00004 00005 This file is part of pbrt. 00006 00007 Redistribution and use in source and binary forms, with or without 00008 modification, are permitted provided that the following conditions are 00009 met: 00010 00011 - Redistributions of source code must retain the above copyright 00012 notice, this list of conditions and the following disclaimer. 00013 00014 - Redistributions in binary form must reproduce the above copyright 00015 notice, this list of conditions and the following disclaimer in the 00016 documentation and/or other materials provided with the distribution. 00017 00018 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS 00019 IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 00020 TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A 00021 PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 00022 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 00023 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 00024 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00025 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00026 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00027 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 00028 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00029 00030 */ 00031 00032 #if defined(_MSC_VER) 00033 #pragma once 00034 #endif 00035 00036 #ifndef PBRT_CORE_PARALLEL_H 00037 #define PBRT_CORE_PARALLEL_H 00038 00039 // core/parallel.h* 00040 #include "pbrt.h" 00041 #if defined(PBRT_IS_APPLE_PPC) 00042 #include <libkern/OSAtomic.h> 00043 #endif // PBRT_IS_APPLE_PPC 00044 00045 #if defined(PBRT_IS_WINDOWS) 00046 #include <windows.h> 00047 #else 00048 #include <pthread.h> 00049 #include <semaphore.h> 00050 #endif 00051 #include "core/probes.h" 00052 00053 // Parallel Declarations 00054 #if defined(PBRT_IS_WINDOWS) 00055 #if _MSC_VER >= 1300 00056 extern "C" void _ReadWriteBarrier(); 00057 #pragma intrinsic(_ReadWriteBarrier) 00058 #else 00059 #define _ReadWriteBarrier() 00060 #endif 00061 00062 typedef volatile LONG AtomicInt32; 00063 00064 #ifdef PBRT_HAS_64_BIT_ATOMICS 00065 typedef volatile LONGLONG AtomicInt64; 00066 #endif // 64-bit 00067 #else 00068 typedef volatile int32_t AtomicInt32; 00069 #ifdef PBRT_HAS_64_BIT_ATOMICS 00070 typedef volatile int64_t AtomicInt64; 00071 #endif 00072 #endif // !PBRT_IS_WINDOWS 00073 inline int32_t AtomicAdd(AtomicInt32 *v, int32_t delta) { 00074 PBRT_ATOMIC_MEMORY_OP(); 00075 #if defined(PBRT_IS_WINDOWS) 00076 // Do atomic add with MSVC inline assembly 00077 #if (PBRT_POINTER_SIZE == 8) 00078 return InterlockedAdd(v, delta); 00079 #else 00080 int32_t result; 00081 _ReadWriteBarrier(); 00082 __asm { 00083 __asm mov edx, v 00084 __asm mov eax, delta 00085 __asm lock xadd [edx], eax 00086 __asm mov result, eax 00087 } 00088 _ReadWriteBarrier(); 00089 return result + delta; 00090 #endif 00091 #elif defined(PBRT_IS_APPLE_PPC) 00092 return OSAtomicAdd32Barrier(delta, v); 00093 #else 00094 // Do atomic add with gcc x86 inline assembly 00095 int32_t origValue; 00096 __asm__ __volatile__("lock\n" 00097 "xaddl %0,%1" 00098 : "=r"(origValue), "=m"(*v) : "0"(delta) 00099 : "memory"); 00100 return origValue + delta; 00101 #endif 00102 } 00103 00104 00105 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue, 00106 int32_t oldValue); 00107 inline int32_t AtomicCompareAndSwap(AtomicInt32 *v, int32_t newValue, int32_t oldValue) { 00108 PBRT_ATOMIC_MEMORY_OP(); 00109 #if defined(PBRT_IS_WINDOWS) 00110 return InterlockedCompareExchange(v, newValue, oldValue); 00111 #elif defined(PBRT_IS_APPLE_PPC) 00112 return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v); 00113 #else 00114 int32_t result; 00115 __asm__ __volatile__("lock\ncmpxchgl %2,%1" 00116 : "=a"(result), "=m"(*v) 00117 : "q"(newValue), "0"(oldValue) 00118 : "memory"); 00119 return result; 00120 #endif 00121 } 00122 00123 00124 template <typename T> 00125 inline T *AtomicCompareAndSwapPointer(T **v, T *newValue, T *oldValue) { 00126 PBRT_ATOMIC_MEMORY_OP(); 00127 #if defined(PBRT_IS_WINDOWS) 00128 return InterlockedCompareExchange(v, newValue, oldValue); 00129 #elif defined(PBRT_IS_APPLE_PPC) 00130 #ifdef PBRT_HAS_64_BIT_ATOMICS 00131 return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v); 00132 #else 00133 return OSAtomicCompareAndSwap32Barrier(oldValue, newValue, v); 00134 #endif 00135 #else 00136 T *result; 00137 __asm__ __volatile__("lock\ncmpxchg" 00138 #ifdef PBRT_HAS_64_BIT_ATOMICS 00139 "q" 00140 #else 00141 "l" 00142 #endif // 64 bit atomics 00143 " %2,%1" 00144 : "=a"(result), "=m"(*v) 00145 : "q"(newValue), "0"(oldValue) 00146 : "memory"); 00147 return result; 00148 #endif 00149 } 00150 00151 00152 #ifdef PBRT_HAS_64_BIT_ATOMICS 00153 inline int64_t AtomicAdd(AtomicInt64 *v, int64_t delta) { 00154 PBRT_ATOMIC_MEMORY_OP(); 00155 #ifdef PBRT_IS_WINDOWS 00156 return InterlockedAdd64(v, delta); 00157 #elif defined(PBRT_IS_APPLE_PPC) 00158 return OSAtomicAdd64Barrier(delta, v); 00159 #else 00160 int64_t result; 00161 __asm__ __volatile__("lock\nxaddq %0,%1" 00162 : "=r"(result), "=m"(*v) 00163 : "0"(delta) 00164 : "memory"); 00165 return result + delta; 00166 #endif 00167 } 00168 00169 00170 00171 inline int64_t AtomicCompareAndSwap(AtomicInt64 *v, int64_t newValue, int64_t oldValue) { 00172 PBRT_ATOMIC_MEMORY_OP(); 00173 #if defined(PBRT_IS_WINDOWS) 00174 return InterlockedCompareExchange64(v, newValue, oldValue); 00175 #elif defined(PBRT_IS_APPLE_PPC) 00176 return OSAtomicCompareAndSwap64Barrier(oldValue, newValue, v); 00177 #else 00178 int64_t result; 00179 __asm__ __volatile__("lock\ncmpxchgq %2,%1" 00180 : "=a"(result), "=m"(*v) 00181 : "q"(newValue), "0"(oldValue) 00182 : "memory"); 00183 return result; 00184 #endif 00185 } 00186 00187 00188 #endif // PBRT_HAS_64_BIT_ATOMICS 00189 inline float AtomicAdd(volatile float *val, float delta) { 00190 PBRT_ATOMIC_MEMORY_OP(); 00191 union bits { float f; int32_t i; }; 00192 bits oldVal, newVal; 00193 do { 00194 // On IA32/x64, adding a PAUSE instruction in compare/exchange loops 00195 // is recommended to improve performance. (And it does!) 00196 #if (defined(__i386__) || defined(__amd64__)) 00197 __asm__ __volatile__ ("pause\n"); 00198 #endif 00199 oldVal.f = *val; 00200 newVal.f = oldVal.f + delta; 00201 } while (AtomicCompareAndSwap(((AtomicInt32 *)val), 00202 newVal.i, oldVal.i) != oldVal.i); 00203 return newVal.f; 00204 } 00205 00206 00207 struct MutexLock; 00208 class Mutex { 00209 public: 00210 static Mutex *Create(); 00211 static void Destroy(Mutex *m); 00212 private: 00213 // Mutex Private Methods 00214 Mutex(); 00215 ~Mutex(); 00216 friend struct MutexLock; 00217 Mutex(Mutex &); 00218 Mutex &operator=(const Mutex &); 00219 00220 // System-dependent mutex implementation 00221 #if defined(PBRT_IS_WINDOWS) 00222 CRITICAL_SECTION criticalSection; 00223 #else 00224 pthread_mutex_t mutex; 00225 #endif 00226 }; 00227 00228 00229 struct MutexLock { 00230 MutexLock(Mutex &m); 00231 ~MutexLock(); 00232 private: 00233 Mutex &mutex; 00234 MutexLock(const MutexLock &); 00235 MutexLock &operator=(const MutexLock &); 00236 }; 00237 00238 00239 class RWMutex { 00240 public: 00241 static RWMutex *Create(); 00242 static void Destroy(RWMutex *m); 00243 private: 00244 // RWMutex Private Methods 00245 RWMutex(); 00246 ~RWMutex(); 00247 friend struct RWMutexLock; 00248 RWMutex(RWMutex &); 00249 RWMutex &operator=(const RWMutex &); 00250 00251 // System-dependent rw mutex implementation 00252 #if defined(PBRT_IS_WINDOWS) 00253 void AcquireRead(); 00254 void ReleaseRead(); 00255 void AcquireWrite(); 00256 void ReleaseWrite(); 00257 00258 LONG numWritersWaiting; 00259 LONG numReadersWaiting; 00260 00261 // HIWORD is writer active flag; 00262 // LOWORD is readers active count; 00263 DWORD activeWriterReaders; 00264 00265 HANDLE hReadyToRead; 00266 HANDLE hReadyToWrite; 00267 CRITICAL_SECTION cs; 00268 #else 00269 pthread_rwlock_t mutex; 00270 #endif 00271 }; 00272 00273 00274 enum RWMutexLockType { READ, WRITE }; 00275 struct RWMutexLock { 00276 RWMutexLock(RWMutex &m, RWMutexLockType t); 00277 ~RWMutexLock(); 00278 void UpgradeToWrite(); 00279 void DowngradeToRead(); 00280 private: 00281 RWMutexLockType type; 00282 RWMutex &mutex; 00283 RWMutexLock(const RWMutexLock &); 00284 RWMutexLock &operator=(const RWMutexLock &); 00285 }; 00286 00287 00288 class Semaphore { 00289 public: 00290 // Semaphore Public Methods 00291 Semaphore(); 00292 ~Semaphore(); 00293 void Post(int count = 1); 00294 void Wait(); 00295 bool TryWait(); 00296 private: 00297 // Semaphore Private Data 00298 #if defined(PBRT_IS_WINDOWS) 00299 HANDLE handle; 00300 #else 00301 sem_t *sem; 00302 static int count; 00303 #endif 00304 }; 00305 00306 00307 class ConditionVariable { 00308 public: 00309 // ConditionVariable Public Methods 00310 ConditionVariable(); 00311 ~ConditionVariable(); 00312 void Lock(); 00313 void Unlock(); 00314 void Wait(); 00315 void Signal(); 00316 private: 00317 // ConditionVariable Private Data 00318 #if !defined(PBRT_IS_WINDOWS) 00319 pthread_mutex_t mutex; 00320 pthread_cond_t cond; 00321 #else 00322 // Count of the number of waiters. 00323 uint32_t waitersCount; 00324 // Serialize access to <waitersCount>. 00325 CRITICAL_SECTION waitersCountMutex, conditionMutex; 00326 // Signal and broadcast event HANDLEs. 00327 enum { SIGNAL = 0, BROADCAST=1, NUM_EVENTS=2 }; 00328 HANDLE events[NUM_EVENTS]; 00329 #endif 00330 }; 00331 00332 00333 void TasksInit(); 00334 void TasksCleanup(); 00335 class Task { 00336 public: 00337 virtual ~Task(); 00338 virtual void Run() = 0; 00339 }; 00340 00341 00342 void EnqueueTasks(const vector<Task *> &tasks); 00343 void WaitForAllTasks(); 00344 int NumSystemCores(); 00345 00346 #endif // PBRT_CORE_PARALLEL_H