Skip to content
Snippets Groups Projects
Commit 17ee72b2 authored by Felix Weiglhofer's avatar Felix Weiglhofer
Browse files

Add atomic operations for floats.

parent 3f4c2cc5
No related branches found
No related tags found
No related merge requests found
......@@ -81,13 +81,17 @@ XPU_D XPU_FORCE_INLINE float tan(float x);
// Atomic helpers taking a pointer to the target word plus the operand(s).
// The implementations visible in this change return the value that was stored
// at `addr` before the operation.

// Compare-and-swap: `val` is written to `*addr` when the current contents match
// `compare`. Overloaded for int, unsigned int and float.
XPU_D XPU_FORCE_INLINE int atomic_cas(int *addr, int compare, int val);
XPU_D XPU_FORCE_INLINE unsigned int atomic_cas(unsigned int *addr, unsigned int compare, unsigned int val);
XPU_D XPU_FORCE_INLINE float atomic_cas(float *addr, float compare, float val);
// `_block` flavour of compare-and-swap. In the CUDA backend the float overload
// forwards to the int overload on the value's bit pattern.
XPU_D XPU_FORCE_INLINE int atomic_cas_block(int *addr, int compare, int val);
XPU_D XPU_FORCE_INLINE unsigned int atomic_cas_block(unsigned int *addr, unsigned int compare, unsigned int val);
XPU_D XPU_FORCE_INLINE float atomic_cas_block(float *addr, float compare, float val);
// Addition: adds `val` to `*addr`.
XPU_D XPU_FORCE_INLINE int atomic_add(int *addr, int val);
XPU_D XPU_FORCE_INLINE unsigned int atomic_add(unsigned int *addr, unsigned int val);
XPU_D XPU_FORCE_INLINE float atomic_add(float *addr, float val);
// `_block` flavour of addition. In the CUDA backend the float overload uses
// atomicAdd_block when XPU_CUDA_HAS_BLOCK_ATOMICS is set and atomicAdd otherwise.
XPU_D XPU_FORCE_INLINE int atomic_add_block(int *addr, int val);
XPU_D XPU_FORCE_INLINE unsigned int atomic_add_block(unsigned int *addr, unsigned int val);
XPU_D XPU_FORCE_INLINE float atomic_add_block(float *addr, float val);
// Subtraction: subtracts `val` from `*addr`.
XPU_D XPU_FORCE_INLINE int atomic_sub(int *addr, int val);
XPU_D XPU_FORCE_INLINE unsigned int atomic_sub(unsigned int *addr, unsigned int val);
......@@ -111,6 +115,9 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_xor_block(unsigned int *addr, unsigne
// Synchronisation point. The CUDA backend implements it with __syncthreads();
// the CPU backend implements it as an empty function.
XPU_D XPU_FORCE_INLINE void barrier();

// Reinterpret the bits of a float as an int (no numeric conversion).
XPU_D int float_as_int(float val);
// Reinterpret the bits of an int as a float (no numeric conversion).
XPU_D float int_as_float(int val);
template<typename Key, typename KeyValueType, int BlockSize, int ItemsPerThread=8, xpu::driver_t Impl=XPU_COMPILATION_TARGET>
class block_sort {
......
......@@ -58,6 +58,11 @@ inline unsigned int atomic_cas(unsigned int *addr, unsigned int compare, unsigne
return compare;
}
// Atomic compare-and-swap on a float, sequentially consistent.
// Writes `val` to `*addr` if the current contents equal `compare` (the builtin
// compares object representations) and returns the value that was stored at
// `addr` before the call.
inline float atomic_cas(float *addr, float compare, float val) {
    float expected = compare;
    float desired = val;
    // Strong (non-weak) exchange. On failure the builtin overwrites `expected`
    // with the value it found; on success `expected` already equals that value.
    (void) __atomic_compare_exchange(addr, &expected, &desired, /*weak=*/false,
                                     __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    return expected;
}
// `_block` compare-and-swap for int on the CPU backend.
// Uses plain loads and a single detail::exchange call; no atomic builtin is
// involved here. The value handed to detail::exchange is `val` when `*addr`
// equals `compare`, otherwise the current contents of `*addr`.
// NOTE(review): detail::exchange is defined outside this excerpt; presumably it
// stores the new value and returns the previous one — confirm.
inline int atomic_cas_block(int *addr, int compare, int val) {
    const int current = *addr;
    const int next = (current == compare) ? val : current;
    return detail::exchange(*addr, next);
}
......@@ -66,6 +71,10 @@ inline unsigned int atomic_cas_block(unsigned int *addr, unsigned int compare, u
return detail::exchange(*addr, (*addr == compare ? val : *addr));
}
inline float atomic_cas_block(float *addr, float compare, float val) {
return detail::exchange(*addr, (*addr == compare ? val : *addr));
}
// Atomically adds `val` to `*addr` (sequentially consistent) and returns the
// value that was stored there before the addition.
inline int atomic_add(int *addr, int val) {
    const int previous = __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
    return previous;
}
......@@ -74,6 +83,18 @@ inline unsigned int atomic_add(unsigned int *addr, unsigned int val) {
return __atomic_fetch_add(addr, val, __ATOMIC_SEQ_CST);
}
// Atomic float addition built from a compare-and-swap retry loop.
// Returns the value that was stored at `addr` before `val` was added.
inline float atomic_add(float *addr, float val) {
    float seen = *addr;
    for (;;) {
        const float expected = seen;
        // Try to replace `expected` with the sum; atomic_cas reports what was
        // actually stored at `addr`.
        seen = atomic_cas(addr, expected, expected + val);
        // Success is detected on the bit patterns, not with float `==`.
        if (float_as_int(seen) == float_as_int(expected)) {
            return seen;
        }
    }
}
// `_block` addition for int on the CPU backend.
// Computes the sum with a plain load and passes it to detail::exchange; no
// atomic builtin is involved here.
// NOTE(review): detail::exchange is defined outside this excerpt; presumably it
// stores the new value and returns the previous one — confirm.
inline int atomic_add_block(int *addr, int val) {
    const int sum = *addr + val;
    return detail::exchange(*addr, sum);
}
......@@ -82,6 +103,10 @@ inline unsigned int atomic_add_block(unsigned int *addr, unsigned int val) {
return detail::exchange(*addr, *addr + val);
}
// `_block` addition for float on the CPU backend.
// Computes the sum with a plain load and passes it to detail::exchange; no
// atomic builtin is involved here.
// NOTE(review): detail::exchange is defined outside this excerpt; presumably it
// stores the new value and returns the previous one — confirm.
inline float atomic_add_block(float *addr, float val) {
    const float sum = *addr + val;
    return detail::exchange(*addr, sum);
}
// Atomically subtracts `val` from `*addr` (sequentially consistent) and returns
// the value that was stored there before the subtraction.
inline int atomic_sub(int *addr, int val) {
    const int previous = __atomic_fetch_sub(addr, val, __ATOMIC_SEQ_CST);
    return previous;
}
......@@ -148,6 +173,23 @@ inline unsigned int atomic_xor_block(unsigned int *addr, unsigned int val) {
// CPU implementation of barrier(): performs no work and returns immediately.
XPU_FORCE_INLINE void barrier() {
}
namespace detail {
// Overlays a float and an int on the same storage, so a value written through
// one member can be read back through the other as raw bits.
// NOTE(review): reading the member that was not written last is undefined
// behaviour in ISO C++; this relies on the compiler supporting union punning.
union float_int_reint {
// The storage viewed as a float.
float f;
// The same storage viewed as an int.
int i;
};
} // namespace detail
// Returns the bit pattern of `val` reinterpreted as an int (no numeric
// conversion takes place).
//
// The bytes are copied with __builtin_memcpy instead of being read through a
// union: reading a union member other than the one last written is undefined
// behaviour in ISO C++, and designated-initializer syntax requires C++20 or a
// compiler extension. The builtin needs no extra header.
inline int float_as_int(float val) {
    static_assert(sizeof(int) == sizeof(float), "float_as_int requires int and float of equal size");
    int bits;
    __builtin_memcpy(&bits, &val, sizeof bits);
    return bits;
}
// Returns the bit pattern of `val` reinterpreted as a float (no numeric
// conversion takes place).
//
// The bytes are copied with __builtin_memcpy instead of being read through a
// union: reading a union member other than the one last written is undefined
// behaviour in ISO C++, and designated-initializer syntax requires C++20 or a
// compiler extension. The builtin needs no extra header.
inline float int_as_float(int val) {
    static_assert(sizeof(float) == sizeof(int), "int_as_float requires int and float of equal size");
    float value;
    __builtin_memcpy(&value, &val, sizeof value);
    return value;
}
template<typename Key, typename KeyValueType, int BlockSize, int ItemsPerThread>
class block_sort<Key, KeyValueType, BlockSize, ItemsPerThread, cpu> {
......
......@@ -72,6 +72,10 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_cas(unsigned int *addr, unsigned int
return atomicCAS(addr, compare, val);
}
// Float compare-and-swap for the CUDA backend.
// The operation is carried out by the int atomicCAS on the raw bit patterns of
// `compare` and `val`; the int it returns is converted back to float.
XPU_D XPU_FORCE_INLINE float atomic_cas(float *addr, float compare, float val) {
    int *raw_addr = (int *) addr;
    const int expected_bits = __float_as_int(compare);
    const int desired_bits = __float_as_int(val);
    const int previous_bits = atomicCAS(raw_addr, expected_bits, desired_bits);
    return __int_as_float(previous_bits);
}
XPU_D XPU_FORCE_INLINE int atomic_cas_block(int *addr, int compare, int val) {
#if XPU_CUDA_HAS_BLOCK_ATOMICS
return atomicCAS_block(addr, compare, val);
......@@ -88,6 +92,10 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_cas_block(unsigned int *addr, unsigne
#endif
}
// Float `_block` compare-and-swap for the CUDA backend.
// Forwards to the int overload of atomic_cas_block on the raw bit patterns of
// `compare` and `val`, then converts the returned int back to float.
XPU_D XPU_FORCE_INLINE float atomic_cas_block(float *addr, float compare, float val) {
    int *raw_addr = (int *) addr;
    const int expected_bits = float_as_int(compare);
    const int desired_bits = float_as_int(val);
    const int previous_bits = atomic_cas_block(raw_addr, expected_bits, desired_bits);
    return int_as_float(previous_bits);
}
// Int addition for the CUDA backend: forwards to atomicAdd and returns its
// result.
//
// The signature takes (addr, val) so that it defines the declared overload
// `int atomic_add(int *addr, int val)`. The previous definition carried an
// extra, unused `int compare` parameter, which left the declared two-argument
// overload without a definition.
XPU_D XPU_FORCE_INLINE int atomic_add(int *addr, int val) {
    return atomicAdd(addr, val);
}
......@@ -96,6 +104,10 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_add(unsigned int *addr, unsigned int
return atomicAdd(addr, val);
}
// Float addition for the CUDA backend: forwards to atomicAdd and returns its
// result.
XPU_D XPU_FORCE_INLINE float atomic_add(float *addr, float val) {
    const float previous = atomicAdd(addr, val);
    return previous;
}
XPU_D XPU_FORCE_INLINE int atomic_add_block(int *addr, int val) {
#if XPU_CUDA_HAS_BLOCK_ATOMICS
return atomicAdd_block(addr, val);
......@@ -112,6 +124,14 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_add_block(unsigned int *addr, unsigne
#endif
}
// Float `_block` addition for the CUDA backend.
// Uses atomicAdd_block when XPU_CUDA_HAS_BLOCK_ATOMICS is set and falls back to
// atomicAdd otherwise; the result of whichever call is made is returned.
XPU_D XPU_FORCE_INLINE float atomic_add_block(float *addr, float val) {
    float previous;
#if XPU_CUDA_HAS_BLOCK_ATOMICS
    previous = atomicAdd_block(addr, val);
#else
    previous = atomicAdd(addr, val);
#endif
    return previous;
}
// Int subtraction for the CUDA backend: forwards to atomicSub and returns its
// result.
XPU_D XPU_FORCE_INLINE int atomic_sub(int *addr, int val) {
    const int previous = atomicSub(addr, val);
    return previous;
}
......@@ -210,6 +230,9 @@ XPU_D XPU_FORCE_INLINE unsigned int atomic_xor_block(unsigned int *addr, unsigne
// CUDA implementation of barrier(): a single call to __syncthreads().
XPU_D XPU_FORCE_INLINE void barrier() {
    __syncthreads();
}
// CUDA implementation of float_as_int: forwards to the __float_as_int intrinsic.
XPU_D XPU_FORCE_INLINE int float_as_int(float val) {
    const int bits = __float_as_int(val);
    return bits;
}
// CUDA implementation of int_as_float: forwards to the __int_as_float intrinsic.
XPU_D XPU_FORCE_INLINE float int_as_float(int val) {
    const float value = __int_as_float(val);
    return value;
}
namespace detail {
#if XPU_IS_CUDA
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment