AtomicOps.h
Fully qualified name: carb/cpp/detail/AtomicOps.h
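This internal header defines carb::cpp::detail::AtomicOps, a family of size-specialized helpers that atomically test-and-set or test-and-reset a single bit in a 32-bit or 64-bit std::atomic integer. On x86-64 the helpers compile down to lock bts/lock btr (via MSVC intrinsics or GCC inline assembly); on other targets they fall back to fetch_or/fetch_and. A usage sketch follows the listing.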
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once

#include "../../Defines.h"

#include <atomic>

namespace carb
{
namespace cpp
{
namespace detail
{
// Primary template, left undefined; the partial specializations below select an
// implementation based on the operand size.
template <class T, size_t S = sizeof(T)>
struct AtomicOps;

// Specialization for 32-bit integral types.
template <class T>
struct AtomicOps<T, 4>
{
    static_assert(sizeof(T) == 4, "Invalid assumption");
    static_assert(std::is_integral<T>::value, "Must be an integral type");

    using NativeType = T;
    using UnderlyingType = int32_t;
    using AtomicType = std::atomic<T>;

    // Atomically sets bit `bit` of `val` and returns true if the bit was previously set.
    static bool test_bit_and_set(AtomicType& val,
                                 const unsigned bit,
                                 const std::memory_order order = std::memory_order_seq_cst) noexcept;

    // Atomically clears bit `bit` of `val` and returns true if the bit was previously set.
    static bool test_bit_and_reset(AtomicType& val,
                                   const unsigned bit,
                                   const std::memory_order order = std::memory_order_seq_cst) noexcept;
};

// Specialization for 64-bit integral types.
template <class T>
struct AtomicOps<T, 8>
{
    static_assert(sizeof(T) == 8, "Invalid assumption");
    static_assert(std::is_integral<T>::value, "Must be an integral type");

    using NativeType = T;
    using UnderlyingType = int64_t;
    using AtomicType = std::atomic<T>;

    static bool test_bit_and_set(AtomicType& val,
                                 const unsigned bit,
                                 const std::memory_order order = std::memory_order_seq_cst) noexcept;
    static bool test_bit_and_reset(AtomicType& val,
                                   const unsigned bit,
                                   const std::memory_order order = std::memory_order_seq_cst) noexcept;
};

// x86-64 specializations to use bts/btr instructions
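// `lock bts`/`lock btr` test a bit and then set/clear it, returning the bit's prior value in the
// carry flag. A lock-prefixed read-modify-write is a full barrier on x86-64, which is why the
// implementations below accept but ignore the requested memory order.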
#if CARB_PLATFORM_WINDOWS && CARB_X86_64
extern "C" unsigned char _interlockedbittestandset(long volatile*, long);
# pragma intrinsic(_interlockedbittestandset)
extern "C" unsigned char _interlockedbittestandset64(__int64 volatile*, __int64);
# pragma intrinsic(_interlockedbittestandset64)
extern "C" unsigned char _interlockedbittestandreset(long volatile*, long);
# pragma intrinsic(_interlockedbittestandreset)
extern "C" unsigned char _interlockedbittestandreset64(__int64 volatile*, __int64);
# pragma intrinsic(_interlockedbittestandreset64)

template <class T>
bool AtomicOps<T, 4>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    return !!_interlockedbittestandset(reinterpret_cast<long*>(&val), bit);
}

template <class T>
bool AtomicOps<T, 4>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    return !!_interlockedbittestandreset(reinterpret_cast<long*>(&val), bit);
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    return !!_interlockedbittestandset64(reinterpret_cast<__int64*>(&val), bit);
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    return !!_interlockedbittestandreset64(reinterpret_cast<__int64*>(&val), bit);
}
#elif CARB_COMPILER_GNUC && CARB_X86_64
template <class T>
bool AtomicOps<T, 4>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    bool out;
    __asm__ volatile("lock; btsl %2, %1; setc %0" : "=r"(out), "+m"(*(&val)) : "Ir"((bit)) : "cc");
    return out;
}

template <class T>
bool AtomicOps<T, 4>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    bool out;
    __asm__ volatile("lock; btrl %2, %1; setc %0" : "=r"(out), "+m"(*(&val)) : "Ir"((bit)) : "cc");
    return out;
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    bool out;
    __asm__ volatile("lock; btsq %2, %1; setc %0" : "=r"(out), "+m"(*(&val)) : "Jr"((uint64_t)(bit)) : "cc");
    return out;
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order) noexcept
{
    // Effectively seq_cst
    bool out;
    __asm__ volatile("lock; btrq %2, %1; setc %0" : "=r"(out), "+m"(*(&val)) : "Jr"((uint64_t)(bit)) : "cc");
    return out;
}
#else // Fall back to slower existing atomics
template <class T>
bool AtomicOps<T, 4>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order order) noexcept
{
    // Set the bit with an atomic OR and report whether it was already set.
    const NativeType mask = NativeType(1) << bit;
    return !!(val.fetch_or(mask, order) & mask);
}

template <class T>
bool AtomicOps<T, 4>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order order) noexcept
{
    // Clear the bit with an atomic AND and report whether it was previously set.
    const NativeType mask = NativeType(1) << bit;
    return !!(val.fetch_and(~mask, order) & mask);
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_set(AtomicType& val, const unsigned bit, const std::memory_order order) noexcept
{
    const NativeType mask = NativeType(1) << bit;
    return !!(val.fetch_or(mask, order) & mask);
}

template <class T>
bool AtomicOps<T, 8>::test_bit_and_reset(AtomicType& val, const unsigned bit, const std::memory_order order) noexcept
{
    const NativeType mask = NativeType(1) << bit;
    return !!(val.fetch_and(~mask, order) & mask);
}
#endif
} // namespace detail
} // namespace cpp
} // namespace carb
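
For illustration, a minimal usage sketch, assuming the header is included via the fully qualified path above; the wrapper name testAndSetBit is hypothetical and not part of this header:

#include <carb/cpp/detail/AtomicOps.h>

#include <atomic>
#include <cstdint>
#include <cstdio>

// Hypothetical convenience wrapper: AtomicOps<T> deduces the 4- or 8-byte
// specialization from sizeof(T).
template <class T>
bool testAndSetBit(std::atomic<T>& value, unsigned bit) noexcept
{
    return carb::cpp::detail::AtomicOps<T>::test_bit_and_set(value, bit);
}

int main()
{
    std::atomic<uint32_t> flags{ 0 };
    bool first = testAndSetBit(flags, 3); // false: bit 3 was clear before the call
    bool second = testAndSetBit(flags, 3); // true: bit 3 is set now
    std::printf("%d %d 0x%x\n", int(first), int(second), flags.load());
    return 0;
}

Note that on the x86-64 fast paths the memory_order argument is ignored: the lock-prefixed instructions are sequentially consistent regardless, so requesting a weaker order is not cheaper there.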