// carb/thread/RecursiveSharedMutex.h
// File members: carb/thread/RecursiveSharedMutex.h
// Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once
#include "SharedMutex.h"
#include "ThreadLocal.h"
#include <algorithm>
#include <vector>
namespace carb
{
namespace thread
{
#ifndef DOXYGEN_SHOULD_SKIP_THIS
class recursive_shared_mutex;
namespace detail
{
// One bookkeeping record for the current thread: `first` is the mutex the record belongs to and `second` is this
// thread's recursion count on that mutex. The sign of the count encodes the kind of lock held: negative while the
// thread holds the mutex exclusively, positive while it holds it shared.
using LockEntry = std::pair<recursive_shared_mutex*, ptrdiff_t>;
// The records kept for one thread (stored in a ThreadLocal below). Entries are located with a linear scan.
using LockList = std::vector<LockEntry>;
// TL;DR: Gymnastics to get around SIOF (Static Initialization Order Fiasco) with supported compilers
//
// For GCC this is pretty easy. The init_priority attribute allows us to specify a priority value to use for
// initialization order. For recursive_shared_mutex's lockList, we really only care that it's constructed before
// application initializers run.
//
// We have to jump through some hoops here for MSVC since this is a header-only class. MSVC does have pragma init_seg,
// BUT a given translation unit (i.e. cpp files) may have only one. Since this exists as a header-only class and we
// don't want to force linkage of a cpp file specifically for this, we can get around it by injecting our initializer
// function into the appropriate segment for initializer order at link time.
//
// This is a fairly good reference for the various C-Runtime initializer sections:
// https://gist.github.com/vaualbus/622099d88334fbba1d4ae703642c2956
//
// #pragma init_seg(lib) corresponds to section .CRT$XCL (the L seems to indicate `lib`). Ironically, C=compiler,
// L=lib, and U=user are also in alphabetical order and make nice delimiters between .CRT$XCA (__xc_a) and .CRT$XCZ
// (__xc_z).
# if CARB_COMPILER_MSC
// If we just specified a variable of type carb::thread::ThreadLocal<LockList> (even allocating it into a specific
// custom section) the compiler will still try to instantiate it during the init_seg(user) order. To circumvent this
// behavior, we instead contain this variable inside `DataContainer`, but are careful to have the DataContainer()
// constructor well defined with zero side-effects. This is because constructLockList() will be called first (during the
// compiler's init_seg(lib) initialization order), which will construct the TLS member inside of DataContainer, but the
// DataContainer() constructor for lockListData runs after (during the compiler's init_seg(user) initialization order).
// clang-format off
// (for brevity)
// Raw storage for the thread-local lock list. The list's lifetime is driven manually by constructLockList() rather
// than by this object's own constructor and destructor.
struct DataContainer
{
// Trivial placeholder so that the union has a member the constexpr constructor can activate.
struct DummyType { constexpr DummyType() noexcept {} };
// Anonymous union: `tls` is neither constructed nor destroyed automatically.
union
{
DummyType empty;
carb::thread::ThreadLocal<LockList> tls;
};
// Activates only `empty`; `tls` is deliberately left untouched here.
constexpr DataContainer() noexcept : empty() {}
// Intentionally empty: does not destroy `tls`.
~DataContainer() noexcept {}
} __declspec(selectany) lockListData;
// clang-format on
// True while lockListData.tls holds a live object: set by constructLockList() and cleared again by the atexit
// handler that constructLockList() registers.
__declspec(selectany) bool constructed{ false };
// Accessor for the thread-local lock list that lives inside lockListData.
// The storage must already hold a live object, created either by the pConstructLockList initializer or by
// ensureLockList(); this precondition is asserted.
inline carb::thread::ThreadLocal<LockList>& lockList() noexcept
{
    CARB_ASSERT(constructed);
    auto& tlsList = lockListData.tls;
    return tlsList;
}
// Constructs the thread-local lock list in place inside lockListData and arranges for its destruction at exit.
// Asserts that the list has not already been constructed.
inline void constructLockList() noexcept
{
// Construct the lock list and then register a function to destroy it at exit time
CARB_ASSERT(!constructed);
// Placement-new into the union member; DataContainer's own constructor never constructs `tls`.
new (&lockListData.tls) carb::thread::ThreadLocal<LockList>();
constructed = true;
// DataContainer's destructor is empty, so the matching explicit destructor call is made from an atexit handler.
::atexit([] {
lockList().~ThreadLocal();
constructed = false;
});
}
// Fallback that constructs the lock list on demand when the CRT initializer did not already do so.
//
// OVCC-1298: with LTCG turned on, the linker sometimes does not obey the segment information given for
// pConstructLockList and places it outside the initializer list, so it is skipped at startup. As a work-around the
// lock list is constructed when the recursive_shared_mutex constructor runs, although this may be late and cause SIOF
// issues (see OM-18917).
inline void ensureLockList() noexcept
{
    if (CARB_UNLIKELY(!constructed))
    {
        static std::once_flag flag;
        auto constructIfMissing = [] {
            // Re-check inside call_once: the list may have been constructed since the unguarded test above.
            if (constructed)
                return;
            constructLockList();
        };
        std::call_once(flag, constructIfMissing);
        CARB_ASSERT(constructed);
    }
}
// NOTE(review): presumably given C linkage so that the /include directive below can name the symbol without C++
// name decoration - confirm.
extern "C"
{
// Declare these so the linker knows to include them
// CRTConstructor is the function-pointer type stored in the initializer sections; __xc_a / __xc_z are the
// delimiters of those sections mentioned in the commentary at the top of this namespace.
using CRTConstructor = void(__cdecl*)();
extern CRTConstructor __xc_a[], __xc_z[];
// Force the linker to include this symbol
# pragma comment(linker, "/include:pConstructLockList")
// Inject a pointer to our constructLockList() function at XCL, the same section that #pragma init_seg(lib) uses
# pragma section(".CRT$XCL", long, read)
// The pointer itself: placed in .CRT$XCL so that constructLockList() is invoked as an initializer.
__declspec(allocate(".CRT$XCL")) __declspec(selectany) CRTConstructor pConstructLockList = constructLockList;
}
# else
// According to this GCC bug: https://gcc.gnu.org/bugzilla//show_bug.cgi?id=65115
// The default priority if init_priority is not specified is 65535. So we use one value lower than that.
// Priority value that applies when no init_priority attribute is given.
# define DEFAULT_INIT_PRIORITY (65535)
// One less than the default value; applied to the lock list objects declared below.
# define LIBRARY_INIT_PRIORITY (DEFAULT_INIT_PRIORITY - 1)
// Flag object that tracks whether the lock list objects are alive: its member is set to true by the constructor and
// reset to false by the destructor. lockList() asserts on the single instance declared at the end of this struct.
struct Constructed
{
bool constructed;
constexpr Constructed() : constructed{ true }
{
}
~Constructed()
{
constructed = false;
}
// Explicit, so the object is only usable in boolean contexts such as CARB_ASSERT(constructed).
explicit operator bool() const
{
return constructed;
}
} constructed CARB_ATTRIBUTE(weak, init_priority(LIBRARY_INIT_PRIORITY));
// The thread-local lock list itself, declared with the same weak / init_priority attributes as the `constructed`
// flag object.
carb::thread::ThreadLocal<LockList> lockListTls CARB_ATTRIBUTE(weak, init_priority(LIBRARY_INIT_PRIORITY));
// Returns the thread-local lock list.
// Asserts that the `constructed` flag object is still set (its destructor clears it), which catches use of the list
// after it has been torn down.
// Declared noexcept so that its signature matches the MSVC implementation of this same function.
inline carb::thread::ThreadLocal<LockList>& lockList() noexcept
{
    CARB_ASSERT(constructed);
    return lockListTls;
}
// Intentionally a no-op for this compiler branch: the lock list objects are ordinary globals ordered with
// init_priority, so there is nothing to construct on demand.
inline constexpr void ensureLockList() noexcept {}
# endif
} // namespace detail
#endif
// A shared (reader/writer) mutex that the owning thread may lock repeatedly, in shared or in exclusive mode.
//
// Implemented on top of carb::thread::shared_mutex (private base) plus a per-thread list of
// (mutex, recursion count) records; a negative count means exclusive ownership, a positive count shared ownership.
class recursive_shared_mutex : private carb::thread::shared_mutex
{
public:
    // Constructor. Calls detail::ensureLockList() so that the per-thread lock list exists before first use.
#if !CARB_DEBUG && !CARB_COMPILER_MSC && !CARB_ASSERT_ENABLED
    constexpr
#endif
    recursive_shared_mutex()
    {
        detail::ensureLockList();
    }

    ~recursive_shared_mutex() = default;

    // Exclusive locking.
    void lock();
    bool try_lock();
    void unlock();

    // Shared locking.
    void lock_shared();
    bool try_lock_shared();
    void unlock_shared();

    // Ownership queries; all of them refer to the calling thread.
    bool owns_lock() const;
    bool owns_lock_shared() const;
    bool owns_lock_exclusive() const;

private:
    // Looks up the calling thread's record for this mutex without creating one.
    // Returns nullptr when the thread has no record.
    const detail::LockEntry* hasLockEntry() const
    {
        for (const detail::LockEntry& candidate : detail::lockList().get())
        {
            if (candidate.first == this)
                return std::addressof(candidate);
        }
        return nullptr;
    }

    // Returns the calling thread's record for this mutex, appending a fresh one with a zero count if none exists.
    detail::LockEntry& lockEntry()
    {
        auto& list = detail::lockList().get();
        for (detail::LockEntry& candidate : list)
        {
            if (candidate.first == this)
                return candidate;
        }
        list.emplace_back(this, 0);
        return list.back();
    }

    // Removes record `e` (which must be an element of the calling thread's list) by overwriting it with the last
    // record and then dropping the last record. The order of the remaining records is not preserved.
    void removeLockEntry(detail::LockEntry& e)
    {
        auto& list = detail::lockList().get();
        CARB_ASSERT(std::addressof(e) >= std::addressof(list.front()) &&
                    std::addressof(e) <= std::addressof(list.back()));
        e = list.back();
        list.pop_back();
    }
};
// Function implementations
inline void recursive_shared_mutex::lock()
{
detail::LockEntry& e = lockEntry();
if (e.second < 0)
{
// Already locked exclusively (negative lock count). Increase the negative count.
--e.second;
}
else
{
if (e.second > 0)
{
// This thread already has shared locks for this lock. We need to convert to exclusive.
shared_mutex::unlock_shared();
}
// Acquire the lock exclusively
shared_mutex::lock();
// Now inside the lock
e.second = -(e.second + 1);
}
}
inline bool recursive_shared_mutex::try_lock()
{
detail::LockEntry& e = lockEntry();
if (e.second < 0)
{
// Already locked exclusively (negative lock count). Increase the negative count.
--e.second;
return true;
}
else if (e.second == 0)
{
if (shared_mutex::try_lock())
{
// Inside the lock
e.second = -1;
return true;
}
// Lock failed
removeLockEntry(e);
}
// Either we already have shared locks (that can't be converted to exclusive without releasing the lock and possibly
// not being able to acquire it again) or the try_lock failed.
return false;
}
// Releases one recursion level of whichever kind of lock (shared or exclusive) the calling thread holds.
// When the last level is released, the underlying shared_mutex is unlocked in the matching mode and the thread's
// record for this mutex is removed. Calling this without holding the lock trips CARB_CHECK and terminates.
inline void recursive_shared_mutex::unlock()
{
    detail::LockEntry& e = lockEntry();
    CARB_CHECK(e.second != 0);
    if (e.second == 0)
    {
        // unlock() without being locked!
        std::terminate();
    }

    // Positive counts are shared holds, negative counts exclusive ones; step the count one level toward zero.
    const bool heldShared = e.second > 0;
    e.second += heldShared ? -1 : 1;
    if (e.second != 0)
    {
        // Still held recursively.
        return;
    }

    if (heldShared)
    {
        shared_mutex::unlock_shared();
    }
    else
    {
        shared_mutex::unlock();
    }
    removeLockEntry(e);
}
inline void recursive_shared_mutex::lock_shared()
{
detail::LockEntry& e = lockEntry();
if (e.second < 0)
{
// We already own an exclusive lock, which is stronger than shared. So just increase the exclusive lock.
--e.second;
}
else
{
if (e.second == 0)
{
shared_mutex::lock_shared();
// Now inside the lock
}
++e.second;
}
}
inline bool recursive_shared_mutex::try_lock_shared()
{
detail::LockEntry& e = lockEntry();
if (e.second < 0)
{
// We already own an exclusive lock, which is stronger than shared. So just increase the exclusive lock.
--e.second;
return true;
}
else if (e.second == 0 && !shared_mutex::try_lock_shared())
{
// Failed to get the shared lock
removeLockEntry(e);
return false;
}
++e.second;
return true;
}
inline void recursive_shared_mutex::unlock_shared()
{
unlock();
}
inline bool recursive_shared_mutex::owns_lock() const
{
auto entry = hasLockEntry();
return entry ? entry->second != 0 : false;
}
inline bool recursive_shared_mutex::owns_lock_exclusive() const
{
auto entry = hasLockEntry();
return entry ? entry->second < 0 : false;
}
inline bool recursive_shared_mutex::owns_lock_shared() const
{
auto entry = hasLockEntry();
return entry ? entry->second > 0 : false;
}
} // namespace thread
} // namespace carb