/** @file carb/thread/RecursiveSharedMutex.h
 *  Carbonite recursive shared mutex (header-only).
 */

// Copyright (c) 2020-2023, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//

#pragma once

#include "SharedMutex.h"
#include "ThreadLocal.h"

#include <algorithm>
#include <cstddef>
#include <cstdlib>
#include <memory>
#include <mutex>
#include <new>
#include <utility>
#include <vector>

namespace carb
{
namespace thread
{

#ifndef DOXYGEN_SHOULD_SKIP_THIS
class recursive_shared_mutex;
namespace detail
{

// One thread's lock record for a single mutex: (mutex, depth). A positive depth
// is the recursive shared-lock count; a negative depth is the recursive
// exclusive-lock count (see recursive_shared_mutex::lock()/lock_shared()).
using LockEntry = std::pair<recursive_shared_mutex*, ptrdiff_t>;
// All lock records owned by a single thread (kept in thread-local storage).
using LockList = std::vector<LockEntry>;

// TL;DR: Gymnastics to get around SIOF (Static Initialization Order Fiasco) with supported compilers
//
// For GCC this is pretty easy. The init_priority attribute allows us to specify a priority value to use for
// initialization order. For recursive_shared_mutex's lockList, we really only care that it's constructed before
// application initializers run.
//
// We have to jump through some hoops here for MSVC since this is a header-only class. MSVC does have pragma init_seg,
// BUT a given translation unit (i.e. cpp files) may have only one. Since this exists as a header-only class and we
// don't want to force linkage of a cpp file specifically for this, we can get around it by injecting our initializer
// function into the appropriate segment for initializer order at link time.
//
// This is a fairly good reference for the various C-Runtime initializer sections:
// https://gist.github.com/vaualbus/622099d88334fbba1d4ae703642c2956
//
// #pragma init_seg(lib) corresponds to section .CRT$XCL (the L seems to indicate `lib`). Ironically, C=compiler,
// L=lib, and U=user are also in alphabetical order and make nice delimiters between .CRT$XCA (__xc_a) and .CRT$XCZ
// (__xc_z).
#    if CARB_COMPILER_MSC

// If we just specified a variable of type carb::thread::ThreadLocal<LockList> (even allocating it into a specific
// custom section) the compiler will still try to instantiate it during the init_seg(user) order. To circumvent this
// behavior, we instead contain this variable inside `DataContainer`, but are careful to have the DataContainer()
// constructor well defined with zero side-effects. This is because constructLockList() will be called first (during the
// compiler's init_seg(lib) initialization order), which will construct the TLS member inside of DataContainer, but the
// DataContainer() constructor for lockListData runs after (during the compiler's init_seg(user) initialization order).

// clang-format off
// (for brevity)
// Wrapper that holds the TLS lock list without constructing it during normal
// static initialization. The union defers the ThreadLocal's lifetime: `tls` is
// only created explicitly by constructLockList() (placement-new) and destroyed
// by the atexit handler registered there, so DataContainer's own constructor
// has zero side effects and is safe at any initialization order.
struct DataContainer
{
    struct DummyType { constexpr DummyType() noexcept {} };
    union
    {
        DummyType empty;
        carb::thread::ThreadLocal<LockList> tls;
    };

    // Constexpr and side-effect free: activates only the trivial `empty` member.
    constexpr DataContainer() noexcept : empty() {}
    // Intentionally empty: a union never destroys its members implicitly; `tls`
    // is destroyed explicitly by the atexit handler in constructLockList().
    ~DataContainer() noexcept {}
} __declspec(selectany) lockListData;
// clang-format on

// True only while lockListData.tls is within its (placement-constructed) lifetime.
__declspec(selectany) bool constructed{ false };
// Returns the process-wide TLS lock list; callers must not use it outside its
// lifetime (asserted below).
inline carb::thread::ThreadLocal<LockList>& lockList() noexcept
{
    // Should have been constructed with either pConstructLockList (initializer) or ensureLockList()
    CARB_ASSERT(constructed);
    return lockListData.tls;
}

// Placement-constructs the TLS lock list inside lockListData and registers an
// atexit handler to destroy it (the union member is never destroyed implicitly).
// Normally invoked via pConstructLockList during .CRT$XCL initialization; may
// also be invoked lazily by ensureLockList() (see OVCC-1298 below).
inline void constructLockList() noexcept
{
    // Construct the lock list and then register a function to destroy it at exit time
    CARB_ASSERT(!constructed);
    new (&lockListData.tls) carb::thread::ThreadLocal<LockList>();
    constructed = true;
    ::atexit([] {
        lockList().~ThreadLocal();
        constructed = false;
    });
}

// Fallback construction path: guarantees the lock list exists, constructing it
// on first use if the link-time initializer was dropped (see OVCC-1298 below).
inline void ensureLockList() noexcept
{
    // OVCC-1298: With LTCG turned on sometimes the linker doesn't obey the segment information below and puts
    // pConstructLockList that is supposed to construct the lock list into the wrong segment, not in the initializer
    // list. Which means it gets skipped at startup. As a work-around we can construct it when the
    // recursive_shared_mutex constructor is called, though this may be late and cause SIOF issues (see OM-18917).
    if (CARB_UNLIKELY(!constructed))
    {
        static std::once_flag flag;
        std::call_once(flag, [] {
            // Re-check under the once-guard: the initializer may have run after
            // the unsynchronized check above.
            if (!constructed)
                constructLockList();
        });
        CARB_ASSERT(constructed);
    }
}

extern "C"
{
    // Declare these so the linker knows to include them. __xc_a/__xc_z delimit
    // the CRT's C++ dynamic-initializer function-pointer table.
    using CRTConstructor = void(__cdecl*)();
    extern CRTConstructor __xc_a[], __xc_z[];

    // Force the linker to include this symbol
#        pragma comment(linker, "/include:pConstructLockList")

    // Inject a pointer to our constructLockList() function at XCL, the same section that #pragma init_seg(lib) uses
#        pragma section(".CRT$XCL", long, read)
    __declspec(allocate(".CRT$XCL")) __declspec(selectany) CRTConstructor pConstructLockList = constructLockList;
}
#    else
// According to this GCC bug: https://gcc.gnu.org/bugzilla//show_bug.cgi?id=65115
// The default priority if init_priority is not specified is 65535. So we use one value lower than that.
#        define DEFAULT_INIT_PRIORITY (65535)
#        define LIBRARY_INIT_PRIORITY (DEFAULT_INIT_PRIORITY - 1)
// Lifetime sentinel for lockListTls: the constexpr constructor makes it true
// from constant initialization, and its destructor (ordered with lockListTls
// via the same init_priority) flips it to false during static destruction, so
// lockList() can assert against use after the TLS object is gone.
struct Constructed
{
    bool constructed;
    constexpr Constructed() : constructed{ true }
    {
    }
    ~Constructed()
    {
        constructed = false;
    }
    explicit operator bool() const
    {
        return constructed;
    }
} constructed CARB_ATTRIBUTE(weak, init_priority(LIBRARY_INIT_PRIORITY));
// The per-thread lock lists. init_priority constructs this before default-
// priority (application) static initializers that may lock a recursive_shared_mutex.
carb::thread::ThreadLocal<LockList> lockListTls CARB_ATTRIBUTE(weak, init_priority(LIBRARY_INIT_PRIORITY));
// Returns the process-wide TLS lock list; asserts it is within its lifetime.
inline carb::thread::ThreadLocal<LockList>& lockList()
{
    CARB_ASSERT(constructed);
    return lockListTls;
}

// No-op on this path: init_priority (above) already guarantees the lock list
// is constructed before application initializers run, unlike the MSVC path.
constexpr inline void ensureLockList() noexcept
{
}
#    endif

} // namespace detail
#endif

/**
 * A shared (read/write) mutex that a single thread may acquire recursively, in
 * either shared or exclusive mode. Each thread tracks its per-mutex depth in a
 * thread-local list: a positive count is shared depth, a negative count is
 * exclusive depth.
 */
class recursive_shared_mutex : private carb::thread::shared_mutex
{
public:
    //! Constructor.
#if !CARB_DEBUG && !CARB_COMPILER_MSC && !CARB_ASSERT_ENABLED
    constexpr
#endif
        recursive_shared_mutex()
    {
        detail::ensureLockList();
    }

    //! Destructor.
    ~recursive_shared_mutex() = default;

    //! Acquires (or recursively re-acquires) the exclusive lock for this thread.
    void lock();

    //! Attempts to acquire the exclusive lock without blocking; true on success.
    bool try_lock();

    //! Releases one level of this thread's lock (exclusive or shared).
    void unlock();

    //! Acquires (or recursively re-acquires) a shared lock for this thread.
    void lock_shared();

    //! Attempts to acquire a shared lock without blocking; true on success.
    bool try_lock_shared();

    //! Releases one level of this thread's shared lock.
    void unlock_shared();

    //! Returns true if the calling thread owns this lock in any mode.
    bool owns_lock() const;

    //! Returns true if the calling thread owns this lock in shared mode.
    bool owns_lock_shared() const;

    //! Returns true if the calling thread owns this lock in exclusive mode.
    bool owns_lock_exclusive() const;

private:
    // Finds the calling thread's entry for this mutex, or nullptr if none.
    const detail::LockEntry* hasLockEntry() const
    {
        for (detail::LockEntry& entry : detail::lockList().get())
        {
            if (entry.first == this)
                return std::addressof(entry);
        }
        return nullptr;
    }
    // Finds the calling thread's entry for this mutex, creating one with a
    // zero count if absent.
    detail::LockEntry& lockEntry()
    {
        auto& entries = detail::lockList().get();
        for (detail::LockEntry& entry : entries)
        {
            if (entry.first == this)
                return entry;
        }
        entries.emplace_back(this, 0);
        return entries.back();
    }
    // Removes `e` — which must reference an element of this thread's list —
    // by overwriting it with the last entry and popping the back.
    void removeLockEntry(detail::LockEntry& e)
    {
        auto& entries = detail::lockList().get();
        CARB_ASSERT(std::addressof(e) >= std::addressof(entries.front()) && std::addressof(e) <= std::addressof(entries.back()));
        e = entries.back();
        entries.pop_back();
    }
};

// Function implementations
inline void recursive_shared_mutex::lock()
{
    detail::LockEntry& e = lockEntry();
    if (e.second < 0)
    {
        // Already locked exclusively (negative lock count). Increase the negative count.
        --e.second;
    }
    else
    {
        if (e.second > 0)
        {
            // This thread already has shared locks for this lock. We need to convert to exclusive.
            shared_mutex::unlock_shared();
        }
        // Acquire the lock exclusively
        shared_mutex::lock();
        // Now inside the lock
        e.second = -(e.second + 1);
    }
}

inline bool recursive_shared_mutex::try_lock()
{
    detail::LockEntry& e = lockEntry();
    if (e.second < 0)
    {
        // Already locked exclusively (negative lock count). Increase the negative count.
        --e.second;
        return true;
    }
    else if (e.second == 0)
    {
        if (shared_mutex::try_lock())
        {
            // Inside the lock
            e.second = -1;
            return true;
        }
        // Lock failed
        removeLockEntry(e);
    }
    // Either we already have shared locks (that can't be converted to exclusive without releasing the lock and possibly
    // not being able to acquire it again) or the try_lock failed.
    return false;
}

inline void recursive_shared_mutex::unlock()
{
    detail::LockEntry& e = lockEntry();
    CARB_CHECK(e.second != 0);
    if (e.second > 0)
    {
        if (--e.second == 0)
        {
            shared_mutex::unlock_shared();
            removeLockEntry(e);
        }
    }
    else if (e.second < 0)
    {
        if (++e.second == 0)
        {
            shared_mutex::unlock();
            removeLockEntry(e);
        }
    }
    else
    {
        // unlock() without being locked!
        std::terminate();
    }
}

inline void recursive_shared_mutex::lock_shared()
{
    detail::LockEntry& e = lockEntry();
    if (e.second < 0)
    {
        // We already own an exclusive lock, which is stronger than shared. So just increase the exclusive lock.
        --e.second;
    }
    else
    {
        if (e.second == 0)
        {
            shared_mutex::lock_shared();
            // Now inside the lock
        }
        ++e.second;
    }
}

inline bool recursive_shared_mutex::try_lock_shared()
{
    detail::LockEntry& e = lockEntry();
    if (e.second < 0)
    {
        // We already own an exclusive lock, which is stronger than shared. So just increase the exclusive lock.
        --e.second;
        return true;
    }
    else if (e.second == 0 && !shared_mutex::try_lock_shared())
    {
        // Failed to get the shared lock
        removeLockEntry(e);
        return false;
    }
    ++e.second;
    return true;
}

/// Releases one level of shared ownership. The per-thread count encodes both
/// modes (sign distinguishes them), so unlock() handles the bookkeeping.
inline void recursive_shared_mutex::unlock_shared()
{
    unlock();
}

inline bool recursive_shared_mutex::owns_lock() const
{
    auto entry = hasLockEntry();
    return entry ? entry->second != 0 : false;
}

inline bool recursive_shared_mutex::owns_lock_exclusive() const
{
    auto entry = hasLockEntry();
    return entry ? entry->second < 0 : false;
}

inline bool recursive_shared_mutex::owns_lock_shared() const
{
    auto entry = hasLockEntry();
    return entry ? entry->second > 0 : false;
}

} // namespace thread
} // namespace carb