ContainerHelper.h

Fully qualified name: omni/extras/ContainerHelper.h
File members: omni/extras/ContainerHelper.h
// Copyright (c) 2023-2024, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once

#include "../../carb/Defines.h"

#if CARB_PLATFORM_LINUX
#    include <fcntl.h>
#    include <unistd.h>

#    include <cstdlib>
#    include <string>
#    include <utility>
#    include <vector>
#endif

namespace omni
{
namespace extras
{

#if CARB_PLATFORM_LINUX || defined(DOXYGEN_BUILD)

#    ifndef DOXYGEN_SHOULD_SKIP_THIS
namespace detail
{

// Reads the beginning of a file into a caller provided buffer as a null terminated string.
//
// Despite its name, this helper does not stop at the first newline: it performs a single
// read of at most `len - 1` bytes and null terminates whatever was returned.  Any newline
// characters in that span are left in the buffer.
//
// file:    path of the file to read.
// buffer:  receives the file contents.  Must have room for at least `len` characters.
// len:     size of `buffer` in characters, including room for the null terminator.
//
// Returns `true` if at least one byte was read and `buffer` was null terminated.  Returns
// `false` if an argument is unusable, the file could not be opened, the read failed, or
// nothing was read; `buffer` is not terminated in those cases.
inline bool readLineFromFile(const char* file, char* buffer, size_t len) noexcept
{
    // a zero length buffer would make `len - 1` wrap around to SIZE_MAX and allow read() to
    // write far past the end of the buffer => reject unusable arguments up front.
    if (file == nullptr || buffer == nullptr || len == 0)
    {
        return false;
    }

    const int fd = ::open(file, O_RDONLY);
    if (fd == -1)
    {
        return false;
    }

    // leave room for the null terminator that gets appended below.
    const auto size = CARB_RETRY_EINTR(::read(fd, buffer, len - 1));
    ::close(fd);

    // read error or empty file => nothing usable in the buffer.
    if (size <= 0)
    {
        return false;
    }

    buffer[size] = '\0';
    return true;
}

// Reads the start of a file and converts its leading text to an integer.
//
// file:    path of the file to read.
//
// Returns the value `std::atoi()` produces for the text that was read (so text that does not
// start with a number yields 0), or -1 if the file could not be read.
inline int32_t readIntFromFile(const char* file) noexcept
{
    char text[64];
    const bool wasRead = readLineFromFile(file, text, CARB_COUNTOF(text));

    return wasRead ? std::atoi(text) : -1;
}

// A CPU usage quota expressed as a quota/period pair, as read from the cgroup files.
struct CpuQuota
{
    // amount of CPU time allowed per period.  A value of -1 (the default) is used when no
    // quota could be read or the quota is unlimited.
    int32_t quota{ -1 };

    // length of one scheduling period, in the same units as `quota`.  Defaults to 100000.
    int32_t period{ 100'000 };
};

// Reads a "<quota> <period>" pair from a file (the layout used by the cgroup v2 `cpu.max` file).
//
// file:    path of the file to read.
//
// Returns the parsed pair.  If the file could not be read, both members are -1.  If the quota
// is given as `max` (no limit), the quota member is -1 and the period is still parsed.  The
// file is expected to be well formed; no validation is done on the numbers.
inline CpuQuota readCpuQuotaFromFile(const char* file) noexcept
{
    char line[64];

    if (!readLineFromFile(file, line, CARB_COUNTOF(line)))
    {
        return { -1, -1 };
    }

    // the first field is either the literal 'max' or a number; the second field is the period
    // (often 100000, but this depends on the scheduler settings).

    // 'max' means the quota is unlimited => report it as "no quota" and parse just the period.
    if (strncmp(line, "max ", 4) == 0)
    {
        return { -1, std::atoi(line + 4) };
    }

    // numeric quota => the period follows wherever the quota's digits stopped.
    char* remainder = nullptr;
    const int quotaValue = static_cast<int>(strtol(line, &remainder, 10));
    const int periodValue = std::atoi(remainder);

    return { quotaValue, periodValue };
}

// Counts the CPU cores named in a cpuset style core list.
//
// The list is comma separated where each entry is either a single core index ("3") or an
// inclusive core index range ("0-3").  For example, "0-1,3" names cores 0, 1, and 3 and
// "0-3,7,9" names cores 0, 1, 2, 3, 7, and 9.  Whitespace around entries (including the
// trailing newline found when the list is read from a file) is ignored.
//
// buffer:  the null terminated core list to parse.  May be `nullptr`.
// cores:   optional vector that receives the index of every core named in the list, in the
//          order they appear.  Nothing is removed from the vector first.
//
// Returns the number of cores named in the list.  An empty (or `nullptr`) list returns 0.
// Malformed entries (empty entries, non-numeric text, negative indices, or ranges whose end is
// below their start) are skipped instead of being counted.
inline int32_t getCpuSetCoreCount(const char* buffer, std::vector<int32_t>* cores = nullptr) noexcept
{
    if (buffer == nullptr)
    {
        return 0;
    }

    const char* cursor = buffer;
    int32_t total = 0;

    while (*cursor != '\0')
    {
        // skip entry separators and whitespace.  This is what makes empty lists, trailing
        // commas, and the trailing newline from a file contribute nothing to the count.
        if (*cursor == ',' || *cursor == ' ' || *cursor == '\t' || *cursor == '\r' || *cursor == '\n')
        {
            cursor++;
            continue;
        }

        // parse the first (or only) core index of this entry.
        char* endp = nullptr;
        const long first = std::strtol(cursor, &endp, 10);
        bool valid = (endp != cursor);
        long last = first;

        // found a core range => parse the end of the range as well.
        if (valid && *endp == '-')
        {
            const char* rangeEnd = endp + 1;

            last = std::strtol(rangeEnd, &endp, 10);
            valid = (endp != rangeEnd);
        }

        if (valid && first >= 0 && last >= first)
        {
            // ranges are always inclusive so we always need to add 1 to the calculated range.
            total += static_cast<int32_t>(last - first + 1);

            // add all the cores to the list (inclusive on both ends of the range).
            if (cores != nullptr)
            {
                for (long core = first; core <= last; core++)
                {
                    cores->push_back(static_cast<int32_t>(core));
                }
            }
        }

        // move on to the next entry, dropping anything unexpected left in this one.
        cursor = endp;

        while (*cursor != '\0' && *cursor != ',')
        {
            cursor++;
        }
    }

    return total;
}

// Reads a CPU set core list from a file and counts the cores it names.
//
// The file is expected to start with a comma separated list where each value is either a
// single core index or an inclusive core index range.  For example, "0-1,3" indicates that
// cores 0, 1, and 3 are available and "0-3,7,9" indicates cores 0, 1, 2, 3, 7, and 9.
//
// file:    path of the file to read.
// buffer:  optional buffer that receives the text read from the file.  If this is `nullptr`
//          (or `len` is 0) an internal 1024 character buffer is used instead.
// len:     size of `buffer` in characters.
//
// Returns the number of cores in the CPU set, or -1 if the file could not be read.
inline int readCpuSetCoreCountFromFile(const char* file, char* buffer = nullptr, size_t len = 0) noexcept
{
    char scratch[1024];

    // no usable caller buffer => read into the local scratch buffer instead.
    const bool useScratch = (buffer == nullptr) || (len == 0);
    char* const target = useScratch ? scratch : buffer;
    const size_t targetLen = useScratch ? CARB_COUNTOF(scratch) : len;

    if (readLineFromFile(file, target, targetLen))
    {
        // return the total number of cores included in the CPU set.
        return getCpuSetCoreCount(target);
    }

    return -1;
}

// Checks whether the calling process appears to be running inside a container.
//
// Two checks are made: whether `/.dockerenv` exists, and whether any line of `/proc/1/cgroup`
// contains a control group path starting with `/docker`, `/lxc`, or `/kubepods`.
//
// Returns `true` if either check succeeds and `false` otherwise (including when
// `/proc/1/cgroup` cannot be opened).
inline bool isRunningInContainer() noexcept
{
    // first (and easiest) check is to check whether the `/.dockerenv` file exists.  This file
    // is not necessarily always present though.
    if (access("/.dockerenv", F_OK) == 0)
    {
        return true;
    }

    // a more reliable but more expensive check is to verify the control group of `init`.  If
    // running under docker, all of the entries will have a path that starts with `/docker` or
    // `/lxc` instead of just `/`.
    // Kubernetes seems to use `:/kubepods.slice`.
    FILE* fp = fopen("/proc/1/cgroup", "r");

    if (fp == nullptr)
    {
        return false;
    }

    bool found = false;
    char line[256];

    // fgets() returns nullptr on both end-of-file and error, so no separate feof()/ferror()
    // check is needed.  Checking them after a successful read would skip a final line that has
    // no trailing newline.
    while (!found && fgets(line, CARB_COUNTOF(line), fp) != nullptr)
    {
        found = strstr(line, ":/docker") != nullptr || strstr(line, ":/lxc") != nullptr ||
                strstr(line, ":/kubepods") != nullptr;
    }

    // always close the file, including when a match ended the loop early.
    fclose(fp);
    return found;
}

// Retrieves the CPU quota and period from the cgroup v2 file, falling back to the cgroup v1
// files.
//
// quota:   receives the values that were read.  This is written to even on failure, in which
//          case it holds whatever the last attempted read produced.
//
// Returns `true` if a positive quota and a positive period were found and `false` if no CPU
// quota is available.
inline bool getCgroupCpuQuota(CpuQuota& quota)
{
    // a quota is only usable when both of its values are positive.
    const auto isUsable = [](const CpuQuota& q) { return q.quota > 0 && q.period > 0; };

    // attempt to read the CPU quota from cgroup v2 first.
    quota = readCpuQuotaFromFile("/sys/fs/cgroup/cpu.max");

    if (isUsable(quota))
    {
        return true;
    }

    // attempt to read the CPU quota from cgroup v1 next.  If this also fails, the CPU quota is
    // not available.
    quota.quota = detail::readIntFromFile("/sys/fs/cgroup/cpu/cpu.cfs_quota_us");
    quota.period = detail::readIntFromFile("/sys/fs/cgroup/cpu/cpu.cfs_period_us");

    return isUsable(quota);
}

inline int32_t getCgroupCpuQuota() noexcept
{
    CpuQuota quota;

    // attempt to read the CPU quota from cgroup v2 and cgroup v1.  This can affect how much
    // of the assigned cores scheduled time the processes in the container are effectively allowed
    // to make use of.
    if (getCgroupCpuQuota(quota))
    {
        return ::carb_max(1, (quota.quota + (quota.period / 2)) / quota.period);
    }

    return -1;
}

// Calculates the effective CPU core limit imposed on the process by its control group.
//
// Both the assigned CPU set (which cores may be used) and the CPU usage quota (how much CPU
// time may be used) are taken into account.
//
// Returns the effective number of CPU cores the process is limited to, or -1 if neither a CPU
// set nor a CPU quota could be found.
inline int getDockerCpuLimit() noexcept
{
    // See:
    // https://docs.docker.com/config/containers/resource_constraints/#cpu
    // https://engineering.squarespace.com/blog/2017/understanding-linux-container-scheduling
    // https://docs.kernel.org/admin-guide/cgroup-v1/cpusets.html
    // https://docs.kernel.org/admin-guide/cgroup-v2.html

    // ****** read the CPU core limit if defined ******
    // first attempt to read from the cgroup v2 and v1 CPU set lists to get the total core count.
    // We'll try the 'effective' CPU set lists first.  If one doesn't exist or is empty, we'll
    // fall back to the main CPU set lists.  Note that the v1 and v2 cgroups will never
    // simultaneously exist in the container and that Docker will always respect the host
    // system's cgroup version regardless of the container's base image version.
    const char* const cpuSetFiles[] = {
        "/sys/fs/cgroup/cpuset.cpus.effective", // cgroup v2 effective CPU set.
        "/sys/fs/cgroup/cpuset.cpus", // cgroup v2 main CPU set.
        "/sys/fs/cgroup/cpuset/cpuset.effective_cpus", // cgroup v1 effective CPU set.
        "/sys/fs/cgroup/cpuset/cpuset.cpus", // cgroup v1 main CPU set.
    };
    int32_t coreCount = -1;

    for (const char* path : cpuSetFiles)
    {
        coreCount = detail::readCpuSetCoreCountFromFile(path);

        // only a positive count is a usable CPU set.  A missing file (negative count) or a
        // list that names no cores (zero) falls through to the next candidate.
        if (coreCount > 0)
        {
            break;
        }
    }

    // ****** read the CPU usage quota if defined ******
    // next attempt to read the CPU quota from cgroup v2 and cgroup v1.  This can affect how much
    // of the assigned cores scheduled time the processes in the container are effectively
    // allowed to make use of.
    int32_t coreQuota = getCgroupCpuQuota();

    // ****** calculate the effective CPU core limit ******
    // a CPU set has been assigned and has limited the core count for the container => calculate
    //   the effective core count from the CPU set and the CPU quota.
    if (coreCount > 0)
    {
        // a CPU quota has been defined as well => calculate the effective core count from both
        //   values.  Note that the CPU usage quota can oversubscribe the assigned core count.
        //   In that case we'd want to return the minimum of the CPU set core count and the
        //   effective count from the CPU quota.
        if (coreQuota > 0)
        {
            return ::carb_min(coreQuota, coreCount);
        }

        // no CPU usage quota defined => just return the CPU set core count.
        return coreCount;
    }

    // no CPU set core count defined => just return the effective core count from the CPU quota.
    if (coreQuota > 0)
    {
        return coreQuota;
    }

    // no CPU usage limit.  This will effectively limit it to the host's bare metal CPU core
    // count.
    return -1;
}

} // namespace detail
#    endif

// Retrieves the CPU core limit imposed on this process by its container, if any.
//
// The limit is calculated on the first call and the same value is returned by every call
// after that.
//
// Returns the value produced by `detail::getDockerCpuLimit()`: the effective CPU core limit,
// or -1 if no limit was found.
inline int getDockerCpuLimit() noexcept
{
    // function-local static => the cgroup files are only read on the first call.
    static const int s_cachedLimit = detail::getDockerCpuLimit();

    return s_cachedLimit;
}

// Checks whether this process appears to be running inside a container.
//
// The check is performed on the first call and the same value is returned by every call
// after that.  This is marked `noexcept` so that the signature matches the version of this
// function that is provided on non-Linux platforms.
//
// Returns `true` if a container was detected and `false` otherwise.
inline bool isRunningInContainer() noexcept
{
    // function-local static => the detection only runs on the first call.
    static bool s_inContainer = detail::isRunningInContainer();
    return s_inContainer;
}

#else

// Version of getDockerCpuLimit() for platforms other than Linux.
//
// Returns -1 always: no CPU limit is reported on these platforms.
inline int getDockerCpuLimit() noexcept
{
    constexpr int kNoCpuLimit = -1;

    return kNoCpuLimit;
}

// Version of isRunningInContainer() for platforms other than Linux.
//
// Returns `false` always: no container detection is performed on these platforms.
inline bool isRunningInContainer() noexcept
{
    constexpr bool kInContainer = false;

    return kInContainer;
}

#endif

} // namespace extras
} // namespace omni