AudioUtils.h

File members: carb/audio/AudioUtils.h
// Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once

#ifndef DOXYGEN_SHOULD_SKIP_THIS
#    define _USE_MATH_DEFINES
#endif

#include "../Framework.h"
#include "../logging/Log.h"
#include "../math/Util.h"
#include "AudioTypes.h"
#include "IAudioData.h"
#include "IAudioPlayback.h"
#include "IAudioUtils.h"
#include "IAudioCapture.h"

#include <atomic>
#include <limits.h>
#include <math.h>
#include <string.h>

#if CARB_PLATFORM_WINDOWS
#    define strdup _strdup
#endif

namespace carb
{
namespace audio
{

/**
 * @brief Converts an angle given in degrees to radians using single precision math.
 *
 * @tparam T       arithmetic type of the incoming angle.
 * @param degrees  the angle in degrees.
 * @returns the equivalent angle in radians, returned as a `float`.
 */
template <typename T>
constexpr float degreesToRadians(T degrees)
{
    // the scale factor itself is evaluated in single precision.
    constexpr float kRadiansPerDegree = float(M_PI) / 180.f;
    return degrees * kRadiansPerDegree;
}

/**
 * @brief Converts an angle given in degrees to radians using double precision math.
 *
 * @param degrees  the angle in degrees.
 * @returns the equivalent angle in radians.
 */
constexpr double degreesToRadians(double degrees)
{
    constexpr double kRadiansPerDegree = M_PI / 180.0;
    return degrees * kRadiansPerDegree;
}

/**
 * @brief Converts an angle given in radians to degrees using single precision math.
 *
 * @tparam T       arithmetic type of the incoming angle.
 * @param radians  the angle in radians.
 * @returns the equivalent angle in degrees, returned as a `float`.
 */
template <typename T>
constexpr float radiansToDegrees(T radians)
{
    // the scale factor itself is evaluated in single precision.
    constexpr float kDegreesPerRadian = 180.f / float(M_PI);
    return radians * kDegreesPerRadian;
}

/**
 * @brief Converts an angle given in radians to degrees using double precision math.
 *
 * @param radians  the angle in radians.
 * @returns the equivalent angle in degrees.
 */
constexpr double radiansToDegrees(double radians)
{
    constexpr double kDegreesPerRadian = 180.0 / M_PI;
    return radians * kDegreesPerRadian;
}

/**
 * @brief Counts the bits that are set in a value.
 *
 * @tparam T      type of the value to inspect; forwarded unchanged to `math::popCount()`.
 * @param value_  the value whose set bits are counted.
 * @returns the result of `math::popCount()` converted to `size_t`.
 */
template <typename T>
size_t getSetBitCount(T value_)
{
    const size_t setBits = math::popCount(value_);
    return setBits;
}

/**
 * @brief Retrieves the number of speakers (channels) described by a speaker mode.
 *
 * The named standard layouts are answered from an explicit table so they can be
 * evaluated in constant expressions.  Any other mask is treated as a set of
 * speaker flags and its set bits are counted.
 *
 * @param mode  the speaker mode (or speaker flag mask) to query.
 * @returns the number of speakers for @p mode; 0 for `kSpeakerModeDefault`.
 */
constexpr size_t getSpeakerCountForMode(SpeakerMode mode)
{
    switch (mode)
    {
        case kSpeakerModeDefault:
            return 0;

        case kSpeakerModeMono:
            return 1;

        case kSpeakerModeStereo:
            return 2;

        // explicit entry mirroring `case 3` in getSpeakerModeForCount().  Without it
        // this layout fell through to the non-constexpr bit counting path.
        case kSpeakerModeTwoPointOne:
            return 3;

        case kSpeakerModeQuad:
            return 4;

        case kSpeakerModeFourPointOne:
            return 5;

        case kSpeakerModeFivePointOne:
            return 6;

        case kSpeakerModeSixPointOne:
            return 7;

        case kSpeakerModeSevenPointOne:
            return 8;

        case kSpeakerModeNinePointOne:
            return 10;

        case kSpeakerModeSevenPointOnePointFour:
            return 12;

        case kSpeakerModeNinePointOnePointFour:
            return 14;

        case kSpeakerModeNinePointOnePointSix:
            return 16;

        default:
            // custom mask: one speaker per set bit.
            return getSetBitCount(mode);
    }
}

/**
 * @brief Retrieves a standard speaker mode for a given channel count.
 *
 * @param channels  the number of channels to find a speaker mode for.
 * @returns the standard speaker mode mapped to @p channels, or `kSpeakerModeDefault`
 *          when no standard mode is mapped to that count (ie: 0, 9, 11, 13, 15 or
 *          anything above 16).
 */
constexpr SpeakerMode getSpeakerModeForCount(size_t channels)
{
    if (channels == 1)
        return kSpeakerModeMono;
    if (channels == 2)
        return kSpeakerModeStereo;
    if (channels == 3)
        return kSpeakerModeTwoPointOne;
    if (channels == 4)
        return kSpeakerModeQuad;
    if (channels == 5)
        return kSpeakerModeFourPointOne;
    if (channels == 6)
        return kSpeakerModeFivePointOne;
    if (channels == 7)
        return kSpeakerModeSixPointOne;
    if (channels == 8)
        return kSpeakerModeSevenPointOne;
    if (channels == 10)
        return kSpeakerModeNinePointOne;
    if (channels == 12)
        return kSpeakerModeSevenPointOnePointFour;
    if (channels == 14)
        return kSpeakerModeNinePointOnePointFour;
    if (channels == 16)
        return kSpeakerModeNinePointOnePointSix;

    // no standard layout for this channel count.
    return kSpeakerModeDefault;
}

/**
 * @brief Builds a speaker mask with the lowest @p channels bits set.
 *
 * @param channels  the number of low order bits to set in the mask.
 * @returns a mask with bits [0, channels) set.  When @p channels is `kMaxChannels`
 *          or larger, a mask with all 64 bits set is returned instead.
 */
constexpr SpeakerMode getSpeakerFlagsForCount(size_t channels)
{
    return (channels < kMaxChannels) ? ((1ull << channels) - 1) : 0xffffffffffffffffull;
}

/**
 * @brief Maps a single speaker flag to its matching @ref Speaker name.
 *
 * @param flag  the speaker flag to look up.  Only exact matches against one of the
 *              `fSpeakerFlag*` values handled below are recognized.
 * @returns the @ref Speaker name for @p flag, or `Speaker::eCount` when @p flag is
 *          not one of the recognized flags.
 */
constexpr Speaker getSpeakerFromSpeakerFlag(SpeakerMode flag)
{
    if (flag == fSpeakerFlagFrontLeft)
        return Speaker::eFrontLeft;
    if (flag == fSpeakerFlagFrontRight)
        return Speaker::eFrontRight;
    if (flag == fSpeakerFlagFrontCenter)
        return Speaker::eFrontCenter;
    if (flag == fSpeakerFlagLowFrequencyEffect)
        return Speaker::eLowFrequencyEffect;
    if (flag == fSpeakerFlagSideLeft)
        return Speaker::eSideLeft;
    if (flag == fSpeakerFlagSideRight)
        return Speaker::eSideRight;
    if (flag == fSpeakerFlagBackLeft)
        return Speaker::eBackLeft;
    if (flag == fSpeakerFlagBackRight)
        return Speaker::eBackRight;
    if (flag == fSpeakerFlagBackCenter)
        return Speaker::eBackCenter;
    if (flag == fSpeakerFlagTopFrontLeft)
        return Speaker::eTopFrontLeft;
    if (flag == fSpeakerFlagTopFrontRight)
        return Speaker::eTopFrontRight;
    if (flag == fSpeakerFlagTopBackLeft)
        return Speaker::eTopBackLeft;
    if (flag == fSpeakerFlagTopBackRight)
        return Speaker::eTopBackRight;
    if (flag == fSpeakerFlagFrontLeftWide)
        return Speaker::eFrontLeftWide;
    if (flag == fSpeakerFlagFrontRightWide)
        return Speaker::eFrontRightWide;
    if (flag == fSpeakerFlagTopLeft)
        return Speaker::eTopLeft;
    if (flag == fSpeakerFlagTopRight)
        return Speaker::eTopRight;

    // unknown flag or a combination of several flags.
    return Speaker::eCount;
}

/**
 * @brief Finds the bit position of the N-th set bit in a channel mask.
 *
 * @param channelMask  the mask to search.
 * @param index        zero based index of the set bit to locate, counted from the
 *                     least significant bit upward.
 * @returns the bit position of the requested set bit, or `kInvalidSpeakerName` when
 *          the mask is empty or holds fewer than `index + 1` set bits.
 */
constexpr size_t getSpeakerFromSpeakerMode(SpeakerMode channelMask, size_t index)
{
    // an empty mask can never satisfy the request.
    if (channelMask == 0)
        return kInvalidSpeakerName;

    size_t position = 0;
    size_t remaining = index;

    // probe each bit from the lowest upward until the probe shifts out to zero.
    for (SpeakerMode probe = 1; probe != 0; probe <<= 1, position++)
    {
        if ((channelMask & probe) != 0)
        {
            // this is the requested set bit => report where it lives.
            if (remaining == 0)
                return position;

            remaining--;
        }
    }

    return kInvalidSpeakerName;
}

/**
 * @brief Retrieves the number of bits per sample for a PCM sample format.
 *
 * @param fmt  the sample format to query.
 * @returns the sample width in bits for the PCM formats handled below, or 0 for any
 *          other format.
 */
constexpr size_t sampleFormatToBitsPerSample(SampleFormat fmt)
{
    switch (fmt)
    {
        case SampleFormat::ePcm8:
            return 8;

        case SampleFormat::ePcm16:
            return 16;

        case SampleFormat::ePcm24:
            return 24;

        // both of the 32 bit formats share the same width.
        case SampleFormat::ePcm32:
        case SampleFormat::ePcmFloat:
            return 32;

        default:
            break;
    }

    return 0;
}

/**
 * @brief Retrieves the integer PCM sample format matching a sample width.
 *
 * @param bps  the sample width in bits.
 * @returns the integer PCM format for widths of 8, 16, 24 and 32 bits, or
 *          `SampleFormat::eCount` for any other width.
 */
constexpr SampleFormat bitsPerSampleToIntegerPcmSampleFormat(size_t bps)
{
    if (bps == 8)
        return SampleFormat::ePcm8;
    if (bps == 16)
        return SampleFormat::ePcm16;
    if (bps == 24)
        return SampleFormat::ePcm24;
    if (bps == 32)
        return SampleFormat::ePcm32;

    return SampleFormat::eCount;
}

/**
 * @brief Converts a time in milliseconds to a frame count.
 *
 * @param timeInMilliseconds  the time to convert, in milliseconds.
 * @param frameRate           the frame rate in frames per second.
 * @returns the number of whole frames covered by the time span (truncated).
 */
constexpr size_t millisecondsToFrames(size_t timeInMilliseconds, size_t frameRate)
{
    constexpr size_t kMillisecondsPerSecond = 1000;
    return (frameRate * timeInMilliseconds) / kMillisecondsPerSecond;
}

/**
 * @brief Converts a time in microseconds to a frame count.
 *
 * @param timeInMicroseconds  the time to convert, in microseconds.
 * @param frameRate           the frame rate in frames per second.
 * @returns the number of whole frames covered by the time span (truncated).
 */
constexpr size_t microsecondsToFrames(size_t timeInMicroseconds, size_t frameRate)
{
    constexpr size_t kMicrosecondsPerSecond = 1000000;
    return (frameRate * timeInMicroseconds) / kMicrosecondsPerSecond;
}

/**
 * @brief Converts a time in milliseconds to a frame count using a format's frame rate.
 *
 * @param timeInMilliseconds  the time to convert, in milliseconds.
 * @param format              the format supplying the frame rate.  This is
 *                            dereferenced unconditionally so it may not be `nullptr`.
 * @returns the number of whole frames covered by the time span.
 */
inline size_t millisecondsToFrames(size_t timeInMilliseconds, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    return millisecondsToFrames(timeInMilliseconds, rate);
}

/**
 * @brief Converts a time in microseconds to a frame count using a format's frame rate.
 *
 * @param timeInMicroseconds  the time to convert, in microseconds.
 * @param format              the format supplying the frame rate.  This is
 *                            dereferenced unconditionally so it may not be `nullptr`.
 * @returns the number of whole frames covered by the time span.
 */
inline size_t microsecondsToFrames(size_t timeInMicroseconds, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    return microsecondsToFrames(timeInMicroseconds, rate);
}

/**
 * @brief Converts a time in milliseconds to a byte count.
 *
 * @param timeInMilliseconds  the time to convert, in milliseconds.
 * @param frameRate           the frame rate in frames per second.
 * @param channels            the number of channels per frame.
 * @param bps                 the number of bits per sample.
 * @returns the number of whole bytes of data covering the time span (truncated).
 */
constexpr size_t millisecondsToBytes(size_t timeInMilliseconds, size_t frameRate, size_t channels, size_t bps)
{
    // the product is in (bits * milliseconds / second), hence the combined divisor.
    const size_t scaledBits = timeInMilliseconds * frameRate * channels * bps;
    return scaledBits / (1000 * CHAR_BIT);
}

/**
 * @brief Converts a time in microseconds to a byte count.
 *
 * @param timeInMicroseconds  the time to convert, in microseconds.
 * @param frameRate           the frame rate in frames per second.
 * @param channels            the number of channels per frame.
 * @param bps                 the number of bits per sample.
 * @returns the number of whole bytes of data covering the time span (truncated).
 */
constexpr size_t microsecondsToBytes(size_t timeInMicroseconds, size_t frameRate, size_t channels, size_t bps)
{
    // the product is in (bits * microseconds / second), hence the combined divisor.
    const size_t scaledBits = timeInMicroseconds * frameRate * channels * bps;
    return scaledBits / (1000000 * CHAR_BIT);
}

/**
 * @brief Converts a time in milliseconds to a byte count for a given sample format.
 *
 * @param timeInMilliseconds  the time to convert, in milliseconds.
 * @param frameRate           the frame rate in frames per second.
 * @param channels            the number of channels per frame.
 * @param format              the sample format; its width is looked up with
 *                            sampleFormatToBitsPerSample().
 * @returns the number of whole bytes of data covering the time span.
 */
constexpr size_t millisecondsToBytes(size_t timeInMilliseconds, size_t frameRate, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return millisecondsToBytes(timeInMilliseconds, frameRate, channels, bitsPerSample);
}

/**
 * @brief Converts a time in microseconds to a byte count for a given sample format.
 *
 * @param timeInMicroseconds  the time to convert, in microseconds.
 * @param frameRate           the frame rate in frames per second.
 * @param channels            the number of channels per frame.
 * @param format              the sample format; its width is looked up with
 *                            sampleFormatToBitsPerSample().
 * @returns the number of whole bytes of data covering the time span.
 */
constexpr size_t microsecondsToBytes(size_t timeInMicroseconds, size_t frameRate, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return microsecondsToBytes(timeInMicroseconds, frameRate, channels, bitsPerSample);
}

/**
 * @brief Converts a time in milliseconds to a byte count using a sound format.
 *
 * @param timeInMilliseconds  the time to convert, in milliseconds.
 * @param format              supplies the frame rate, channel count and bits per
 *                            sample.  This is dereferenced unconditionally so it may
 *                            not be `nullptr`.
 * @returns the number of whole bytes of data covering the time span.
 */
inline size_t millisecondsToBytes(size_t timeInMilliseconds, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    const auto channelCount = format->channels;
    const auto bitsPerSample = format->bitsPerSample;
    return millisecondsToBytes(timeInMilliseconds, rate, channelCount, bitsPerSample);
}

/**
 * @brief Converts a time in microseconds to a byte count using a sound format.
 *
 * @param timeInMicroseconds  the time to convert, in microseconds.
 * @param format              supplies the frame rate, channel count and bits per
 *                            sample.  This is dereferenced unconditionally so it may
 *                            not be `nullptr`.
 * @returns the number of whole bytes of data covering the time span.
 */
inline size_t microsecondsToBytes(size_t timeInMicroseconds, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    const auto channelCount = format->channels;
    const auto bitsPerSample = format->bitsPerSample;
    return microsecondsToBytes(timeInMicroseconds, rate, channelCount, bitsPerSample);
}

/**
 * @brief Converts a frame count to a time in milliseconds.
 *
 * @param frames     the number of frames to convert.
 * @param frameRate  the frame rate in frames per second.  This is used as a divisor
 *                   so it must not be 0.
 * @returns the whole number of milliseconds covered by @p frames (truncated).
 */
constexpr size_t framesToMilliseconds(size_t frames, size_t frameRate)
{
    constexpr size_t kMillisecondsPerSecond = 1000;
    return (frames * kMillisecondsPerSecond) / frameRate;
}

/**
 * @brief Converts a frame count to a time in microseconds.
 *
 * @param frames     the number of frames to convert.
 * @param frameRate  the frame rate in frames per second.  This is used as a divisor
 *                   so it must not be 0.
 * @returns the whole number of microseconds covered by @p frames (truncated).
 */
constexpr size_t framesToMicroseconds(size_t frames, size_t frameRate)
{
    constexpr size_t kMicrosecondsPerSecond = 1000000;
    return (frames * kMicrosecondsPerSecond) / frameRate;
}

/**
 * @brief Converts a frame count to a time in milliseconds using a format's frame rate.
 *
 * @param frames  the number of frames to convert.
 * @param format  the format supplying the frame rate.  This is dereferenced
 *                unconditionally so it may not be `nullptr`.
 * @returns the whole number of milliseconds covered by @p frames.
 */
inline size_t framesToMilliseconds(size_t frames, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    return framesToMilliseconds(frames, rate);
}

/**
 * @brief Converts a frame count to a time in microseconds using a format's frame rate.
 *
 * @param frames  the number of frames to convert.
 * @param format  the format supplying the frame rate.  This is dereferenced
 *                unconditionally so it may not be `nullptr`.
 * @returns the whole number of microseconds covered by @p frames.
 */
inline size_t framesToMicroseconds(size_t frames, const SoundFormat* format)
{
    const auto rate = format->frameRate;
    return framesToMicroseconds(frames, rate);
}

/**
 * @brief Converts a frame count to a byte count.
 *
 * @param frames    the number of frames to convert.
 * @param channels  the number of channels per frame.
 * @param bps       the number of bits per sample.
 * @returns the number of whole bytes occupied by @p frames (truncated).
 */
constexpr size_t framesToBytes(size_t frames, size_t channels, size_t bps)
{
    const size_t totalBits = frames * channels * bps;
    return totalBits / CHAR_BIT;
}

/**
 * @brief Converts a frame count to a byte count for a given sample format.
 *
 * @param frames    the number of frames to convert.
 * @param channels  the number of channels per frame.
 * @param format    the sample format; its width is looked up with
 *                  sampleFormatToBitsPerSample().
 * @returns the number of whole bytes occupied by @p frames.
 */
constexpr size_t framesToBytes(size_t frames, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return framesToBytes(frames, channels, bitsPerSample);
}

/**
 * @brief Converts a frame count to a byte count using a sound format.
 *
 * @param frames  the number of frames to convert.
 * @param format  supplies the channel count and bits per sample.  This is
 *                dereferenced unconditionally so it may not be `nullptr`.
 * @returns the number of whole bytes occupied by @p frames.
 */
inline size_t framesToBytes(size_t frames, const SoundFormat* format)
{
    const auto channelCount = format->channels;
    const auto bitsPerSample = format->bitsPerSample;
    return framesToBytes(frames, channelCount, bitsPerSample);
}

/**
 * @brief Converts a byte count to a frame count.
 *
 * @param bytes     the number of bytes to convert.
 * @param channels  the number of channels per frame.
 * @param bps       the number of bits per sample.
 * @returns the number of whole frames contained in @p bytes (truncated).
 *
 * @note `channels * bps` is used as a divisor so neither value may be 0.
 */
constexpr size_t bytesToFrames(size_t bytes, size_t channels, size_t bps)
{
    const size_t bitsPerFrame = channels * bps;
    return (bytes * CHAR_BIT) / bitsPerFrame;
}

/**
 * @brief Converts a byte count to a frame count for a given sample format.
 *
 * @param bytes     the number of bytes to convert.
 * @param channels  the number of channels per frame.
 * @param format    the sample format; its width is looked up with
 *                  sampleFormatToBitsPerSample().
 * @returns the number of whole frames contained in @p bytes, or 0 (after logging an
 *          error) when @p format has no fixed sample width.
 */
constexpr size_t bytesToFrames(size_t bytes, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);

    // fixed width format => the plain arithmetic conversion applies.
    if (bitsPerSample != 0)
        return bytesToFrames(bytes, channels, bitsPerSample);

    CARB_LOG_ERROR("attempting to convert bytes to frames in a variable bitrate format (%d), return 0", int(format));
    return 0;
}

/**
 * @brief Converts a byte count to a frame count using a sound format.
 *
 * @param bytes   the number of bytes to convert.
 * @param format  supplies the channel count and bits per sample.  This is
 *                dereferenced unconditionally so it may not be `nullptr`.
 * @returns the number of whole frames contained in @p bytes, or 0 (after logging an
 *          error) when the format's bits per sample value is 0.
 */
inline size_t bytesToFrames(size_t bytes, const SoundFormat* format)
{
    // fixed width format => the plain arithmetic conversion applies.
    if (format->bitsPerSample != 0)
        return bytesToFrames(bytes, format->channels, format->bitsPerSample);

    CARB_LOG_ERROR(
        "attempting to convert bytes to frames in a variable bitrate format (%d), return 0", int(format->format));
    return 0;
}

/**
 * @brief Converts a byte count to a time in milliseconds.
 *
 * @param bytes      the number of bytes to convert.
 * @param frameRate  the frame rate in frames per second.  Used as a divisor so it
 *                   must not be 0.
 * @param channels   the number of channels per frame.
 * @param bps        the number of bits per sample.
 * @returns the whole number of milliseconds covered by @p bytes (truncated).
 */
constexpr size_t bytesToMilliseconds(size_t bytes, size_t frameRate, size_t channels, size_t bps)
{
    // the byte count is scaled up first so the result lands in milliseconds.
    const size_t scaledFrames = bytesToFrames(bytes * 1000, channels, bps);
    return scaledFrames / frameRate;
}

/**
 * @brief Converts a byte count to a time in microseconds.
 *
 * @param bytes      the number of bytes to convert.
 * @param frameRate  the frame rate in frames per second.  Used as a divisor so it
 *                   must not be 0.
 * @param channels   the number of channels per frame.
 * @param bps        the number of bits per sample.
 * @returns the whole number of microseconds covered by @p bytes (truncated).
 */
constexpr size_t bytesToMicroseconds(size_t bytes, size_t frameRate, size_t channels, size_t bps)
{
    // the byte count is scaled up first so the result lands in microseconds.
    const size_t scaledFrames = bytesToFrames(bytes * 1000000, channels, bps);
    return scaledFrames / frameRate;
}

/**
 * @brief Converts a byte count to a time in milliseconds for a given sample format.
 *
 * @param bytes      the number of bytes to convert.
 * @param frameRate  the frame rate in frames per second.
 * @param channels   the number of channels per frame.
 * @param format     the sample format; its width is looked up with
 *                   sampleFormatToBitsPerSample().
 * @returns the whole number of milliseconds covered by @p bytes.
 */
constexpr size_t bytesToMilliseconds(size_t bytes, size_t frameRate, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return bytesToMilliseconds(bytes, frameRate, channels, bitsPerSample);
}

/**
 * @brief Converts a byte count to a time in microseconds for a given sample format.
 *
 * @param bytes      the number of bytes to convert.
 * @param frameRate  the frame rate in frames per second.
 * @param channels   the number of channels per frame.
 * @param format     the sample format; its width is looked up with
 *                   sampleFormatToBitsPerSample().
 * @returns the whole number of microseconds covered by @p bytes.
 */
constexpr size_t bytesToMicroseconds(size_t bytes, size_t frameRate, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return bytesToMicroseconds(bytes, frameRate, channels, bitsPerSample);
}

inline size_t bytesToMilliseconds(size_t bytes, const SoundFormat* format)
{
    return bytesToMilliseconds(bytes, format->frameRate, format->channels, format->bitsPerSample);
}

inline size_t bytesToMicroseconds(size_t bytes, const SoundFormat* format)
{
    return bytesToMicroseconds(bytes, format->frameRate, format->channels, format->bitsPerSample);
}

/**
 * @brief Converts a value between byte, frame, millisecond and microsecond units.
 *
 * @param input        the value to convert.
 * @param inputUnits   the units @p input is expressed in.
 * @param outputUnits  the units to convert @p input to.
 * @param format       the sound format used for the conversions.  This is asserted to
 *                     be non-null.
 * @returns the converted value.  @p input is returned unchanged when both unit types
 *          are the same recognized type.  0 is returned when either unit type is not
 *          one of the four handled below.
 */
inline size_t convertUnits(size_t input, UnitType inputUnits, UnitType outputUnits, const SoundFormat* format)
{
    CARB_ASSERT(format != nullptr);

    if (inputUnits == UnitType::eBytes)
    {
        switch (outputUnits)
        {
            case UnitType::eBytes:
                return input;
            case UnitType::eFrames:
                return bytesToFrames(input, format);
            case UnitType::eMilliseconds:
                return bytesToMilliseconds(input, format);
            case UnitType::eMicroseconds:
                return bytesToMicroseconds(input, format);
            default:
                break;
        }
    }
    else if (inputUnits == UnitType::eFrames)
    {
        switch (outputUnits)
        {
            case UnitType::eBytes:
                return framesToBytes(input, format);
            case UnitType::eFrames:
                return input;
            case UnitType::eMilliseconds:
                return framesToMilliseconds(input, format);
            case UnitType::eMicroseconds:
                return framesToMicroseconds(input, format);
            default:
                break;
        }
    }
    else if (inputUnits == UnitType::eMilliseconds)
    {
        switch (outputUnits)
        {
            case UnitType::eBytes:
                return millisecondsToBytes(input, format);
            case UnitType::eFrames:
                return millisecondsToFrames(input, format);
            case UnitType::eMilliseconds:
                return input;
            case UnitType::eMicroseconds:
                // pure time scale change; the format is not involved.
                return input * 1000;
            default:
                break;
        }
    }
    else if (inputUnits == UnitType::eMicroseconds)
    {
        switch (outputUnits)
        {
            case UnitType::eBytes:
                return microsecondsToBytes(input, format);
            case UnitType::eFrames:
                return microsecondsToFrames(input, format);
            case UnitType::eMilliseconds:
                // pure time scale change; the format is not involved.
                return input / 1000;
            case UnitType::eMicroseconds:
                return input;
            default:
                break;
        }
    }

    // unknown input or output unit type.
    return 0;
}

/**
 * @brief Rounds a byte count up to the next frame boundary.
 *
 * @param bytes     the byte count to align.
 * @param channels  the number of channels per frame.
 * @param bps       the number of bits per sample.
 * @returns @p bytes rounded up to the next multiple of the frame size in bytes.  When
 *          the frame size works out to 0 bytes (ie: @p channels or @p bps is 0, or a
 *          frame is smaller than one byte) there is no boundary to align to and
 *          @p bytes is returned unchanged.
 */
constexpr size_t alignBytesToFrameCeil(size_t bytes, size_t channels, size_t bps)
{
    const size_t blockSize = (channels * bps) / CHAR_BIT;

    // a zero sized block would make the modulo below divide by zero.
    if (blockSize == 0)
        return bytes;

    const size_t count = bytes + (blockSize - 1);
    return count - (count % blockSize);
}

/**
 * @brief Rounds a byte count up to the next frame boundary for a given sample format.
 *
 * This is `constexpr` like its alignBytesToFrameFloor() counterpart; both functions it
 * calls are `constexpr` as well.
 *
 * @param bytes     the byte count to align.
 * @param channels  the number of channels per frame.
 * @param format    the sample format; its width is looked up with
 *                  sampleFormatToBitsPerSample().
 * @returns the result of the (bytes, channels, bits per sample) overload.
 */
constexpr size_t alignBytesToFrameCeil(size_t bytes, size_t channels, SampleFormat format)
{
    return alignBytesToFrameCeil(bytes, channels, sampleFormatToBitsPerSample(format));
}

/**
 * @brief Rounds a byte count up to the next frame boundary using a sound format.
 *
 * @param bytes   the byte count to align.
 * @param format  supplies the channel count and bits per sample.  This is
 *                dereferenced unconditionally so it may not be `nullptr`.
 * @returns the result of the (bytes, channels, bits per sample) overload.
 */
inline size_t alignBytesToFrameCeil(size_t bytes, const SoundFormat* format)
{
    const auto channelCount = format->channels;
    const auto bitsPerSample = format->bitsPerSample;
    return alignBytesToFrameCeil(bytes, channelCount, bitsPerSample);
}

/**
 * @brief Rounds a byte count down to the previous frame boundary.
 *
 * @param bytes     the byte count to align.
 * @param channels  the number of channels per frame.
 * @param bps       the number of bits per sample.
 * @returns @p bytes rounded down to a multiple of the frame size in bytes.  When the
 *          frame size works out to 0 bytes (ie: @p channels or @p bps is 0, or a
 *          frame is smaller than one byte) there is no boundary to align to and
 *          @p bytes is returned unchanged.
 */
constexpr size_t alignBytesToFrameFloor(size_t bytes, size_t channels, size_t bps)
{
    const size_t blockSize = (channels * bps) / CHAR_BIT;

    // a zero sized block would make the modulo below divide by zero.
    if (blockSize == 0)
        return bytes;

    return bytes - (bytes % blockSize);
}

/**
 * @brief Rounds a byte count down to the previous frame boundary for a sample format.
 *
 * @param bytes     the byte count to align.
 * @param channels  the number of channels per frame.
 * @param format    the sample format; its width is looked up with
 *                  sampleFormatToBitsPerSample().
 * @returns the result of the (bytes, channels, bits per sample) overload.
 */
constexpr size_t alignBytesToFrameFloor(size_t bytes, size_t channels, SampleFormat format)
{
    const size_t bitsPerSample = sampleFormatToBitsPerSample(format);
    return alignBytesToFrameFloor(bytes, channels, bitsPerSample);
}

/**
 * @brief Rounds a byte count down to the previous frame boundary using a sound format.
 *
 * @param bytes   the byte count to align.
 * @param format  supplies the channel count and bits per sample.  This is
 *                dereferenced unconditionally so it may not be `nullptr`.
 * @returns the result of the (bytes, channels, bits per sample) overload.
 */
inline size_t alignBytesToFrameFloor(size_t bytes, const SoundFormat* format)
{
    const auto channelCount = format->channels;
    const auto bitsPerSample = format->bitsPerSample;
    return alignBytesToFrameFloor(bytes, channelCount, bitsPerSample);
}

/**
 * @brief Fills in a @ref SoundFormat descriptor for a PCM format.
 *
 * @param out        receives the generated format.  This is dereferenced
 *                   unconditionally so it may not be `nullptr`.
 * @param format     the sample format to describe.
 * @param channels   the number of channels per frame.
 * @param frameRate  the frame rate to store in the descriptor.
 * @param mask       the channel mask to store; defaults to `kSpeakerModeDefault`.
 */
inline void generateSoundFormat(
    SoundFormat* out, SampleFormat format, size_t channels, size_t frameRate, SpeakerMode mask = kSpeakerModeDefault)
{
    // values taken directly from the caller.
    out->format = format;
    out->channels = channels;
    out->frameRate = frameRate;
    out->channelMask = mask;

    // sample width comes from the format; all of its bits are treated as valid.
    out->bitsPerSample = sampleFormatToBitsPerSample(out->format);
    out->validBitsPerSample = out->bitsPerSample;

    // PCM is 1 frame per block, so the block and frame sizes are the same.
    out->framesPerBlock = 1;
    out->frameSize = out->bitsPerSample / CHAR_BIT * out->channels;
    out->blockSize = out->frameSize;
}

/**
 * @brief Resets a @ref SoundDataLoadDesc to its default (value initialized) state.
 *
 * @param desc  the descriptor to reset.  This is dereferenced unconditionally so it
 *              may not be `nullptr`.
 */
inline void getSoundDataLoadDescDefaults(SoundDataLoadDesc* desc)
{
    *desc = SoundDataLoadDesc{};
}

/**
 * @brief Resets a @ref PlaySoundDesc to its default (value initialized) state.
 *
 * @param desc  the descriptor to reset.  This is dereferenced unconditionally so it
 *              may not be `nullptr`.
 */
inline void getPlaySoundDescDefaults(PlaySoundDesc* desc)
{
    *desc = PlaySoundDesc{};
}

/**
 * @brief Fills an @ref EntityCone with its default values.
 *
 * Both cone angles are set to `kConeAngleOmnidirectional` and the extension pointer
 * is cleared.
 *
 * @param cone  the cone descriptor to fill in.  This is dereferenced unconditionally
 *              so it may not be `nullptr`.
 */
inline void getConeDefaults(EntityCone* cone)
{
    EntityCone& target = *cone;

    target.insideAngle = kConeAngleOmnidirectional;
    target.outsideAngle = kConeAngleOmnidirectional;

    // NOTE(review): each pair is presumably the (inner, outer) cone value - confirm
    // against the EntityCone declaration.
    target.volume = { 1.0f, 0.0f };
    target.lowPassFilter = { 0.0f, 1.0f };
    target.reverb = { 0.0f, 1.0f };

    target.ext = nullptr;
}

/**
 * @brief Fills a @ref RolloffDesc with its default values.
 *
 * The defaults are an inverse rolloff type over the distance range [0, 10000] with
 * every curve pointer and the extension pointer cleared.
 *
 * @param desc  the rolloff descriptor to fill in.  This is dereferenced
 *              unconditionally so it may not be `nullptr`.
 */
inline void getRolloffDefaults(RolloffDesc* desc)
{
    RolloffDesc& target = *desc;

    target.type = RolloffType::eInverse;
    target.nearDistance = 0.0f;
    target.farDistance = 10000.0f;

    // no custom curves.
    target.volume = nullptr;
    target.lowFrequency = nullptr;
    target.lowPassDirect = nullptr;
    target.lowPassReverb = nullptr;
    target.reverb = nullptr;

    target.ext = nullptr;
}

/**
 * @brief Creates an empty sound data object with a requested format and buffer length.
 *
 * @param iface         the IAudioData interface used to create the object.
 * @param fmt           the PCM format for the new buffer.
 * @param frameRate     the frame rate for the new buffer.
 * @param channels      the number of channels for the new buffer.
 * @param bufferLength  the length of the new buffer, measured in @p unitType units.
 * @param unitType      the units @p bufferLength is expressed in; defaults to frames.
 * @param name          optional name for the object.  When `nullptr`, the
 *                      `fDataFlagNoName` flag is added to the request.
 * @returns whatever `IAudioData::createData()` returns for the request.
 */
inline SoundData* createEmptySound(const IAudioData* iface,
                                   SampleFormat fmt,
                                   size_t frameRate,
                                   size_t channels,
                                   size_t bufferLength,
                                   UnitType unitType = UnitType::eFrames,
                                   const char* name = nullptr)
{
    SoundDataLoadDesc request = {};

    request.flags |= fDataFlagEmpty;
    if (name == nullptr)
        request.flags |= fDataFlagNoName;

    request.name = name;
    request.pcmFormat = fmt;
    request.channels = channels;
    request.frameRate = frameRate;
    request.bufferLengthType = unitType;
    request.bufferLength = bufferLength;

    return iface->createData(&request);
}

/**
 * @brief Requests a copy of a sound data object converted to a new sample format.
 *
 * @param iface   the IAudioUtils interface performing the conversion.
 * @param snd     the sound data object to convert.
 * @param newFmt  the sample format to convert to.
 * @returns whatever `IAudioUtils::convert()` returns for the request.
 */
inline SoundData* convertSoundFormat(const IAudioUtils* iface, SoundData* snd, SampleFormat newFmt)
{
    ConversionDesc request = {};

    request.flags = fConvertFlagCopy;
    request.newFormat = newFmt;
    request.soundData = snd;

    return iface->convert(&request);
}

/**
 * @brief Requests a copy of a sound data object converted to the Vorbis format.
 *
 * @param iface               the IAudioUtils interface performing the conversion.
 * @param snd                 the sound data object to convert.
 * @param quality             value stored in `VorbisEncoderSettings::quality`.
 * @param nativeChannelOrder  value stored in
 *                            `VorbisEncoderSettings::nativeChannelOrder`.
 * @returns whatever `IAudioUtils::convert()` returns for the request.
 */
inline SoundData* convertToVorbis(const IAudioUtils* iface,
                                  SoundData* snd,
                                  float quality = 0.9f,
                                  bool nativeChannelOrder = false)
{
    // encoder settings only need to live for the duration of the convert() call.
    VorbisEncoderSettings settings = {};
    settings.quality = quality;
    settings.nativeChannelOrder = nativeChannelOrder;

    ConversionDesc request = {};
    request.flags = fConvertFlagCopy;
    request.soundData = snd;
    request.newFormat = SampleFormat::eVorbis;
    request.encoderSettings = &settings;

    return iface->convert(&request);
}

/**
 * @brief Requests a copy of a sound data object converted to the FLAC format.
 *
 * Each encoder parameter is stored in the matching @ref FlacEncoderSettings member.
 *
 * @param iface             the IAudioUtils interface performing the conversion.
 * @param snd               the sound data object to convert.
 * @param compressionLevel  value for `FlacEncoderSettings::compressionLevel`.
 * @param bitsPerSample     value for `FlacEncoderSettings::bitsPerSample`.
 * @param fileType          value for `FlacEncoderSettings::fileType`.
 * @param streamableSubset  value for `FlacEncoderSettings::streamableSubset`.
 * @param blockSize         value for `FlacEncoderSettings::blockSize`.
 * @param verifyOutput      value for `FlacEncoderSettings::verifyOutput`.
 * @returns whatever `IAudioUtils::convert()` returns for the request.
 */
inline SoundData* convertToFlac(const IAudioUtils* iface,
                                SoundData* snd,
                                uint32_t compressionLevel = 5,
                                uint32_t bitsPerSample = 0,
                                FlacFileType fileType = FlacFileType::eFlac,
                                bool streamableSubset = true,
                                uint32_t blockSize = 0,
                                bool verifyOutput = false)
{
    // encoder settings only need to live for the duration of the convert() call.
    FlacEncoderSettings settings = {};
    settings.compressionLevel = compressionLevel;
    settings.bitsPerSample = bitsPerSample;
    settings.fileType = fileType;
    settings.streamableSubset = streamableSubset;
    settings.blockSize = blockSize;
    settings.verifyOutput = verifyOutput;

    ConversionDesc request = {};
    request.flags = fConvertFlagCopy;
    request.soundData = snd;
    request.newFormat = SampleFormat::eFlac;
    request.encoderSettings = &settings;

    return iface->convert(&request);
}

inline bool saveSoundToDisk(const IAudioUtils* iface,
                            SoundData* snd,
                            const char* fileName,
                            SampleFormat fmt = SampleFormat::eDefault,
                            SaveFlags flags = 0)
{
    SoundDataSaveDesc desc = {};

    desc.flags = flags;
    desc.format = fmt;
    desc.soundData = snd;
    desc.filename = fileName;

    return iface->saveToFile(&desc);
}

inline bool saveToDiskAsVorbis(const IAudioUtils* iface,
                               SoundData* snd,
                               const char* fileName,
                               float quality = 0.9f,
                               bool nativeChannelOrder = false,
                               SaveFlags flags = 0)
{
    VorbisEncoderSettings vorbis = {};
    SoundDataSaveDesc desc = {};

    desc.flags = flags;
    desc.format = SampleFormat::eVorbis;
    desc.soundData = snd;
    desc.filename = fileName;
    desc.encoderSettings = &vorbis;

    vorbis.quality = quality;
    vorbis.nativeChannelOrder = nativeChannelOrder;

    return iface->saveToFile(&desc);
}

inline bool saveToDiskAsFlac(const IAudioUtils* iface,
                             SoundData* snd,
                             const char* fileName,
                             uint32_t compressionLevel = 5,
                             uint32_t bitsPerSample = 0,
                             FlacFileType fileType = FlacFileType::eFlac,
                             bool streamableSubset = true,
                             uint32_t blockSize = 0,
                             bool verifyOutput = false,
                             SaveFlags flags = 0)
{
    FlacEncoderSettings flac = {};
    carb::audio::SoundDataSaveDesc desc = {};

    desc.flags = flags;
    desc.format = SampleFormat::eFlac;
    desc.soundData = snd;
    desc.filename = fileName;
    desc.encoderSettings = &flac;

    flac.compressionLevel = compressionLevel;
    flac.bitsPerSample = bitsPerSample;
    flac.fileType = fileType;
    flac.streamableSubset = streamableSubset;
    flac.blockSize = blockSize;
    flac.verifyOutput = verifyOutput;

    return iface->saveToFile(&desc);
}

inline bool saveToDiskAsOpus(const IAudioUtils* iface,
                             SoundData* snd,
                             const char* fileName,
                             uint32_t bitrate = 0,
                             OpusCodecUsage usage = OpusCodecUsage::eGeneral,
                             int8_t complexity = -1,
                             uint8_t blockSize = 48,
                             uint8_t packetLoss = 0,
                             uint8_t bandwidth = 20,
                             uint8_t bitDepth = 0,
                             int16_t outputGain = 0,
                             OpusEncoderFlags flags = 0,
                             SaveFlags saveFlags = 0)
{
    OpusEncoderSettings opus = {};
    carb::audio::SoundDataSaveDesc desc = {};

    desc.flags = saveFlags;
    desc.format = SampleFormat::eOpus;
    desc.soundData = snd;
    desc.filename = fileName;
    desc.encoderSettings = &opus;

    opus.flags = flags;
    opus.bitrate = bitrate;
    opus.usage = usage;
    opus.complexity = complexity;
    opus.blockSize = blockSize;
    opus.packetLoss = packetLoss;
    opus.bandwidth = bandwidth;
    opus.bitDepth = bitDepth;
    opus.outputGain = outputGain;

    return iface->saveToFile(&desc);
}

/**
 * @brief Creates a sound data object from a file on disk.
 *
 * @param iface       the IAudioData interface used to create the object.
 * @param filename    the name of the file to load; stored as the descriptor's name.
 * @param streaming   when `true` the `fDataFlagStream` flag is requested, otherwise
 *                    `fDataFlagDecode` is requested.
 * @param autoStream  value stored in the descriptor's `autoStreamThreshold`.
 * @param fmt         the PCM format stored in the descriptor.
 * @param flags       optional extra flags.  Only `fDataFlagSkipMetaData`,
 *                    `fDataFlagSkipEventPoints` and `fDataFlagCalcPeaks` are accepted.
 * @returns whatever `IAudioData::createData()` returns for the request, or `nullptr`
 *          (after logging an error) when @p flags contains an unsupported flag.
 */
inline SoundData* createSoundFromFile(const IAudioData* iface,
                                      const char* filename,
                                      bool streaming = false,
                                      size_t autoStream = 0,
                                      SampleFormat fmt = SampleFormat::eDefault,
                                      DataFlags flags = 0)
{
    constexpr DataFlags kValidFlags = fDataFlagSkipMetaData | fDataFlagSkipEventPoints | fDataFlagCalcPeaks;

    // reject any flag outside of the supported set before building the request.
    if ((flags & ~kValidFlags) != 0)
    {
        CARB_LOG_ERROR("invalid flags 0x%08" PRIx32, flags);
        return nullptr;
    }

    SoundDataLoadDesc request = {};
    request.flags = flags;
    request.name = filename;
    request.pcmFormat = fmt;
    request.autoStreamThreshold = autoStream;
    request.flags |= streaming ? fDataFlagStream : fDataFlagDecode;

    return iface->createData(&request);
}

/**
 * @brief Creates a sound data object from a blob of file data held in memory.
 *
 * @param iface       the IAudioData interface used to create the object.
 * @param dataBlob    the blob of data to load from.
 * @param dataLength  the length of @p dataBlob in bytes.
 * @param streaming   when `true` the `fDataFlagStream` flag is requested, otherwise
 *                    `fDataFlagDecode` is requested.
 * @param autoStream  value stored in the descriptor's `autoStreamThreshold`.
 * @param fmt         the PCM format stored in the descriptor.
 * @param flags       optional extra flags.  Only `fDataFlagSkipMetaData`,
 *                    `fDataFlagSkipEventPoints`, `fDataFlagCalcPeaks` and
 *                    `fDataFlagUserMemory` are accepted.
 * @returns whatever `IAudioData::createData()` returns for the request, or `nullptr`
 *          (after logging an error) when @p flags contains an unsupported flag.
 */
inline SoundData* createSoundFromBlob(const IAudioData* iface,
                                      const void* dataBlob,
                                      size_t dataLength,
                                      bool streaming = false,
                                      size_t autoStream = 0,
                                      SampleFormat fmt = SampleFormat::eDefault,
                                      DataFlags flags = 0)
{
    constexpr DataFlags kValidFlags =
        fDataFlagSkipMetaData | fDataFlagSkipEventPoints | fDataFlagCalcPeaks | fDataFlagUserMemory;

    // reject any flag outside of the supported set before building the request.
    if ((flags & ~kValidFlags) != 0)
    {
        CARB_LOG_ERROR("invalid flags 0x%08" PRIx32, flags);
        return nullptr;
    }

    SoundDataLoadDesc request = {};
    request.flags = fDataFlagInMemory | flags;
    request.dataBlob = dataBlob;
    request.dataBlobLengthInBytes = dataLength;
    request.pcmFormat = fmt;
    request.autoStreamThreshold = autoStream;
    request.flags |= streaming ? fDataFlagStream : fDataFlagDecode;

    return iface->createData(&request);
}

/**
 * @brief Creates a sound data object from a blob of raw PCM data held in memory.
 *
 * @param iface       the IAudioData interface used to create the object.
 * @param dataBlob    the raw data to load from.
 * @param dataLength  the length of @p dataBlob in bytes.
 * @param frames      the buffer length stored in the descriptor, in frames.
 * @param format      supplies the channel count, frame rate and sample format (used as
 *                    both the encoded and the PCM format).  This is dereferenced
 *                    unconditionally so it may not be `nullptr`.
 * @returns whatever `IAudioData::createData()` returns for the request.
 */
inline SoundData* createSoundFromRawPcmBlob(
    const IAudioData* iface, const void* dataBlob, size_t dataLength, size_t frames, const SoundFormat* format)
{
    SoundDataLoadDesc request = {};

    request.flags = fDataFlagFormatRaw | fDataFlagInMemory;

    // where the data lives.
    request.dataBlob = dataBlob;
    request.dataBlobLengthInBytes = dataLength;

    // how the data is laid out.
    request.channels = format->channels;
    request.frameRate = format->frameRate;
    request.encodedFormat = format->format;
    request.pcmFormat = format->format;

    request.bufferLength = frames;
    request.bufferLengthType = UnitType::eFrames;

    return iface->createData(&request);
}

/**
 * @brief Plays a sound using an otherwise default play descriptor.
 *
 * @param iface    the IAudioPlayback interface used to play the sound.
 * @param ctx      the context to play the sound on.
 * @param snd      the sound data object to play.
 * @param spatial  when `true`, a voice parameter block requesting
 *                 `fPlaybackModeSpatial` is attached to the play descriptor.  When
 *                 `false` no voice parameters are passed.
 * @returns whatever `IAudioPlayback::playSound()` returns for the request.
 */
inline Voice* playOneShotSound(const IAudioPlayback* iface, Context* ctx, SoundData* snd, bool spatial = false)
{
    VoiceParams voiceParams = {};
    PlaySoundDesc request = {};

    request.sound = snd;

    // only override the playback mode when spatial playback was requested.
    if (spatial)
    {
        voiceParams.playbackMode = fPlaybackModeSpatial;
        request.params = &voiceParams;
        request.validParams = fVoiceParamPlaybackMode;
    }

    return iface->playSound(ctx, &request);
}

/**
 * @brief Plays a sound with a loop point attached to the play descriptor.
 *
 * @param iface      the IAudioPlayback interface used to play the sound.
 * @param ctx        the context to play the sound on.
 * @param snd        the sound data object to play.
 * @param loopCount  the loop count stored in the loop point; defaults to
 *                   `kEventPointLoopInfinite`.
 * @param spatial    when `true`, a voice parameter block requesting
 *                   `fPlaybackModeSpatial` is attached to the play descriptor.  When
 *                   `false` no voice parameters are passed.
 * @returns whatever `IAudioPlayback::playSound()` returns for the request.
 */
inline Voice* playLoopingSound(const IAudioPlayback* iface,
                               Context* ctx,
                               SoundData* snd,
                               size_t loopCount = kEventPointLoopInfinite,
                               bool spatial = false)
{
    VoiceParams voiceParams = {};
    PlaySoundDesc request = {};

    // the loop point lives on the stack and is handed to the descriptor by pointer.
    EventPoint loop = {};
    loop.loopCount = loopCount;

    request.sound = snd;
    request.loopPoint.loopPoint = &loop;

    // only override the playback mode when spatial playback was requested.
    if (spatial)
    {
        voiceParams.playbackMode = fPlaybackModeSpatial;
        request.params = &voiceParams;
        request.validParams = fVoiceParamPlaybackMode;
    }

    return iface->playSound(ctx, &request);
}

inline void setVoiceVolume(const IAudioPlayback* iface, Voice* voice, float volume)
{
    carb::audio::VoiceParams params = {};
    params.volume = volume;
    iface->setVoiceParameters(voice, fVoiceParamVolume, &params);
}

inline void setVoiceFrequencyRatio(const IAudioPlayback* iface, Voice* voice, float frequencyRatio)
{
    carb::audio::VoiceParams params = {};
    params.frequencyRatio = frequencyRatio;
    iface->setVoiceParameters(voice, fVoiceParamFrequencyRatio, &params);
}

inline void pauseVoice(const IAudioPlayback* iface, Voice* voice)
{
    carb::audio::VoiceParams params = {};
    params.playbackMode = fPlaybackModePaused;
    iface->setVoiceParameters(voice, fVoiceParamPause, &params);
}

inline void unpauseVoice(const IAudioPlayback* iface, Voice* voice)
{
    carb::audio::VoiceParams params = {};
    iface->setVoiceParameters(voice, fVoiceParamPause, &params);
}

inline void muteVoice(const IAudioPlayback* iface, Voice* voice)
{
    carb::audio::VoiceParams params = {};
    params.playbackMode = fPlaybackModeMuted;
    iface->setVoiceParameters(voice, fVoiceParamMute, &params);
}

inline void unmuteVoice(const IAudioPlayback* iface, Voice* voice)
{
    carb::audio::VoiceParams params = {};
    iface->setVoiceParameters(voice, fVoiceParamMute, &params);
}

inline void setVoiceMatrix(const IAudioPlayback* iface, Voice* voice, const float* matrix)
{
    carb::audio::VoiceParams params = {};
    params.matrix = matrix;
    iface->setVoiceParameters(voice, fVoiceParamMatrix, &params);
}

/**
 * @brief Converts a gain value to the 16 bit fixed point form used for Opus gain.
 *
 * @param gain  the gain value to convert.
 * @returns @p gain scaled by 256, clamped to the `int16_t` range and truncated to an
 *          integer.
 */
inline int16_t calculateOpusGain(float gain)
{
    // scaling by 256 turns the value into s7.8 fixed point.  A float's mantissa is
    // wide enough to hold every value in the 16 bit range without loss.
    const float fixedPoint = gain * 256.f;

    // clamp first in case the gain was too large, then drop the fractional part.
    return int16_t(CARB_CLAMP(fixedPoint, float(INT16_MIN), float(INT16_MAX)));
}

/**
 * @brief Converts a linear scale factor to a gain value.
 *
 * @param linear  the linear scale factor to convert.
 * @returns the gain, calculated as `20 * log10(linear)`.
 */
inline float calculateGainFromLinearScale(float linear)
{
    const float decades = log10f(linear);
    return 20.f * decades;
}

/**
 * @brief Converts a gain value to a linear scale factor.
 *
 * @param gain  the gain value to convert.
 * @returns the linear scale factor, calculated as `10 ^ (gain / 20)`.
 */
inline float calculateLinearScaleFromGain(float gain)
{
    constexpr float kOneTwentieth = 1.f / 20.f;
    return powf(10.f, gain * kOneTwentieth);
}

/**
 * @brief Increments a counter, wrapping back to 0 once it reaches a modulo.
 *
 * @param counter  the counter to increment.  Asserted to be less than @p modulo.
 * @param modulo   the value at which the counter wraps.  Asserted to be non-zero.
 * @returns `counter + 1`, or 0 when that would be equal to @p modulo.
 */
inline size_t incrementWithWrap(size_t counter, size_t modulo)
{
    CARB_ASSERT(modulo > 0);
    CARB_ASSERT(counter < modulo);

    const size_t next = counter + 1;
    if (next == modulo)
        return 0;

    return next;
}

/**
 * @brief Decrements a counter, wrapping to `modulo - 1` when it is already 0.
 *
 * @param counter  the counter to decrement.  Asserted to be less than or equal to
 *                 @p modulo.
 * @param modulo   the value at which the counter wraps.  Asserted to be non-zero.
 * @returns `counter - 1`, or `modulo - 1` when @p counter is 0.
 */
inline size_t decrementWithWrap(size_t counter, size_t modulo)
{
    CARB_ASSERT(modulo > 0);
    CARB_ASSERT(counter <= modulo);

    if (counter == 0)
        return modulo - 1;

    return counter - 1;
}

/**
 * @brief Estimates the latency of a video pipeline in microseconds.
 *
 * @param fps               the video frame rate in frames per second.
 * @param framesInFlight    the number of video frames in flight.
 * @param perceptibleDelay  threshold in microseconds at or below which the delay is
 *                          reported as 0; defaults to `kImperceptibleDelay`.
 * @returns 0 when @p fps is 0 or when the delay of the frames in flight does not
 *          exceed @p perceptibleDelay.  Otherwise the delay in microseconds, reduced
 *          by the smaller of half of @p perceptibleDelay and 20000.
 */
inline int64_t estimateVideoLatency(double fps, double framesInFlight, int64_t perceptibleDelay = kImperceptibleDelay)
{
    constexpr int64_t kMinLatency = 20'000;

    // no frame rate => no frame time can be calculated.
    if (fps == 0.0)
        return 0;

    const double usPerFrame = 1'000'000.0 / fps;
    const double pipelineDelay = usPerFrame * framesInFlight;

    // the delay is within the requested perceptible latency time => clamp the
    // estimated delay down to zero.
    if (pipelineDelay <= double(perceptibleDelay))
        return 0;

    // fudge the total by a small amount since queuing a new voice already carries an
    // expected small minimum latency of its own.
    const int64_t fudge = CARB_MIN(perceptibleDelay / 2, kMinLatency);
    return int64_t(pipelineDelay - double(fudge));
}

} // namespace audio
} // namespace carb