// carb/audio/IAudioData.h
//
// File members: carb/audio/IAudioData.h

// Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once

#include "../Interface.h"
#include "../assets/IAssets.h"
#include "AudioTypes.h"

namespace carb
{
namespace audio
{

/************************************* Interface Objects *****************************************/
/** Opaque sound data object.  Only the name is introduced here; this header exposes it solely
 *  through pointers handed to and returned from the IAudioData function table.
 *  NOTE(review): DOXYGEN_EMPTY_CLASS is defined elsewhere; presumably it expands to nothing in
 *  normal builds and to an empty body for documentation builds -- confirm.
 */
struct SoundData DOXYGEN_EMPTY_CLASS;

/** Opaque codec state object.  Returned by IAudioData::createCodecState and consumed by the other
 *  codec related entry points of IAudioData.
 */
struct CodecState DOXYGEN_EMPTY_CLASS;

/********************************* Sound Data Object Creation ************************************/
/** Special instance limit value (zero).  It is the default of SoundDataLoadDesc::maxInstances;
 *  as the name states, it stands for "no limit on the number of instances".
 */
constexpr uint32_t kInstancesUnlimited = 0u;

/** Bit field type holding the `fDataFlag*` values below.  Used by SoundDataLoadDesc::flags and
 *  returned by IAudioData::getFlags.
 *
 *  The low 8 bits (see fDataFlagFormatMask) carry a format selector value; all remaining constants
 *  are independent single-bit flags.  The per-flag descriptions are inferred from the identifiers
 *  only -- NOTE(review): confirm the exact semantics against the implementation.
 */
using DataFlags = uint32_t;

/** Mask selecting the bits that carry the format selector (bits 0-7). */
constexpr DataFlags fDataFlagFormatMask = 0x000000ff;

/** Format selector: automatic (value 0, i.e. no format bits set). */
constexpr DataFlags fDataFlagFormatAuto = 0x00000000;

/** Format selector: raw data. */
constexpr DataFlags fDataFlagFormatRaw = 0x00000001;

/** Format selector: PCM data. */
constexpr DataFlags fDataFlagFormatPcm = 0x00000002;

/** Flag: skip metadata. */
constexpr DataFlags fDataFlagSkipMetaData = 0x00200000;

/** Flag: skip event points. */
constexpr DataFlags fDataFlagSkipEventPoints = 0x00400000;

/** Flag: calculate peak volumes. */
constexpr DataFlags fDataFlagCalcPeaks = 0x01000000;

/** Flag: the data is provided in memory rather than by name. */
constexpr DataFlags fDataFlagInMemory = 0x02000000;

/** Flag: the memory holding the data belongs to the user. */
constexpr DataFlags fDataFlagUserMemory = 0x04000000;

/** Flag: create the sound data object empty. */
constexpr DataFlags fDataFlagEmpty = 0x08000000;

/** Flag: decoding is driven by user callbacks. */
constexpr DataFlags fDataFlagUserDecode = 0x10000000;

/** Flag: stream the data. */
constexpr DataFlags fDataFlagStream = 0x20000000;

/** Flag: decode the data. */
constexpr DataFlags fDataFlagDecode = 0x40000000;

/** Flag: do not keep a name for the object (highest bit of the field). */
constexpr DataFlags fDataFlagNoName = 0x80000000;
typedef AudioResult(CARB_ABI* SoundDataReadCallback)(const SoundData* soundData,
                                                     void* data,
                                                     size_t* dataLength,
                                                     void* context);

typedef AudioResult(CARB_ABI* SoundDataSetPosCallback)(const SoundData* soundData,
                                                       size_t position,
                                                       UnitType type,
                                                       void* context);

typedef void(CARB_ABI* SoundDataDestructionCallback)(const SoundData* soundData, void* context);

/** Memory limit threshold of 2^31 (2147483648).
 *  NOTE(review): units and consumers are not visible in this header; presumably a byte count
 *  related to SoundDataLoadDesc::autoStreamThreshold -- confirm.
 */
constexpr size_t kMemoryLimitThreshold = static_cast<size_t>(1) << 31;

/** Descriptor passed to IAudioData::createData (and embedded in SoundLoadParameters).
 *
 *  Every member has a default, so a default constructed descriptor is fully initialized.  The
 *  `padding*` members are explicit 32-bit fillers that follow each lone 32-bit member.  Field
 *  descriptions are inferred from names and types -- NOTE(review): confirm against the
 *  implementation.
 */
struct SoundDataLoadDesc
{
    /** Combination of `fDataFlag*` values. */
    DataFlags flags = 0;

    /** Explicit padding; defaults to zero. */
    uint32_t padding1 = 0;

    /** Name string for the sound data. */
    const char* name = nullptr;

    /** Pointer to a caller supplied data blob. */
    const void* dataBlob = nullptr;

    /** Size of @ref dataBlob in bytes. */
    size_t dataBlobLengthInBytes = 0;

    /** Channel count. */
    size_t channels = kDefaultChannelCount;

    /** Speaker mode mask describing the channels. */
    SpeakerMode channelMask = kSpeakerModeDefault;

    /** Frame rate. */
    size_t frameRate = kDefaultFrameRate;

    /** Sample format of the encoded data. */
    SampleFormat encodedFormat = SampleFormat::eDefault;

    /** Sample format of the PCM (decoded) data. */
    SampleFormat pcmFormat = SampleFormat::eDefault;

    /** Buffer length, expressed in @ref bufferLengthType units. */
    size_t bufferLength = 0;

    /** Units of @ref bufferLength; frames by default. */
    UnitType bufferLengthType = UnitType::eFrames;

    /** Explicit padding; defaults to zero. */
    uint32_t padding2 = 0;

    /** Optional user read callback. */
    SoundDataReadCallback readCallback = nullptr;

    /** Optional user set-position callback. */
    SoundDataSetPosCallback setPosCallback = nullptr;

    /** Opaque context value for the read callback. */
    void* readCallbackContext = nullptr;

    /** Optional callback notified on destruction of the sound data object. */
    SoundDataDestructionCallback destructionCallback = nullptr;

    /** Opaque context value for @ref destructionCallback. */
    void* destructionCallbackContext = nullptr;

    /** Untyped pointer to encoder settings.  NOTE(review): presumably one of the
     *  `*EncoderSettings` structs matching @ref encodedFormat -- confirm. */
    void* encoderSettings = nullptr;

    /** Instance limit; defaults to kInstancesUnlimited. */
    uint32_t maxInstances = kInstancesUnlimited;

    /** Explicit padding; defaults to zero. */
    uint32_t padding3 = 0;

    /** Size threshold for automatic streaming.  NOTE(review): meaning of 0 not visible here. */
    size_t autoStreamThreshold = 0;

    /** Extension pointer; null by default. */
    void* ext = nullptr;
};

/** Load parameters for sound assets: extends carb::assets::LoadParameters with a
 *  SoundDataLoadDesc.
 */
struct SoundLoadParameters : public carb::assets::LoadParameters
{
    /** Sound data descriptor; value-initialized, so all of its member defaults apply. */
    SoundDataLoadDesc params = {};
};

/************************************* Codec State Objects ***************************************/
/** Selects which half of a codec is meant.  Used as the discriminator of CodecStateDesc and as
 *  the `type` argument of IAudioData::getFormat.  Values are spelled out because the enum crosses
 *  the plugin boundary.
 */
enum class CodecPart
{
    /** The decoder half. */
    eDecoder = 0,

    /** The encoder half. */
    eEncoder = 1,
};

/** 64-bit bit field type for DecodeStateDesc::flags.  The per-flag descriptions are inferred from
 *  the identifiers only -- NOTE(review): confirm against the implementation.
 */
using DecodeStateFlags = uint64_t;

/** Flag (bit 0): force parsing. */
constexpr DecodeStateFlags fDecodeStateFlagForceParse = 0x00000001;

/** Flag (bit 1): optimize for seeking. */
constexpr DecodeStateFlags fDecodeStateFlagOptimizeSeek = 0x00000002;

/** Flag (bit 2): allow coarse seeking. */
constexpr DecodeStateFlags fDecodeStateFlagCoarseSeek = 0x00000004;

/** Flag (bit 3): skip metadata. */
constexpr DecodeStateFlags fDecodeStateFlagSkipMetaData = 0x00000008;

/** Flag (bit 4): skip event points. */
constexpr DecodeStateFlags fDecodeStateFlagSkipEventPoints = 0x00000010;

/** Decoder half of CodecStateDesc.
 *
 *  This struct has no default member initializers, so callers must fill in every field.  It is a
 *  member of the anonymous union in CodecStateDesc: adding default member initializers here would
 *  make its default constructor non-trivial and thereby delete CodecStateDesc's implicit default
 *  constructor.
 */
struct DecodeStateDesc
{
    /** Combination of `fDecodeStateFlag*` values. */
    DecodeStateFlags flags;

    /** The sound data object to decode from (mutable pointer). */
    SoundData* soundData;

    /** Sample format requested for the decoded output. */
    SampleFormat outputFormat;

    /** Opaque context for read callbacks.  NOTE(review): presumably forwarded to the
     *  SoundDataReadCallback / SoundDataSetPosCallback of the sound data -- confirm. */
    void* readCallbackContext;

    /** Extension pointer. */
    void* ext;
};

/** 64-bit bit field type for EncodeStateDesc::flags.  The per-flag descriptions are inferred from
 *  the identifiers only -- NOTE(review): confirm against the implementation.
 */
using EncodeStateFlags = uint64_t;

/** Flag (bit 0): do not expand the buffer. */
constexpr EncodeStateFlags fEncodeStateFlagNoExpandBuffer = 0x00000001;

/** Flag (bit 1): strip metadata. */
constexpr EncodeStateFlags fEncodeStateFlagStripMetaData = 0x00000002;

/** Flag (bit 2): strip event points. */
constexpr EncodeStateFlags fEncodeStateFlagStripEventPoints = 0x00000004;

/** Flag (bit 3): strip peak information. */
constexpr EncodeStateFlags fEncodeStateFlagStripPeaks = 0x00000008;
/** Encoder half of CodecStateDesc.
 *
 *  This struct has no default member initializers, so callers must fill in every field.  It is a
 *  member of the anonymous union in CodecStateDesc and therefore has to keep a trivial default
 *  constructor.
 */
struct EncodeStateDesc
{
    /** Combination of `fEncodeStateFlag*` values. */
    EncodeStateFlags flags;

    /** Read-only sound data object.  NOTE(review): its role relative to @ref target (e.g. source
     *  of format or metadata) is not visible in this header -- confirm. */
    const SoundData* soundData;

    /** Mutable sound data object; by its name, the destination of the encoded output. */
    SoundData* target;

    /** Sample format of the data handed to the encoder. */
    SampleFormat inputFormat;

    /** Untyped pointer to encoder settings.  NOTE(review): presumably one of the
     *  `*EncoderSettings` structs -- confirm. */
    void* encoderSettings;

    /** Extension pointer. */
    void* ext;
};

/** Descriptor passed to IAudioData::createCodecState.  A tagged union: @ref part tells which of
 *  the two overlapping descriptors is the meaningful one.
 */
struct CodecStateDesc
{
    /** Discriminator selecting between @ref decode and @ref encode. */
    CodecPart part;

    /** The two descriptors share storage; only one of them can be in use at a time. */
    union
    {
        /** Decoder descriptor (the one matching CodecPart::eDecoder by name). */
        DecodeStateDesc decode;
        /** Encoder descriptor (the one matching CodecPart::eEncoder by name). */
        EncodeStateDesc encode;
    };

    /** Extension pointer. */
    void* ext;
};

/** Encoder settings for wave output.
 *  NOTE(review): presumably passed through an `encoderSettings` void pointer -- confirm.
 */
struct WaveEncoderSettings
{
    /** Whether to align the data chunk; enabled by default.
     *  NOTE(review): the alignment boundary is not visible in this header. */
    bool alignDataChunk = true;
};

/** Encoder settings for Vorbis output.
 *  NOTE(review): presumably passed through an `encoderSettings` void pointer -- confirm.
 */
struct VorbisEncoderSettings
{
    /** Flags; no flag constants for this field are declared in this header.  Defaults to 0. */
    uint32_t flags = 0;

    /** Encoding quality; defaults to 0.9.  NOTE(review): valid range not visible here. */
    float quality = 0.9f;

    /** Whether to use the codec's native channel order; disabled by default. */
    bool nativeChannelOrder = false;
};

/** Container choice for FLAC output (see FlacEncoderSettings::fileType).  Values are spelled out
 *  because the enum crosses the plugin boundary.
 */
enum class FlacFileType
{
    /** FLAC container. */
    eFlac = 0,

    /** Ogg container. */
    eOgg = 1,
};

/** Encoder settings for FLAC output.
 *  NOTE(review): presumably passed through an `encoderSettings` void pointer; valid ranges and
 *  the meaning of zero values are not visible in this header -- confirm.
 */
struct FlacEncoderSettings
{
    /** Flags; no flag constants for this field are declared in this header.  Defaults to 0. */
    uint32_t flags = 0;

    /** Output container; FLAC by default. */
    FlacFileType fileType = FlacFileType::eFlac;

    /** Bits per sample; defaults to 0.  NOTE(review): 0 presumably means "choose automatically". */
    uint32_t bitsPerSample = 0;

    /** Compression level; defaults to 5. */
    uint32_t compressionLevel = 5;

    /** Block size; defaults to 0.  NOTE(review): 0 presumably means "choose automatically". */
    uint32_t blockSize = 0;

    /** Whether to restrict output to the streamable subset; enabled by default. */
    bool streamableSubset = true;

    /** Whether to verify the encoder output; disabled by default. */
    bool verifyOutput = false;
};

/** Intended content type for the Opus encoder (see OpusEncoderSettings::usage).  Values are
 *  spelled out because the enum crosses the plugin boundary.
 */
enum class OpusCodecUsage
{
    /** General purpose content. */
    eGeneral = 0,

    /** Music content. */
    eMusic = 1,

    /** Voice content. */
    eVoice = 2,
};

/** Special bitrate value 512001.
 *  NOTE(review): presumably one above the largest explicit Opus bitrate (512000 bits per second),
 *  used in OpusEncoderSettings::bitrate as a "maximum bitrate" sentinel -- confirm.
 *
 *  Declared `constexpr` like every other constant in this header; linkage and value are the same
 *  as for the previous `const` declaration.
 */
constexpr uint32_t kOpusBitrateMax = 512001;

/** Bit field type for OpusEncoderSettings::flags.  Each constant below occupies one bit.  The
 *  per-flag descriptions are inferred from the identifiers only -- NOTE(review): confirm against
 *  the implementation.
 */
using OpusEncoderFlags = uint32_t;

/** Flag (bit 0): low latency mode. */
constexpr OpusEncoderFlags fOpusEncoderFlagLowLatency = 1u << 0;

/** Flag (bit 1): constant bitrate. */
constexpr OpusEncoderFlags fOpusEncoderFlagConstantBitrate = 1u << 1;

/** Flag (bit 2): discontinuous transmission. */
constexpr OpusEncoderFlags fOpusEncoderFlagDiscontinuousTransmission = 1u << 2;

/** Flag (bit 3): disable prediction. */
constexpr OpusEncoderFlags fOpusEncoderFlagDisablePrediction = 1u << 3;

/** Flag (bit 4): use the codec's native channel order. */
constexpr OpusEncoderFlags fOpusEncoderFlagNativeChannelOrder = 1u << 4;

/** Encoder settings for Opus output.
 *  NOTE(review): presumably passed through an `encoderSettings` void pointer.  Units and valid
 *  ranges of the numeric fields are not visible in this header; the notes below only restate the
 *  defaults -- confirm details against the implementation.
 */
struct OpusEncoderSettings
{
    /** Combination of `fOpusEncoderFlag*` values; none set by default. */
    OpusEncoderFlags flags = 0;

    /** Intended content type; general purpose by default. */
    OpusCodecUsage usage = OpusCodecUsage::eGeneral;

    /** Frame count; defaults to 0.  NOTE(review): meaning of 0 not visible here. */
    size_t frames = 0;

    /** Bitrate; defaults to 0.  NOTE(review): kOpusBitrateMax is presumably a valid special value
     *  here and 0 presumably means "automatic" -- confirm. */
    uint32_t bitrate = 0;

    /** Block size; defaults to 48.  NOTE(review): units not visible here. */
    uint8_t blockSize = 48;

    /** Expected packet loss; defaults to 0.  NOTE(review): presumably a percentage. */
    uint8_t packetLoss = 0;

    /** Encoder complexity; defaults to -1.  NOTE(review): -1 presumably selects the default. */
    int8_t complexity = -1;

    /** Bandwidth; defaults to 20.  NOTE(review): presumably in kHz. */
    uint8_t bandwidth = 20;

    /** Bit depth; defaults to 0.  NOTE(review): 0 presumably means "derive from the input". */
    uint8_t bitDepth = 0;

    /** Output gain; defaults to 0 (signed 16-bit).  NOTE(review): units not visible here. */
    int16_t outputGain = 0;
};

/** Bit field type for CodecInfo::capabilities.  Each constant below occupies one bit.  The
 *  per-flag descriptions are inferred from the identifiers only -- NOTE(review): confirm against
 *  the implementation.
 */
using CodecCaps = uint32_t;

/** Capability (bit 0): the codec supports encoding. */
constexpr CodecCaps fCodecCapsSupportsEncode = 0x00000001;

/** Capability (bit 1): the codec supports decoding. */
constexpr CodecCaps fCodecCapsSupportsDecode = 0x00000002;

/** Capability (bit 2): the format is compressed. */
constexpr CodecCaps fCodecCapsCompressed = 0x00000004;

/** Capability (bit 3): the codec accepts additional parameters. */
constexpr CodecCaps fCodecCapsSupportsAdditionalParameters = 0x00000008;

/** Capability (bit 4): the codec requires additional parameters. */
constexpr CodecCaps fCodecCapsRequiresAdditionalParameters = 0x00000010;

/** Capability (bit 5): the codec supports setting the position. */
constexpr CodecCaps fCodecCapsSupportsSetPosition = 0x00000020;

/** Capability (bit 6): the codec's position is frame accurate. */
constexpr CodecCaps fCodecCapsHasFrameAccuratePosition = 0x00000040;

/** Capability (bit 7): the codec reports an accurate "available" value. */
constexpr CodecCaps fCodecCapsHasAccurateAvailableValue = 0x00000080;
/** Description of a codec, as returned (by const pointer) from IAudioData::getCodecFormatInfo.
 *  The struct has no default member initializers.
 */
struct CodecInfo
{
    /** Sample format of the encoded data handled by the codec. */
    SampleFormat encodedFormat;

    /** Sample format the codec prefers.  NOTE(review): presumably the preferred PCM format on
     *  the decoded side -- confirm. */
    SampleFormat preferredFormat;

    /** Codec name; fixed 256-char buffer. */
    char name[256];

    /** Codec provider; fixed 256-char buffer. */
    char provider[256];

    /** Copyright information; fixed 256-char buffer. */
    char copyright[256];

    /** Combination of `fCodecCaps*` values. */
    CodecCaps capabilities;

    /** Smallest supported block size.  NOTE(review): units not visible here. */
    size_t minBlockSize;

    /** Largest supported block size.  NOTE(review): units not visible here. */
    size_t maxBlockSize;

    /** Smallest supported channel count. */
    size_t minChannels;

    /** Largest supported channel count. */
    size_t maxChannels;
};

/*********************************** Metadata Definitions ***********************************/
/** Metadata tag name strings, for use as the `tagName` argument of IAudioData::getMetaData and
 *  IAudioData::setMetaData.  The string values are runtime data and must not be altered.
 *
 *  The constants are laid out in four groups separated by blank lines.
 *  NOTE(review): the grouping looks like it follows the tag sets of different file formats
 *  (RIFF INFO style names first, then Vorbis comment style names, then ID3 style names) --
 *  confirm before documenting each group by origin.
 */
constexpr char kMetaDataTagArchivalLocation[] = "Archival Location";
constexpr char kMetaDataTagCommissioned[] = "Commissioned";
constexpr char kMetaDataTagCropped[] = "Cropped";
constexpr char kMetaDataTagDimensions[] = "Dimensions";
constexpr char kMetaDataTagDisc[] = "Disc";
constexpr char kMetaDataTagDpi[] = "Dots Per Inch";
constexpr char kMetaDataTagEditor[] = "Editor";
constexpr char kMetaDataTagEngineer[] = "Engineer";
constexpr char kMetaDataTagKeywords[] = "Keywords";
constexpr char kMetaDataTagLanguage[] = "Language";
constexpr char kMetaDataTagLightness[] = "Lightness";
constexpr char kMetaDataTagMedium[] = "Medium";
constexpr char kMetaDataTagPaletteSetting[] = "Palette Setting";
constexpr char kMetaDataTagSubject[] = "Subject";
constexpr char kMetaDataTagSourceForm[] = "Source Form";
constexpr char kMetaDataTagSharpness[] = "Sharpness";
constexpr char kMetaDataTagTechnician[] = "Technician";
constexpr char kMetaDataTagWriter[] = "Writer";
constexpr char kMetaDataTagAlbum[] = "Album";
constexpr char kMetaDataTagArtist[] = "Artist";
constexpr char kMetaDataTagCopyright[] = "Copyright";
constexpr char kMetaDataTagCreationDate[] = "Date";
constexpr char kMetaDataTagDescription[] = "Description";
constexpr char kMetaDataTagGenre[] = "Genre";
constexpr char kMetaDataTagOrganization[] = "Organization";
constexpr char kMetaDataTagTitle[] = "Title";
constexpr char kMetaDataTagTrackNumber[] = "TrackNumber";
constexpr char kMetaDataTagEncoder[] = "Encoder";

// Second group of tag names.
constexpr char kMetaDataTagISRC[] = "ISRC";
constexpr char kMetaDataTagLicense[] = "License";
constexpr char kMetaDataTagPerformer[] = "Performer";
constexpr char kMetaDataTagVersion[] = "Version";
constexpr char kMetaDataTagLocation[] = "Location";
constexpr char kMetaDataTagContact[] = "Contact";
constexpr char kMetaDataTagComment[] = "Comment";

// Third group of tag names (each one separated by a blank line in the original layout).
constexpr char kMetaDataTagSpeed[] = "Speed";

constexpr char kMetaDataTagStartTime[] = "StartTime";

constexpr char kMetaDataTagEndTime[] = "EndTime";

constexpr char kMetaDataTagSubGenre[] = "SubGenre";

constexpr char kMetaDataTagBpm[] = "BPM";

constexpr char kMetaDataTagPlaylistDelay[] = "PlaylistDelay";

constexpr char kMetaDataTagFileName[] = "FileName";

// Fourth group of tag names.
// NOTE(review): kMetaDataTagOriginalAlbum maps to the string "OriginalTitle", not "OriginalAlbum".
// This may be deliberate (e.g. mirroring an "original album/movie/show title" field) or a naming
// slip; the string is left untouched because changing it would change which tag is read/written.
constexpr char kMetaDataTagOriginalAlbum[] = "OriginalTitle";
constexpr char kMetaDataTagOriginalWriter[] = "OriginalWriter";
constexpr char kMetaDataTagOriginalPerformer[] = "OriginalPerformer";
constexpr char kMetaDataTagOriginalYear[] = "OriginalYear";
constexpr char kMetaDataTagPublisher[] = "Publisher";
constexpr char kMetaDataTagRecordingDate[] = "RecordingDate";
constexpr char kMetaDataTagInternetRadioStationName[] = "InternetRadioStationName";
constexpr char kMetaDataTagInternetRadioStationOwner[] = "InternetRadioStationOwner";
constexpr char kMetaDataTagInternetRadioStationUrl[] = "InternetRadioStationUrl";
constexpr char kMetaDataTagPaymentUrl[] = "PaymentUrl";
constexpr char kMetaDataTagInternetCommercialInformationUrl[] = "CommercialInformationUrl";
constexpr char kMetaDataTagInternetCopyrightUrl[] = "CopyrightUrl";
constexpr char kMetaDataTagWebsite[] = "Website";
constexpr char kMetaDataTagInternetArtistWebsite[] = "ArtistWebsite";
constexpr char kMetaDataTagAudioSourceWebsite[] = "AudioSourceWebsite";
constexpr char kMetaDataTagComposer[] = "Composer";
constexpr char kMetaDataTagOwner[] = "Owner";
constexpr char kMetaDataTagTermsOfUse[] = "TermsOfUse";
constexpr char kMetaDataTagInitialKey[] = "InitialKey";

/** Null tag name.  NOTE(review): by its name, passing this as the `tagName` of
 *  IAudioData::setMetaData clears every tag -- confirm.
 *
 *  `constexpr` already makes the pointer itself const, so the declared type is still
 *  `const char* const`.
 */
constexpr const char* kMetaDataTagClearAllTags = nullptr;

/** Peak volume information, filled in through IAudioData::getPeakLevel.  Holds one entry per
 *  channel (arrays sized by kMaxChannels) plus one overall peak.  The struct has no default
 *  member initializers.
 */
struct PeakVolumes
{
    /** Channel count.  NOTE(review): presumably the number of valid entries in @ref frame and
     *  @ref peak -- confirm. */
    size_t channels;

    /** Per-channel frame value.  NOTE(review): presumably the frame at which each channel's peak
     *  occurs -- confirm. */
    size_t frame[kMaxChannels];

    /** Per-channel peak value. */
    float peak[kMaxChannels];

    /** Frame of the overall peak. */
    size_t peakFrame;

    /** Volume of the overall peak. */
    float peakVolume;
};

/** Identifier type for event points (see EventPoint::id and IAudioData::getEventPointById). */
using EventPointId = uint32_t;

/** Sentinel frame value with all bits set (the largest `size_t`), named as "invalid frame". */
constexpr size_t kEventPointInvalidFrame = SIZE_MAX;

/** Sentinel loop count with all bits set (the largest `size_t`), named as "loop infinitely". */
constexpr size_t kEventPointLoopInfinite = SIZE_MAX;

/** Describes one event point of a sound data object.  Used with IAudioData::getEventPoints,
 *  IAudioData::setEventPoints and the `getEventPointBy*` accessors.
 *
 *  Every member has a default, so `EventPoint ep;` yields a fully initialized object.  @ref id
 *  and @ref frame default to zero, which is the value they already had under value
 *  initialization (`EventPoint{}`).  Field descriptions beyond the defaults are inferred from
 *  names and types -- NOTE(review): confirm against the implementation.
 */
struct EventPoint
{
    /** Identifier of the event point. */
    EventPointId id = 0;

    /** Frame the event point is located at.  kEventPointInvalidFrame is the "invalid" sentinel. */
    size_t frame = 0;

    /** Optional label string. */
    const char* label = nullptr;

    /** Optional text string. */
    const char* text = nullptr;

    /** Length associated with the event point.  NOTE(review): presumably in frames, with 0
     *  meaning a plain point rather than a region -- confirm. */
    size_t length = 0;

    /** Loop count.  kEventPointLoopInfinite is the "infinite" sentinel. */
    size_t loopCount = 0;

    /** Play index (see IAudioData::getEventPointByPlayIndex). */
    size_t playIndex = 0;

    /** User data attached to the event point; value-initialized by default. */
    UserData userData = {};

    /** Extension pointer; null by default. */
    void* ext = nullptr;
};

/** Null event point table.  NOTE(review): by its name, passing this as the `eventPoints` argument
 *  of IAudioData::setEventPoints clears the table -- confirm.
 *
 *  `constexpr` already makes the pointer itself const, so the declared type is still
 *  `EventPoint* const`.
 */
constexpr EventPoint* kEventPointTableClear = nullptr;

/******************************** Sound Data Management Interface ********************************/
/** Plugin interface grouping the sound data, codec state and metadata entry points.
 *
 *  Every member is a `CARB_ABI` function pointer.  Callers reach the functions through this
 *  struct's layout, so members must not be reordered, removed or inserted in the middle.
 *
 *  The implementation is not visible from this header: the notes below restate what the
 *  signatures show, and anything tagged NOTE(review) is an inference that needs confirmation.
 */
struct IAudioData
{
    /** Declares the interface name "carb::audio::IAudioData" with the numbers 1 and 0.
     *  NOTE(review): presumably the major and minor interface version -- confirm against the
     *  macro definition. */
    CARB_PLUGIN_INTERFACE("carb::audio::IAudioData", 1, 0)

    /*************************** Sound Data Creation and Management ******************************/
    /** Creates a sound data object from a descriptor.
     *  @param desc  The descriptor to create the object from.
     *  @returns A sound data object pointer.  NOTE(review): presumably nullptr on failure. */
    SoundData*(CARB_ABI* createData)(const SoundDataLoadDesc* desc);

    /** Acquires a sound data object.
     *  @param sound  The object to acquire.
     *  @returns A sound data object pointer.  NOTE(review): presumably @p sound itself after
     *           taking a new reference -- confirm. */
    SoundData*(CARB_ABI* acquire)(SoundData* sound);

    /** Releases a sound data object.
     *  @param sound  The object to release.
     *  @returns A count.  NOTE(review): presumably the remaining reference count -- confirm. */
    size_t(CARB_ABI* release)(SoundData* sound);

    /*************************** Sound Data Information Accessors ********************************/
    /** Retrieves the `fDataFlag*` flags of a sound data object. */
    DataFlags(CARB_ABI* getFlags)(const SoundData* sound);

    /** Retrieves the name of a sound data object.
     *  NOTE(review): lifetime of the returned string and behaviour for nameless objects are not
     *  visible here. */
    const char*(CARB_ABI* getName)(const SoundData* sound);

    /** Retrieves the length of a sound data object, expressed in @p units. */
    size_t(CARB_ABI* getLength)(const SoundData* sound, UnitType units);

    /** Sets the valid length of a sound data object.
     *  @param sound   The object to modify.
     *  @param length  The new valid length, expressed in @p units.
     *  @param units   The units of @p length.
     *  @returns A success indicator.  NOTE(review): presumably true on success. */
    bool(CARB_ABI* setValidLength)(SoundData* sound, size_t length, UnitType units);

    /** Retrieves the valid length of a sound data object, expressed in @p units. */
    size_t(CARB_ABI* getValidLength)(const SoundData* sound, UnitType units);

    /** Retrieves a writable pointer to the object's buffer (note that @p sound itself is taken
     *  as const). */
    void*(CARB_ABI* getBuffer)(const SoundData* sound);

    /** Retrieves a read-only pointer to the object's buffer. */
    const void*(CARB_ABI* getReadBuffer)(const SoundData* sound);

    /** Retrieves the amount of memory used by a sound data object.
     *  NOTE(review): presumably in bytes. */
    size_t(CARB_ABI* getMemoryUsed)(const SoundData* sound);

    /** Retrieves format information of a sound data object.
     *  @param sound   The object to query.
     *  @param type    Which codec half (decoder or encoder) the query is about.
     *  @param format  Receives the format information. */
    void(CARB_ABI* getFormat)(const SoundData* sound, CodecPart type, SoundFormat* format);

    /** Retrieves peak volume information of a sound data object.
     *  @param sound  The object to query.
     *  @param peaks  Receives the peak information.
     *  @returns A success indicator.  NOTE(review): presumably false when no peak information is
     *           available. */
    bool(CARB_ABI* getPeakLevel)(const SoundData* sound, PeakVolumes* peaks);

    /** Retrieves event points of a sound data object into a caller supplied array.
     *  @param sound      The object to query.
     *  @param events     Array receiving the event points.
     *  @param maxEvents  Capacity of @p events.
     *  @returns A count.  NOTE(review): presumably the number of entries written (or available)
     *           -- confirm. */
    size_t(CARB_ABI* getEventPoints)(const SoundData* sound, EventPoint* events, size_t maxEvents);

    /** Looks up an event point by its identifier.
     *  NOTE(review): presumably returns nullptr when not found. */
    const EventPoint*(CARB_ABI* getEventPointById)(const SoundData* sound, EventPointId id);

    /** Looks up an event point by its index.
     *  NOTE(review): presumably returns nullptr when out of range. */
    const EventPoint*(CARB_ABI* getEventPointByIndex)(const SoundData* sound, size_t index);

    /** Looks up an event point by its play index (see EventPoint::playIndex).
     *  NOTE(review): presumably returns nullptr when not found. */
    const EventPoint*(CARB_ABI* getEventPointByPlayIndex)(const SoundData* sound, size_t playIndex);

    /** Retrieves the maximum play index among the object's event points. */
    size_t(CARB_ABI* getEventPointMaxPlayIndex)(const SoundData* sound);

    /** Sets event points on a sound data object.
     *  @param sound        The object to modify.
     *  @param eventPoints  Array of event points.  NOTE(review): kEventPointTableClear (nullptr)
     *                      is presumably accepted here to clear the table.
     *  @param count        Number of entries in @p eventPoints.
     *  @returns A success indicator. */
    bool(CARB_ABI* setEventPoints)(SoundData* sound, const EventPoint* eventPoints, size_t count);

    /** Retrieves the instance limit of a sound data object (see kInstancesUnlimited). */
    uint32_t(CARB_ABI* getMaxInstances)(const SoundData* sound);

    /** Sets the instance limit of a sound data object (see kInstancesUnlimited). */
    void(CARB_ABI* setMaxInstances)(SoundData* sound, uint32_t limit);

    /** Retrieves the user data pointer of a sound data object. */
    void*(CARB_ABI* getUserData)(const SoundData* sound);

    /** Sets the user data of a sound data object from a UserData descriptor. */
    void(CARB_ABI* setUserData)(SoundData* sound, const UserData* userData);

    /************************************ Sound Data Codec ***************************************/
    /** Retrieves codec information for a pair of formats.
     *  @param encodedFormat  The encoded sample format.
     *  @param pcmFormat      The PCM sample format.
     *  @returns A codec description.  NOTE(review): presumably nullptr when no codec matches. */
    const CodecInfo*(CARB_ABI* getCodecFormatInfo)(SampleFormat encodedFormat, SampleFormat pcmFormat);

    /** Creates a codec state (decoder or encoder, selected by CodecStateDesc::part).
     *  @returns A codec state pointer.  NOTE(review): presumably nullptr on failure. */
    CodecState*(CARB_ABI* createCodecState)(const CodecStateDesc* desc);

    /** Destroys a codec state.  The parameter is named `decodeState` but its type is the common
     *  CodecState. */
    void(CARB_ABI* destroyCodecState)(CodecState* decodeState);

    /** Decodes data through a decoder state.
     *  @param decodeState     The codec state to decode with.
     *  @param buffer          Caller supplied buffer.
     *  @param framesToDecode  Number of frames requested.
     *  @param framesDecoded   Receives a frame count.  NOTE(review): presumably the number of
     *                         frames actually decoded.
     *  @returns A read-only data pointer.  NOTE(review): presumably the decoded data, which may
     *           or may not be @p buffer -- confirm. */
    const void*(CARB_ABI* decodeData)(CodecState* decodeState, void* buffer, size_t framesToDecode, size_t* framesDecoded);

    /** Retrieves the amount of data available to decode, expressed in @p units. */
    size_t(CARB_ABI* getDecodeAvailable)(const CodecState* decodeState, UnitType units);

    /** Retrieves the current position of a codec state, expressed in @p units. */
    size_t(CARB_ABI* getCodecPosition)(const CodecState* decodeState, UnitType units);

    /** Sets the position of a codec state.
     *  @param decodeState  The codec state to reposition.
     *  @param newPosition  The new position, expressed in @p units.
     *  @param units        The units of @p newPosition.
     *  @returns A success indicator. */
    bool(CARB_ABI* setCodecPosition)(CodecState* decodeState, size_t newPosition, UnitType units);

    /** Retrieves a data size estimate for a given number of input bytes.
     *  NOTE(review): units of the result and direction of the estimate are not visible here. */
    size_t(CARB_ABI* getCodecDataSizeEstimate)(const CodecState* decodeState, size_t inputBytes);

    /** Encodes data through an encoder state.
     *  @param encodeState     The codec state to encode with.
     *  @param buffer          The input data.
     *  @param lengthInFrames  Length of @p buffer in frames.
     *  @returns A size value.  NOTE(review): units not visible here. */
    size_t(CARB_ABI* encodeData)(CodecState* encodeState, const void* buffer, size_t lengthInFrames);

    /***************************** Sound Data Metadata Information ********************************/
    /** Retrieves a metadata tag name by index.
     *  @param sound  The object to query.
     *  @param index  Index of the tag.
     *  @param value  Receives a string pointer.  NOTE(review): presumably the tag's value, and
     *                presumably optional -- confirm.
     *  @returns The tag name.  NOTE(review): presumably nullptr when @p index is out of range. */
    const char*(CARB_ABI* getMetaDataTagName)(const SoundData* sound, size_t index, const char** value);

    /** Retrieves the value of a metadata tag by name (see the `kMetaDataTag*` constants).
     *  NOTE(review): presumably nullptr when the tag is absent. */
    const char*(CARB_ABI* getMetaData)(const SoundData* sound, const char* tagName);

    /** Sets a metadata tag.
     *  @param sound     The object to modify.
     *  @param tagName   The tag name (see the `kMetaDataTag*` constants and
     *                   kMetaDataTagClearAllTags).
     *  @param tagValue  The tag value.  NOTE(review): handling of nullptr not visible here.
     *  @returns A success indicator. */
    bool(CARB_ABI* setMetaData)(SoundData* sound, const char* tagName, const char* tagValue);
};

} // namespace audio
} // namespace carb

// Asset declaration for carb::audio::SoundData with the numbers 0 and 1; hidden from
// documentation builds by the DOXYGEN_SHOULD_SKIP_THIS guard.
// NOTE(review): CARB_ASSET is defined elsewhere (presumably by the included assets header) and the
// two numbers are presumably the asset type's major and minor version -- confirm.
#ifndef DOXYGEN_SHOULD_SKIP_THIS
CARB_ASSET(carb::audio::SoundData, 0, 1);
#endif