// carb/audio/IAudioData.h
// File members: carb/audio/IAudioData.h
// Copyright (c) 2019-2023, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto. Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#pragma once
#include "../Interface.h"
#include "../assets/IAssets.h"
#include "AudioTypes.h"
namespace carb
{
namespace audio
{
/************************************* Interface Objects *****************************************/
/** Sound data object type.  No members are visible in this header; every use below goes through a
 *  `SoundData*` or `const SoundData*`.
 */
struct SoundData DOXYGEN_EMPTY_CLASS;
/** Codec state object type.  No members are visible in this header; every use below goes through
 *  a `CodecState*` or `const CodecState*`.
 */
struct CodecState DOXYGEN_EMPTY_CLASS;
/********************************* Sound Data Object Creation ************************************/
/** Default value of @ref SoundDataLoadDesc::maxInstances.
 *  NOTE(review): going by the name, zero presumably means "no instance limit" -- confirm against
 *  the implementation.
 */
constexpr uint32_t kInstancesUnlimited{ 0 };
/** 32-bit flag set used by @ref SoundDataLoadDesc::flags and returned by IAudioData::getFlags(). */
using DataFlags = uint32_t;

/** Mask selecting the low 8 bits of a @ref DataFlags value.  The three `fDataFlagFormat*` values
 *  that follow are small integers (not independent bits) that all fit inside this mask.
 */
constexpr DataFlags fDataFlagFormatMask = 0xffu;
/** Format field value 0. */
constexpr DataFlags fDataFlagFormatAuto = 0u;
/** Format field value 1. */
constexpr DataFlags fDataFlagFormatRaw = 1u;
/** Format field value 2. */
constexpr DataFlags fDataFlagFormatPcm = 2u;

// Single-bit flags.  Bits 8-20 and bit 23 have no name assigned in this header.
/** Bit 21. */
constexpr DataFlags fDataFlagSkipMetaData = 1u << 21;
/** Bit 22. */
constexpr DataFlags fDataFlagSkipEventPoints = 1u << 22;
/** Bit 24. */
constexpr DataFlags fDataFlagCalcPeaks = 1u << 24;
/** Bit 25. */
constexpr DataFlags fDataFlagInMemory = 1u << 25;
/** Bit 26. */
constexpr DataFlags fDataFlagUserMemory = 1u << 26;
/** Bit 27. */
constexpr DataFlags fDataFlagEmpty = 1u << 27;
/** Bit 28. */
constexpr DataFlags fDataFlagUserDecode = 1u << 28;
/** Bit 29. */
constexpr DataFlags fDataFlagStream = 1u << 29;
/** Bit 30. */
constexpr DataFlags fDataFlagDecode = 1u << 30;
/** Bit 31 (the top bit of the 32-bit value). */
constexpr DataFlags fDataFlagNoName = 1u << 31;
typedef AudioResult(CARB_ABI* SoundDataReadCallback)(const SoundData* soundData,
                                                     void* data,
                                                     size_t* dataLength,
                                                     void* context);
typedef AudioResult(CARB_ABI* SoundDataSetPosCallback)(const SoundData* soundData,
                                                       size_t position,
                                                       UnitType type,
                                                       void* context);
typedef void(CARB_ABI* SoundDataDestructionCallback)(const SoundData* soundData, void* context);
/** The value 2^31 (2147483648).
 *  NOTE(review): presumably a byte count used as a memory-size cut-off -- confirm where it is
 *  consumed; nothing in this header uses it.
 */
constexpr size_t kMemoryLimitThreshold = size_t{ 1 } << 31;
struct SoundDataLoadDesc
{
    DataFlags flags = 0;
    uint32_t padding1{ 0 };
    const char* name = nullptr;
    const void* dataBlob = nullptr;
    size_t dataBlobLengthInBytes = 0;
    size_t channels = kDefaultChannelCount;
    SpeakerMode channelMask = kSpeakerModeDefault;
    size_t frameRate = kDefaultFrameRate;
    SampleFormat encodedFormat = SampleFormat::eDefault;
    SampleFormat pcmFormat = SampleFormat::eDefault;
    size_t bufferLength = 0;
    UnitType bufferLengthType = UnitType::eFrames;
    uint32_t padding2{ 0 };
    SoundDataReadCallback readCallback = nullptr;
    SoundDataSetPosCallback setPosCallback = nullptr;
    void* readCallbackContext = nullptr;
    SoundDataDestructionCallback destructionCallback = nullptr;
    void* destructionCallbackContext = nullptr;
    void* encoderSettings = nullptr;
    uint32_t maxInstances = kInstancesUnlimited;
    uint32_t padding3{ 0 };
    size_t autoStreamThreshold = 0;
    void* ext = nullptr;
};
/** Asset load parameters for sound data: the base `carb::assets::LoadParameters` plus a
 *  default-initialized @ref SoundDataLoadDesc.
 */
struct SoundLoadParameters : carb::assets::LoadParameters
{
    /** The sound-specific load descriptor; starts out with all of its defaults. */
    SoundDataLoadDesc params{};
};
/************************************* Codec State Objects ***************************************/
/** Names one of the two halves of a codec.  Used as the discriminator in @ref CodecStateDesc and
 *  as the `type` argument of IAudioData::getFormat().
 */
enum class CodecPart
{
    eDecoder = 0, ///< the decoding half.
    eEncoder = 1, ///< the encoding half.
};
/** 64-bit flag set used by @ref DecodeStateDesc::flags. */
using DecodeStateFlags = uint64_t;

/** Bit 0. */
constexpr DecodeStateFlags fDecodeStateFlagForceParse = 1ull << 0;
/** Bit 1. */
constexpr DecodeStateFlags fDecodeStateFlagOptimizeSeek = 1ull << 1;
/** Bit 2. */
constexpr DecodeStateFlags fDecodeStateFlagCoarseSeek = 1ull << 2;
/** Bit 3. */
constexpr DecodeStateFlags fDecodeStateFlagSkipMetaData = 1ull << 3;
/** Bit 4. */
constexpr DecodeStateFlags fDecodeStateFlagSkipEventPoints = 1ull << 4;
/** Decoder half of @ref CodecStateDesc.
 *  This struct has no default member initializers (it lives inside the anonymous union of
 *  @ref CodecStateDesc), so a caller must set every field explicitly or value-initialize it.
 */
struct DecodeStateDesc
{
    /** Combination of `fDecodeStateFlag*` bits. */
    DecodeStateFlags flags;
    /** The sound data object involved; note it is a non-const pointer. */
    SoundData* soundData;
    /** The sample format requested for the output. */
    SampleFormat outputFormat;
    /** Opaque context pointer; presumably handed to the sound's read callback -- confirm. */
    void* readCallbackContext;
    /** Extension pointer. */
    void* ext;
};
/** 64-bit flag set used by @ref EncodeStateDesc::flags. */
using EncodeStateFlags = uint64_t;

/** Bit 0. */
constexpr EncodeStateFlags fEncodeStateFlagNoExpandBuffer = 1ull << 0;
/** Bit 1. */
constexpr EncodeStateFlags fEncodeStateFlagStripMetaData = 1ull << 1;
/** Bit 2. */
constexpr EncodeStateFlags fEncodeStateFlagStripEventPoints = 1ull << 2;
/** Bit 3. */
constexpr EncodeStateFlags fEncodeStateFlagStripPeaks = 1ull << 3;
/** Encoder half of @ref CodecStateDesc.
 *  This struct has no default member initializers (it lives inside the anonymous union of
 *  @ref CodecStateDesc), so a caller must set every field explicitly or value-initialize it.
 */
struct EncodeStateDesc
{
    /** Combination of `fEncodeStateFlag*` bits. */
    EncodeStateFlags flags;
    /** Read-only sound data object; going by the const qualifier this is the source side. */
    const SoundData* soundData;
    /** Writable sound data object that acts as the target. */
    SoundData* target;
    /** The sample format of the input. */
    SampleFormat inputFormat;
    /** Untyped pointer to encoder settings.  NOTE(review): presumably one of the
     *  `*EncoderSettings` structs declared in this header -- confirm which one applies.
     */
    void* encoderSettings;
    /** Extension pointer. */
    void* ext;
};
/** Descriptor passed to IAudioData::createCodecState(). */
struct CodecStateDesc
{
    /** Discriminator; presumably selects which member of the union below is meaningful
     *  (`eDecoder` for `decode`, `eEncoder` for `encode`) -- confirm.
     */
    CodecPart part;
    // Anonymous union: `decode` and `encode` occupy the same storage, so only one of them can
    // hold valid data at a time.
    union
    {
        DecodeStateDesc decode;
        EncodeStateDesc encode;
    };
    /** Extension pointer (separate from the `ext` members inside the union's structs). */
    void* ext;
};
/** Settings block for the wave encoder.
 *  NOTE(review): presumably supplied through an `encoderSettings` pointer -- confirm.
 */
struct WaveEncoderSettings
{
    /** Enabled by default.  The exact alignment applied to the data chunk is not visible here. */
    bool alignDataChunk{ true };
};
/** Settings block for the Vorbis encoder.
 *  NOTE(review): presumably supplied through an `encoderSettings` pointer -- confirm.
 */
struct VorbisEncoderSettings
{
    /** Flag bits; no flag constants for this field are declared in this header.  Defaults to 0. */
    uint32_t flags{ 0 };

    /** Encoder quality.  Defaults to 0.9; the valid range is not visible here. */
    float quality{ 0.9f };

    /** Disabled by default. */
    bool nativeChannelOrder{ false };
};
/** Selects the value of @ref FlacEncoderSettings::fileType. */
enum class FlacFileType
{
    eFlac = 0, ///< the default used by @ref FlacEncoderSettings.
    eOgg = 1,
};
/** Settings block for the FLAC encoder.
 *  NOTE(review): presumably supplied through an `encoderSettings` pointer -- confirm.
 */
struct FlacEncoderSettings
{
    /** Flag bits; no flag constants for this field are declared in this header.  Defaults to 0. */
    uint32_t flags{ 0 };

    /** Output file type.  Defaults to `FlacFileType::eFlac`. */
    FlacFileType fileType{ FlacFileType::eFlac };

    /** Bits per sample.  Defaults to 0; presumably 0 means "choose automatically" -- confirm. */
    uint32_t bitsPerSample{ 0 };

    /** Compression level.  Defaults to 5; the valid range is not visible here. */
    uint32_t compressionLevel{ 5 };

    /** Block size.  Defaults to 0; presumably 0 means "choose automatically" -- confirm. */
    uint32_t blockSize{ 0 };

    /** Enabled by default. */
    bool streamableSubset{ true };

    /** Disabled by default. */
    bool verifyOutput{ false };
};
/** Selects the value of @ref OpusEncoderSettings::usage. */
enum class OpusCodecUsage
{
    eGeneral = 0, ///< the default used by @ref OpusEncoderSettings.
    eMusic = 1,
    eVoice = 2,
};
/** The constant 512001.
 *  Declared `constexpr` to match every other constant in this header; its type is still
 *  `const uint32_t`, so existing uses are unaffected.
 *  NOTE(review): going by the name this is presumably the upper bound for
 *  @ref OpusEncoderSettings::bitrate -- confirm the units and whether the bound is inclusive.
 */
constexpr uint32_t kOpusBitrateMax = 512001;
/** 32-bit flag set used by @ref OpusEncoderSettings::flags. */
using OpusEncoderFlags = uint32_t;

/** Bit 0. */
constexpr OpusEncoderFlags fOpusEncoderFlagLowLatency = 1u << 0;
/** Bit 1. */
constexpr OpusEncoderFlags fOpusEncoderFlagConstantBitrate = 1u << 1;
/** Bit 2. */
constexpr OpusEncoderFlags fOpusEncoderFlagDiscontinuousTransmission = 1u << 2;
/** Bit 3. */
constexpr OpusEncoderFlags fOpusEncoderFlagDisablePrediction = 1u << 3;
/** Bit 4. */
constexpr OpusEncoderFlags fOpusEncoderFlagNativeChannelOrder = 1u << 4;
/** Settings block for the Opus encoder.
 *  NOTE(review): presumably supplied through an `encoderSettings` pointer -- confirm.  Units and
 *  valid ranges of the numeric members are not visible in this header.
 */
struct OpusEncoderSettings
{
    /** Combination of `fOpusEncoderFlag*` bits.  Defaults to no flags. */
    OpusEncoderFlags flags{ 0 };

    /** Intended usage of the encoded stream.  Defaults to `OpusCodecUsage::eGeneral`. */
    OpusCodecUsage usage{ OpusCodecUsage::eGeneral };

    /** Frame count.  Defaults to 0. */
    size_t frames{ 0 };

    /** Bitrate.  Defaults to 0; presumably 0 lets the encoder choose -- confirm. */
    uint32_t bitrate{ 0 };

    /** Block size.  Defaults to 48. */
    uint8_t blockSize{ 48 };

    /** Packet loss setting.  Defaults to 0. */
    uint8_t packetLoss{ 0 };

    /** Complexity setting (signed).  Defaults to -1; presumably "use the encoder default" --
     *  confirm.
     */
    int8_t complexity{ -1 };

    /** Bandwidth setting.  Defaults to 20. */
    uint8_t bandwidth{ 20 };

    /** Bit depth.  Defaults to 0. */
    uint8_t bitDepth{ 0 };

    /** Output gain (signed).  Defaults to 0. */
    int16_t outputGain{ 0 };
};
/** 32-bit flag set used by @ref CodecInfo::capabilities. */
using CodecCaps = uint32_t;

/** Bit 0. */
constexpr CodecCaps fCodecCapsSupportsEncode = 1u << 0;
/** Bit 1. */
constexpr CodecCaps fCodecCapsSupportsDecode = 1u << 1;
/** Bit 2. */
constexpr CodecCaps fCodecCapsCompressed = 1u << 2;
/** Bit 3. */
constexpr CodecCaps fCodecCapsSupportsAdditionalParameters = 1u << 3;
/** Bit 4. */
constexpr CodecCaps fCodecCapsRequiresAdditionalParameters = 1u << 4;
/** Bit 5. */
constexpr CodecCaps fCodecCapsSupportsSetPosition = 1u << 5;
/** Bit 6. */
constexpr CodecCaps fCodecCapsHasFrameAccuratePosition = 1u << 6;
/** Bit 7. */
constexpr CodecCaps fCodecCapsHasAccurateAvailableValue = 1u << 7;
/** Description of a codec, returned by pointer from IAudioData::getCodecFormatInfo().
 *  The struct has no default member initializers.
 */
struct CodecInfo
{
    /** The encoded sample format this entry describes. */
    SampleFormat encodedFormat;
    /** The codec's preferred sample format. */
    SampleFormat preferredFormat;
    /** Codec name; fixed 256-byte buffer (presumably NUL-terminated -- confirm). */
    char name[256];
    /** Provider text; fixed 256-byte buffer (presumably NUL-terminated -- confirm). */
    char provider[256];
    /** Copyright text; fixed 256-byte buffer (presumably NUL-terminated -- confirm). */
    char copyright[256];
    /** Combination of `fCodecCaps*` bits. */
    CodecCaps capabilities;
    /** Minimum block size; units are not visible in this header. */
    size_t minBlockSize;
    /** Maximum block size; units are not visible in this header. */
    size_t maxBlockSize;
    /** Minimum channel count. */
    size_t minChannels;
    /** Maximum channel count. */
    size_t maxChannels;
};
/*********************************** Metadata Definitions ***********************************/
// Metadata tag name strings.
//
// Each constant is a NUL-terminated `char` array (not a pointer), so `sizeof()` on one of them
// yields the length of the tag text plus one.  The tag text is runtime data: it must not be
// edited for cosmetic reasons.
//
// NOTE(review): the tag names presumably mirror the tag vocabularies of several container
// formats -- confirm before documenting which tag belongs to which format.

// Tags whose multi-word text is written with spaces.
constexpr char kMetaDataTagArchivalLocation[]{ "Archival Location" };
constexpr char kMetaDataTagCommissioned[]{ "Commissioned" };
constexpr char kMetaDataTagCropped[]{ "Cropped" };
constexpr char kMetaDataTagDimensions[]{ "Dimensions" };
constexpr char kMetaDataTagDisc[]{ "Disc" };
/** The tag text spells the abbreviation out in full. */
constexpr char kMetaDataTagDpi[]{ "Dots Per Inch" };
constexpr char kMetaDataTagEditor[]{ "Editor" };
constexpr char kMetaDataTagEngineer[]{ "Engineer" };
constexpr char kMetaDataTagKeywords[]{ "Keywords" };
constexpr char kMetaDataTagLanguage[]{ "Language" };
constexpr char kMetaDataTagLightness[]{ "Lightness" };
constexpr char kMetaDataTagMedium[]{ "Medium" };
constexpr char kMetaDataTagPaletteSetting[]{ "Palette Setting" };
constexpr char kMetaDataTagSubject[]{ "Subject" };
constexpr char kMetaDataTagSourceForm[]{ "Source Form" };
constexpr char kMetaDataTagSharpness[]{ "Sharpness" };
constexpr char kMetaDataTagTechnician[]{ "Technician" };
constexpr char kMetaDataTagWriter[]{ "Writer" };

// Tags whose multi-word text is written without spaces.
constexpr char kMetaDataTagAlbum[]{ "Album" };
constexpr char kMetaDataTagArtist[]{ "Artist" };
constexpr char kMetaDataTagCopyright[]{ "Copyright" };
/** The tag text is just "Date". */
constexpr char kMetaDataTagCreationDate[]{ "Date" };
constexpr char kMetaDataTagDescription[]{ "Description" };
constexpr char kMetaDataTagGenre[]{ "Genre" };
constexpr char kMetaDataTagOrganization[]{ "Organization" };
constexpr char kMetaDataTagTitle[]{ "Title" };
constexpr char kMetaDataTagTrackNumber[]{ "TrackNumber" };
constexpr char kMetaDataTagEncoder[]{ "Encoder" };
constexpr char kMetaDataTagISRC[]{ "ISRC" };
constexpr char kMetaDataTagLicense[]{ "License" };
constexpr char kMetaDataTagPerformer[]{ "Performer" };
constexpr char kMetaDataTagVersion[]{ "Version" };
constexpr char kMetaDataTagLocation[]{ "Location" };
constexpr char kMetaDataTagContact[]{ "Contact" };
constexpr char kMetaDataTagComment[]{ "Comment" };
constexpr char kMetaDataTagSpeed[]{ "Speed" };
constexpr char kMetaDataTagStartTime[]{ "StartTime" };
constexpr char kMetaDataTagEndTime[]{ "EndTime" };
constexpr char kMetaDataTagSubGenre[]{ "SubGenre" };
constexpr char kMetaDataTagBpm[]{ "BPM" };
constexpr char kMetaDataTagPlaylistDelay[]{ "PlaylistDelay" };
constexpr char kMetaDataTagFileName[]{ "FileName" };
/** NOTE(review): the identifier says "OriginalAlbum" but the tag text is "OriginalTitle".  The
 *  text is kept exactly as it was because changing it would change which tag is read and
 *  written -- confirm whether the mismatch is intentional.
 */
constexpr char kMetaDataTagOriginalAlbum[]{ "OriginalTitle" };
constexpr char kMetaDataTagOriginalWriter[]{ "OriginalWriter" };
constexpr char kMetaDataTagOriginalPerformer[]{ "OriginalPerformer" };
constexpr char kMetaDataTagOriginalYear[]{ "OriginalYear" };
constexpr char kMetaDataTagPublisher[]{ "Publisher" };
constexpr char kMetaDataTagRecordingDate[]{ "RecordingDate" };
constexpr char kMetaDataTagInternetRadioStationName[]{ "InternetRadioStationName" };
constexpr char kMetaDataTagInternetRadioStationOwner[]{ "InternetRadioStationOwner" };
constexpr char kMetaDataTagInternetRadioStationUrl[]{ "InternetRadioStationUrl" };
constexpr char kMetaDataTagPaymentUrl[]{ "PaymentUrl" };
/** The tag text omits the "Internet" prefix that the identifier carries. */
constexpr char kMetaDataTagInternetCommercialInformationUrl[]{ "CommercialInformationUrl" };
/** The tag text omits the "Internet" prefix that the identifier carries. */
constexpr char kMetaDataTagInternetCopyrightUrl[]{ "CopyrightUrl" };
constexpr char kMetaDataTagWebsite[]{ "Website" };
/** The tag text omits the "Internet" prefix that the identifier carries. */
constexpr char kMetaDataTagInternetArtistWebsite[]{ "ArtistWebsite" };
constexpr char kMetaDataTagAudioSourceWebsite[]{ "AudioSourceWebsite" };
constexpr char kMetaDataTagComposer[]{ "Composer" };
constexpr char kMetaDataTagOwner[]{ "Owner" };
constexpr char kMetaDataTagTermsOfUse[]{ "TermsOfUse" };
constexpr char kMetaDataTagInitialKey[]{ "InitialKey" };

/** A null tag name (type `const char* const`; `constexpr` already makes the pointer const).
 *  NOTE(review): presumably passed as the tag name to IAudioData::setMetaData() to remove every
 *  tag -- confirm.
 */
constexpr const char* kMetaDataTagClearAllTags = nullptr;
/** Peak volume information, filled in through IAudioData::getPeakLevel().
 *  The struct has no default member initializers.
 */
struct PeakVolumes
{
    /** Channel count; presumably the number of valid entries in @ref frame and @ref peak --
     *  confirm.
     */
    size_t channels;
    /** Per-channel frame values; the array always holds `kMaxChannels` slots. */
    size_t frame[kMaxChannels];
    /** Per-channel peak values; the array always holds `kMaxChannels` slots. */
    float peak[kMaxChannels];
    /** A single frame value; presumably where the overall peak occurs -- confirm. */
    size_t peakFrame;
    /** A single peak value; presumably the overall peak across channels -- confirm. */
    float peakVolume;
};
/** Identifier type stored in @ref EventPoint::id. */
using EventPointId = uint32_t;

/** Sentinel frame value: every bit of a `size_t` set (the same value as `SIZE_MAX`). */
constexpr size_t kEventPointInvalidFrame = SIZE_MAX;

/** Sentinel loop count: the largest `size_t` value.
 *  NOTE(review): presumably assigned to @ref EventPoint::loopCount to request endless looping
 *  -- confirm.
 */
constexpr size_t kEventPointLoopInfinite = SIZE_MAX;
/** One event point on a sound data object.
 *  @ref id and @ref frame deliberately have no default value and must be set by the caller;
 *  every other member starts out zeroed or null.
 */
struct EventPoint
{
    /** Identifier of this event point.  Not default-initialized. */
    EventPointId id;

    /** Frame value of this event point.  Not default-initialized. */
    size_t frame;

    /** Optional label string.  Defaults to null. */
    const char* label{ nullptr };

    /** Optional text string.  Defaults to null. */
    const char* text{ nullptr };

    /** Length value.  Defaults to 0; units are not visible in this header. */
    size_t length{ 0 };

    /** Loop count.  Defaults to 0. */
    size_t loopCount{ 0 };

    /** Play index.  Defaults to 0. */
    size_t playIndex{ 0 };

    /** Caller-owned user data block.  Defaults to an empty-initialized `UserData`. */
    UserData userData = {};

    /** Extension pointer.  Defaults to null. */
    void* ext{ nullptr };
};
/** A null event point table pointer (type `EventPoint* const`; `constexpr` already makes the
 *  pointer itself const).
 *  NOTE(review): presumably passed to IAudioData::setEventPoints() to remove every event point
 *  -- confirm.
 */
constexpr EventPoint* kEventPointTableClear{ nullptr };
/******************************** Sound Data Management Interface ********************************/
/** Sound data management interface: a table of `CARB_ABI` function pointers.
 *  The declaration order of the members is presumably part of the plugin ABI -- do not reorder
 *  or insert members without confirming how the interface is versioned.
 *  The per-member notes below only restate what the signatures show; anything about behavior is
 *  marked as an assumption.
 */
struct IAudioData
{
    // Declares this struct as the plugin interface named "carb::audio::IAudioData" with the
    // version arguments 1 and 0.
    CARB_PLUGIN_INTERFACE("carb::audio::IAudioData", 1, 0)
    /*************************** Sound Data Creation and Management ******************************/
    /** Takes a load descriptor and returns a sound data pointer (presumably null on failure --
     *  confirm).
     */
    SoundData*(CARB_ABI* createData)(const SoundDataLoadDesc* desc);
    /** Takes a sound data pointer and returns one; presumably adds a reference -- confirm. */
    SoundData*(CARB_ABI* acquire)(SoundData* sound);
    /** Takes a sound data pointer and returns a count; presumably drops a reference and reports
     *  how many remain -- confirm.
     */
    size_t(CARB_ABI* release)(SoundData* sound);
    /*************************** Sound Data Information Accessors ********************************/
    /** Returns the @ref DataFlags of a sound data object. */
    DataFlags(CARB_ABI* getFlags)(const SoundData* sound);
    /** Returns a name string for a sound data object (ownership/lifetime not visible here). */
    const char*(CARB_ABI* getName)(const SoundData* sound);
    /** Returns a length expressed in the requested @p units. */
    size_t(CARB_ABI* getLength)(const SoundData* sound, UnitType units);
    /** Sets the valid length from @p length in @p units; returns a success flag. */
    bool(CARB_ABI* setValidLength)(SoundData* sound, size_t length, UnitType units);
    /** Returns the valid length expressed in the requested @p units. */
    size_t(CARB_ABI* getValidLength)(const SoundData* sound, UnitType units);
    /** Returns a writable buffer pointer even though @p sound is const. */
    void*(CARB_ABI* getBuffer)(const SoundData* sound);
    /** Returns a read-only buffer pointer. */
    const void*(CARB_ABI* getReadBuffer)(const SoundData* sound);
    /** Returns an amount of memory used (presumably bytes -- confirm). */
    size_t(CARB_ABI* getMemoryUsed)(const SoundData* sound);
    /** Writes format information into @p format for the codec half selected by @p type. */
    void(CARB_ABI* getFormat)(const SoundData* sound, CodecPart type, SoundFormat* format);
    /** Writes peak information into @p peaks; returns a success flag. */
    bool(CARB_ABI* getPeakLevel)(const SoundData* sound, PeakVolumes* peaks);
    /** Writes up to @p maxEvents event points into @p events and returns a count. */
    size_t(CARB_ABI* getEventPoints)(const SoundData* sound, EventPoint* events, size_t maxEvents);
    /** Looks an event point up by its @ref EventPointId (presumably null if absent -- confirm). */
    const EventPoint*(CARB_ABI* getEventPointById)(const SoundData* sound, EventPointId id);
    /** Looks an event point up by index (presumably null if out of range -- confirm). */
    const EventPoint*(CARB_ABI* getEventPointByIndex)(const SoundData* sound, size_t index);
    /** Looks an event point up by play index (presumably null if absent -- confirm). */
    const EventPoint*(CARB_ABI* getEventPointByPlayIndex)(const SoundData* sound, size_t playIndex);
    /** Returns the maximum play index of a sound data object. */
    size_t(CARB_ABI* getEventPointMaxPlayIndex)(const SoundData* sound);
    /** Sets @p count event points from the @p eventPoints array; returns a success flag. */
    bool(CARB_ABI* setEventPoints)(SoundData* sound, const EventPoint* eventPoints, size_t count);
    /** Returns the instance limit of a sound data object. */
    uint32_t(CARB_ABI* getMaxInstances)(const SoundData* sound);
    /** Sets the instance limit of a sound data object. */
    void(CARB_ABI* setMaxInstances)(SoundData* sound, uint32_t limit);
    /** Returns the user data pointer of a sound data object. */
    void*(CARB_ABI* getUserData)(const SoundData* sound);
    /** Sets the user data of a sound data object from a `UserData` block. */
    void(CARB_ABI* setUserData)(SoundData* sound, const UserData* userData);
    /************************************ Sound Data Codec ***************************************/
    /** Returns codec information for an encoded/PCM format pair (presumably null when no codec
     *  matches -- confirm).
     */
    const CodecInfo*(CARB_ABI* getCodecFormatInfo)(SampleFormat encodedFormat, SampleFormat pcmFormat);
    /** Takes a codec state descriptor and returns a codec state pointer. */
    CodecState*(CARB_ABI* createCodecState)(const CodecStateDesc* desc);
    /** Takes a codec state pointer to destroy; despite the parameter name, the type is the
     *  generic `CodecState`.
     */
    void(CARB_ABI* destroyCodecState)(CodecState* decodeState);
    /** Decodes up to @p framesToDecode frames, reports the count through @p framesDecoded, and
     *  returns a read-only pointer (presumably to the decoded data -- confirm whether it can
     *  differ from @p buffer).
     */
    const void*(CARB_ABI* decodeData)(CodecState* decodeState, void* buffer, size_t framesToDecode, size_t* framesDecoded);
    /** Returns an available amount expressed in the requested @p units. */
    size_t(CARB_ABI* getDecodeAvailable)(const CodecState* decodeState, UnitType units);
    /** Returns the codec position expressed in the requested @p units. */
    size_t(CARB_ABI* getCodecPosition)(const CodecState* decodeState, UnitType units);
    /** Sets the codec position from @p newPosition in @p units; returns a success flag. */
    bool(CARB_ABI* setCodecPosition)(CodecState* decodeState, size_t newPosition, UnitType units);
    /** Returns a size estimate for @p inputBytes bytes of input. */
    size_t(CARB_ABI* getCodecDataSizeEstimate)(const CodecState* decodeState, size_t inputBytes);
    /** Encodes @p lengthInFrames frames from @p buffer and returns a size_t (its meaning is not
     *  visible in this header -- confirm).
     */
    size_t(CARB_ABI* encodeData)(CodecState* encodeState, const void* buffer, size_t lengthInFrames);
    /***************************** Sound Data Metadata Information ********************************/
    /** Returns the tag name at @p index and writes through @p value (presumably the tag's value
     *  string -- confirm).
     */
    const char*(CARB_ABI* getMetaDataTagName)(const SoundData* sound, size_t index, const char** value);
    /** Returns a string for the tag named @p tagName (presumably null if absent -- confirm). */
    const char*(CARB_ABI* getMetaData)(const SoundData* sound, const char* tagName);
    /** Sets the tag named @p tagName to @p tagValue; returns a success flag. */
    bool(CARB_ABI* setMetaData)(SoundData* sound, const char* tagName, const char* tagValue);
};
} // namespace audio
} // namespace carb
// Asset type registration for carb::audio::SoundData via the CARB_ASSET macro with the arguments
// 0 and 1 (presumably a major/minor version pair -- confirm).  It sits outside the carb::audio
// namespace and is compiled out when DOXYGEN_SHOULD_SKIP_THIS is defined.
#ifndef DOXYGEN_SHOULD_SKIP_THIS
CARB_ASSET(carb::audio::SoundData, 0, 1);
#endif