Tutorial 23 - Extended Attributes On The GPU

Extended attributes are no different from other types of attributes with respect to where their memory will be located. The difference is that there is a slightly different API for accessing their data, as illustrated by these examples.

This node also illustrates the new concept of having a node create an ABI function override that handles the runtime type resolution of extended attribute types. In this case, when any one of the two input attributes or the one output attribute becomes resolved, the other two attributes are resolved to the same type, if possible.

OgnTutorialCpuGpuExtended.ogn

The ogn file shows the implementation of a node named “omni.graph.tutorials.CpuGpuExtended” with an input ‘any’ attribute on the CPU, an input ‘any’ attribute on the GPU, and an output whose memory location is decided at runtime by a boolean.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
{
    "CpuGpuExtended": {
        "version": 1,
        "categories": "tutorials",
        "description": [
            "This is a tutorial node. It exercises functionality for accessing data in extended attributes that",
            "are on the GPU as well as those whose CPU/GPU location is decided at runtime. The compute",
            "adds the two inputs 'gpuData' and 'cpuData' together, placing the result in `cpuGpuSum`, whose",
            "memory location is determined by the 'gpu' flag.",
            "This node is identical to OgnTutorialCpuGpuExtendedPy.ogn, except it is implemented in C++."
        ],
        "tags": ["tutorial", "extended", "gpu"],
        "uiName": "Tutorial Node: CPU/GPU Extended Attributes",
        "inputs": {
             "cpuData": {
                "type": "any",
                "description": "Input attribute whose data always lives on the CPU",
                "uiName": "CPU Input Attribute"
             },
             "gpuData": {
                 "type": "any",
                 "memoryType": "cuda",
                 "description": "Input attribute whose data always lives on the GPU",
                 "uiName": "GPU Input Attribute"
             },
             "gpu": {
                 "type": "bool",
                 "description": "If true then put the sum on the GPU, otherwise put it on the CPU",
                 "uiName": "Results To GPU"
             }
        },
        "outputs": {
            "cpuGpuSum": {
                "type": "any",
                "memoryType": "any",
                "description": [
                    "This is the attribute with the selected data. If the 'gpu' attribute is set to true then this",
                    "attribute's contents will be entirely on the GPU, otherwise it will be on the CPU."
                ],
                "uiName": "Sum"
             }
        }
    }
}

OgnTutorialCpuGpuExtended.cpp

The cpp file contains the implementation of the compute method. It sums two inputs on either the CPU or GPU based on the input boolean. For simplicity only the pointf[3][] attribute type (an array of float[3] values with the position role) is processed, with all others resulting in a compute failure.

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
// Copyright (c) 2021, NVIDIA CORPORATION. All rights reserved.
//
// NVIDIA CORPORATION and its licensors retain all intellectual property
// and proprietary rights in and to this software, related documentation
// and any modifications thereto.  Any use, reproduction, disclosure or
// distribution of this software and related documentation without an express
// license agreement from NVIDIA CORPORATION is strictly prohibited.
//
#include <OgnTutorialCpuGpuExtendedDatabase.h>
// Summation routines with C linkage. They are only declared here, so their definitions must be supplied by
// another translation unit (presumably the node's CUDA source - confirm).
//
// CPU variant: receives the two input point arrays and the output array as pointers to float[3] elements,
// followed by a size_t; compute() passes the number of points in the first input for that argument.
extern "C" void cpuGpuSumCPU(float const(*p1)[3], float const(*p2)[3], float(*sums)[3], size_t);
// GPU variant: receives the same three arrays through one extra level of pointer indirection and has no
// element-count parameter.
extern "C" void cpuGpuSumGPU(float const (**p1)[3], float const (**p2)[3], float(**sums)[3]);

namespace omni {
namespace graph {
namespace tutorials {

// Only the pointf[3][] type is accepted. Make a shortcut to the type information describing it for comparison:
// float base type with the position role; the 3 and 1 are presumably the tuple count and array depth, matching
// the pointf[3][] spelling (confirm against the core::Type constructor).
// compute() compares the resolved type of both inputs and the output against this value before doing any work.
core::Type acceptedType(core::BaseDataType::eFloat, 3, 1, core::AttributeRole::ePosition);

// Shared validation for the CPU and GPU compute paths. The accessor types differ between the two paths, which is
// why this is a template, but the sequence of checks is the same for both.
//
//   db       Database used to log the warning explaining a failed check
//   points1  Accessor for the first set of input points
//   points2  Accessor for the second set of input points
//   sums     Accessor for the output points; resized to match the inputs when every check passes
//   type     Label inserted into the warning messages (compute() passes "CPU" or "GPU")
//
// Returns true when all three accessors test as valid and the two inputs are the same size, in which case the
// output has been resized to that size. Returns false, after logging a warning, on the first failed check.
template <typename P1, typename P2, typename P3>
bool verifyDataTypes(OgnTutorialCpuGpuExtendedDatabase& db, const P1& points1, const P2& points2, P3& sums, const char* type)
{
    // An accessor that tests false is reported as not being a valid pointf[3][]
    if (!points1)
    {
        db.logWarning("Skipping compute - The %s attribute was not a valid pointf[3][]", type);
        return false;
    }
    if (!points2)
    {
        db.logWarning("Skipping compute - The %s attribute was not a valid pointf[3][]", type);
        return false;
    }
    if (!sums)
    {
        db.logWarning("Skipping compute - The %s output attribute was not a valid pointf[3][]", type);
        return false;
    }

    // The sum is element-wise so both inputs must supply the same number of points
    const auto firstCount = points1.size();
    const auto secondCount = points2.size();
    if (firstCount != secondCount)
    {
        db.logWarning("Skipping compute - Point arrays are different sizes (%zu and %zu)", firstCount, secondCount);
        return false;
    }

    // Everything checks out; make room in the output for one sum per input point
    sums.resize(firstCount);
    return true;
}

class OgnTutorialCpuGpuExtended
{
    bool m_allAttributesResolved{ false };
public:
    static bool compute(OgnTutorialCpuGpuExtendedDatabase& db)
    {
        if (! db.internalState<OgnTutorialCpuGpuExtended>().m_allAttributesResolved)
        {
            db.logWarning("All types are not yet resolved. Cannot run the compute.");
            return false;
        }

        const auto& gpu = db.inputs.gpu();
        const auto cpuData = db.inputs.cpuData();
        const auto gpuData = db.inputs.gpuData();
        auto cpuGpuSum = db.outputs.cpuGpuSum();

        if ((cpuData.type() != acceptedType) || (gpuData.type() != acceptedType) || (cpuGpuSum.type() != acceptedType))
        {
            db.logWarning("Skipping compute - All of the attributes do not have the accepted resolved type pointf[3][]");
            return false;
        }

        if (gpu)
        {
            // Computation on the GPU has been requested so get the GPU versions of the attribute data
            const auto points1 = cpuData.getGpu<float[][3]>();
            const auto points2 = gpuData.get<float[][3]>();
            auto sums = cpuGpuSum.getGpu<float[][3]>();
            if (!verifyDataTypes(db, points1, points2, sums, "GPU"))
            {
                return false;
            }
            cpuGpuSumGPU(points1(), points2(), sums());
        }
        else
        {
            // Computation on the CPU has been requested so get the CPU versions of the attribute data
            const auto points1 = cpuData.get<float[][3]>();
            const auto points2 = gpuData.getCpu<float[][3]>();
            auto sums = cpuGpuSum.getCpu<float[][3]>();
            if (!verifyDataTypes(db, points1, points2, sums, "CPU"))
            {
                return false;
            }
            cpuGpuSumCPU(points1->data(), points2->data(), sums->data(), points1.size());
        }
        return true;
    }

    static void onConnectionTypeResolve(const NodeObj& nodeObj)
    {
        // If any one type is resolved the others should resolve to the same type. Calling this helper function
        // makes that happen automatically. If it returns false then the resolution failed for some reason. The
        // node's user data, which is just a copy of this class, is used to keep track of the resolution state so
        // that the compute method can quickly exit when the types are not resolved.
        AttributeObj attributes[3] {
            nodeObj.iNode->getAttributeByToken(nodeObj, inputs::cpuData.token()),
            nodeObj.iNode->getAttributeByToken(nodeObj, inputs::gpuData.token()),
            nodeObj.iNode->getAttributeByToken(nodeObj, outputs::cpuGpuSum.token())
        };
        auto& state = OgnTutorialCpuGpuExtendedDatabase::sInternalState<OgnTutorialCpuGpuExtended>(nodeObj);
        state.m_allAttributesResolved = nodeObj.iNode->resolveCoupledAttributes(nodeObj, attributes, 3);
    }
};

// NOTE(review): presumably registers the node class in this file with OmniGraph. The macro is not defined here
// (likely supplied through the generated database header) - confirm.
REGISTER_OGN_NODE()

} // namespace tutorials
} // namespace graph
} // namespace omni

OgnTutorialCpuGpuExtendedPy.py

The py file contains the same algorithm as the C++ node, with the node implementation language being different.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
"""
Implementation of the Python node accessing extended attributes whose memory location is determined at runtime.
"""
import omni.graph.core as og


# Only one type of data is handled by the compute - pointf[3][], i.e. an array (depth 1) of 3-tuples of floats
# carrying the position role. compute() compares the resolved type of each extended attribute against this value
# and skips the computation when any of them differs.
POINT_ARRAY_TYPE = og.Type(og.BaseDataType.FLOAT, tuple_count=3, array_depth=1, role=og.AttributeRole.POSITION)


class OgnTutorialCpuGpuExtendedPy:
    """Exercise GPU access for extended attributes through a Python OmniGraph node"""

    @staticmethod
    def compute(db) -> bool:
        """Implements the same algorithm as the C++ node OgnTutorialCpuGpuExtended.cpp.

        It follows the same code pattern for easier comparison, though in practice you would probably code Python
        nodes differently from C++ nodes to take advantage of the strengths of each language.

        Args:
            db: Node database giving access to the attributes, their values, and the logging functions

        Returns:
            True if the sum was computed. False, after logging a warning, if any of the three extended
            attributes did not resolve to pointf[3][] or if the two inputs hold different numbers of points.
        """
        # Find and verify the attributes containing the points
        if db.attributes.inputs.cpuData.get_resolved_type() != POINT_ARRAY_TYPE:
            db.log_warning("Skipping compute - CPU attribute type did not resolve to pointf[3][]")
            return False
        if db.attributes.inputs.gpuData.get_resolved_type() != POINT_ARRAY_TYPE:
            db.log_warning("Skipping compute - GPU attribute type did not resolve to pointf[3][]")
            return False
        if db.attributes.outputs.cpuGpuSum.get_resolved_type() != POINT_ARRAY_TYPE:
            db.log_warning("Skipping compute - Sum attribute type did not resolve to pointf[3][]")
            return False

        # Put accessors into local variables for convenience
        gpu_data = db.inputs.gpuData
        cpu_data = db.inputs.cpuData
        sums = db.outputs.cpuGpuSum

        # Mismatched sizes cannot be computed, so stop here rather than falling through to the sum
        if gpu_data.size != cpu_data.size:
            db.log_warning(f"Skipping compute - Point arrays are different sizes ({gpu_data.size} and {cpu_data.size})")
            return False

        # Set the size to what is required for the element-wise sum
        sums.size = cpu_data.size

        # Use the correct data access based on whether the output is supposed to be on the GPU or not
        if db.inputs.gpu:
            # The commented-out line is how the values would be extracted if Python supported GPU data extraction.
            # When it does this tutorial will be updated. Until then the CPU values are used in both branches.
            sums.cpu_value = cpu_data.value + gpu_data.cpu_value
            # sums.gpu_value = cpu_data.gpu_value + gpu_data.value
        else:
            sums.cpu_value = cpu_data.value + gpu_data.cpu_value
        return True

    @staticmethod
    def on_connection_type_resolve(node: og.Node) -> None:
        """Whenever any of the inputs or the output get a resolved type the others should get the same resolution

        Args:
            node: Node whose two extended inputs and one extended output are to be kept at the same type
        """
        attribs = [
            node.get_attribute("inputs:cpuData"),
            node.get_attribute("inputs:gpuData"),
            node.get_attribute("outputs:cpuGpuSum"),
        ]
        og.resolve_fully_coupled(attribs)