From 4d1ddb4bff40518750fccbc72864ab7dfbab7603 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 10:39:37 -0400 Subject: [PATCH 1/5] Fix #1995: Use StrEnum for enum-like strings --- cuda_core/cuda/core/_linker.pyx | 13 +- .../core/_memory/_managed_memory_resource.pyx | 12 +- .../core/_memory/_virtual_memory_resource.py | 70 ++++----- cuda_core/cuda/core/_module.pyx | 19 +-- cuda_core/cuda/core/_program.pyx | 68 +++++---- .../cuda/core/graph/_graph_definition.pyx | 6 +- cuda_core/cuda/core/graph/_graph_node.pyx | 2 +- cuda_core/cuda/core/graph/_subclasses.pyx | 15 +- cuda_core/cuda/core/system/_device.pyx | 1 + cuda_core/cuda/core/system/_temperature.pxi | 1 + cuda_core/cuda/core/typing.py | 93 +++++++++++- cuda_core/docs/source/api_private.rst | 21 ++- .../tests/graph/test_graph_definition.py | 11 +- cuda_core/tests/test_enum_coverage.py | 134 +++++++++++++++--- cuda_core/tests/test_launcher.py | 5 +- cuda_core/tests/test_memory.py | 30 ++-- cuda_core/tests/test_program.py | 5 +- 17 files changed, 357 insertions(+), 149 deletions(-) diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx index e89e780b34c..1a28c6ed3aa 100644 --- a/cuda_core/cuda/core/_linker.pyx +++ b/cuda_core/cuda/core/_linker.pyx @@ -39,6 +39,7 @@ from cuda.core._utils.cuda_utils import ( driver, is_sequence, ) +from cuda.core.typing import CompilerBackend ctypedef const char* const_char_ptr ctypedef void* void_ptr @@ -70,12 +71,12 @@ cdef class Linker: def __init__(self, *object_codes: ObjectCode, options: "LinkerOptions" = None): Linker_init(self, object_codes, options) - def link(self, target_type) -> ObjectCode: + def link(self, target_type: ObjectCodeFormat | str) -> ObjectCode: """Link the provided object codes into a single output of the specified target type. Parameters ---------- - target_type : str + target_type : ObjectCodeFormat | str The type of the target output. Must be either "cubin" or "ptx". 
Returns @@ -88,7 +89,7 @@ cdef class Linker: Ensure that input object codes were compiled with appropriate flags for linking (e.g., relocatable device code enabled). """ - return Linker_link(self, target_type) + return Linker_link(self, str(target_type)) def get_error_log(self) -> str: """Get the error log generated by the linker. @@ -168,9 +169,9 @@ cdef class Linker: return as_py(self._culink_handle) @property - def backend(self) -> str: - """Return this Linker instance's underlying backend.""" - return "nvJitLink" if self._use_nvjitlink else "driver" + def backend(self) -> CompilerBackend: + """Return this Linker instance's underlying :class:`CompilerBackend`.""" + return CompilerBackend.NVJITLINK if self._use_nvjitlink else CompilerBackend.DRIVER # ============================================================================= diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx index 205d3c77545..f37a4f18ee1 100644 --- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx @@ -16,6 +16,8 @@ from dataclasses import dataclass import threading import warnings +from cuda.core.typing import ManagedMemoryLocationType + __all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions'] @@ -30,7 +32,7 @@ cdef class ManagedMemoryResourceOptions: meaning depends on ``preferred_location_type``. (Default to ``None``) - preferred_location_type : ``"device"`` | ``"host"`` | ``"host_numa"`` | None, optional + preferred_location_type : ManagedMemoryLocationType | str | None, optional Controls how ``preferred_location`` is interpreted. 
When set to ``None`` (the default), legacy behavior is used: @@ -54,7 +56,7 @@ cdef class ManagedMemoryResourceOptions: (Default to ``None``) """ preferred_location: int | None = None - preferred_location_type: str | None = None + preferred_location_type: ManagedMemoryLocationType | str | None = None cdef class ManagedMemoryResource(_MemPool): @@ -97,7 +99,7 @@ cdef class ManagedMemoryResource(_MemPool): return -1 @property - def preferred_location(self) -> tuple | None: + def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None: """The preferred location for managed memory allocations. Returns ``None`` if no preferred location is set (driver decides), @@ -108,8 +110,8 @@ cdef class ManagedMemoryResource(_MemPool): if self._pref_loc_type is None: return None if self._pref_loc_type == "host": - return ("host", None) - return (self._pref_loc_type, self._pref_loc_id) + return (ManagedMemoryLocationType.HOST, None) + return (ManagedMemoryLocationType(self._pref_loc_type), self._pref_loc_id) @property def is_device_accessible(self) -> bool: diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py index 7d952e102fd..a60436a4305 100644 --- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Iterable, Literal +from typing import TYPE_CHECKING, Iterable if TYPE_CHECKING: from cuda.core._stream import Stream @@ -21,15 +21,16 @@ _check_driver_error as raise_if_driver_error, ) from cuda.core._utils.version import binding_version +from cuda.core.typing import ( + VirtualMemoryAccessType, + VirtualMemoryAllocationType, + VirtualMemoryGranularityType, + VirtualMemoryHandleType, + VirtualMemoryLocationType, +) __all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] -VirtualMemoryHandleTypeT = Literal["posix_fd", "generic", "win32_kmt", "fabric"] | None -VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"] -VirtualMemoryGranularityT = Literal["minimum", "recommended"] -VirtualMemoryAccessTypeT = Literal["rw", "r"] | None -VirtualMemoryAllocationTypeT = Literal["pinned", "managed"] - @dataclass class VirtualMemoryResourceOptions: @@ -38,18 +39,18 @@ class VirtualMemoryResourceOptions: Attributes ---------- - allocation_type: :obj:`~_memory.VirtualMemoryAllocationTypeT` + allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str Controls the type of allocation. - location_type: :obj:`~_memory.VirtualMemoryLocationTypeT` + location_type: :obj:`~_memory.VirtualMemoryLocationType` | str Controls the location of the allocation. - handle_type: :obj:`~_memory.VirtualMemoryHandleTypeT` + handle_type: :obj:`~_memory.VirtualMemoryHandleType` | str Export handle type for the physical allocation. Use ``"posix_fd"`` on Linux if you plan to import/export the allocation (required for cuMemRetainAllocationHandle). Use `None` if you don't need an exportable handle. gpu_direct_rdma: bool Hint that the allocation should be GDR-capable (if supported). 
- granularity: :obj:`~_memory.VirtualMemoryGranularityT` + granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str Controls granularity query and size rounding. addr_hint: int A (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide. @@ -57,50 +58,53 @@ class VirtualMemoryResourceOptions: Alignment for the VA reservation. If `None`, use the queried granularity. peers: Iterable[int] Extra device IDs that should be granted access in addition to ``device``. - self_access: :obj:`~_memory.VirtualMemoryAccessTypeT` + self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str Access flags for the owning device. - peer_access: :obj:`~_memory.VirtualMemoryAccessTypeT` + peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str Access flags for peers. """ - # Human-friendly strings; normalized in __post_init__ - allocation_type: VirtualMemoryAllocationTypeT = "pinned" - location_type: VirtualMemoryLocationTypeT = "device" - handle_type: VirtualMemoryHandleTypeT = "posix_fd" - granularity: VirtualMemoryGranularityT = "recommended" + allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED + location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE + handle_type: VirtualMemoryHandleType = VirtualMemoryHandleType.POSIX_FD + granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED gpu_direct_rdma: bool = False addr_hint: int | None = 0 addr_align: int | None = None peers: Iterable[int] = field(default_factory=tuple) - self_access: VirtualMemoryAccessTypeT = "rw" - peer_access: VirtualMemoryAccessTypeT = "rw" + self_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE + peer_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE _a = driver.CUmemAccess_flags - _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0} # noqa: RUF012 + _access_flags = { # noqa: 
RUF012 + VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, + VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ, + None: 0, + } _h = driver.CUmemAllocationHandleType _handle_types = { # noqa: RUF012 None: _h.CU_MEM_HANDLE_TYPE_NONE, - "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, - "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, - "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC, + VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, + VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, + VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC, } _g = driver.CUmemAllocationGranularity_flags _granularity = { # noqa: RUF012 - "recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, - "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM, + VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, + VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM, } _l = driver.CUmemLocationType _location_type = { # noqa: RUF012 - "device": _l.CU_MEM_LOCATION_TYPE_DEVICE, - "host": _l.CU_MEM_LOCATION_TYPE_HOST, - "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, - "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, + VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE, + VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST, + VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, + VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, } _t = driver.CUmemAllocationType # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not - _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED} # noqa: RUF012 + _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED} # noqa: RUF012 if binding_version() >= (13, 0, 0): - _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED + _allocation_type[VirtualMemoryAllocationType.MANAGED] = 
_t.CU_MEM_ALLOCATION_TYPE_MANAGED @staticmethod def _access_to_flags(spec: str): diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index d6c9481b82f..4156e4d7806 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -12,6 +12,7 @@ from cuda.core._device import Device from cuda.core._launch_config cimport LaunchConfig from cuda.core._launch_config import LaunchConfig from cuda.core._stream cimport Stream +from cuda.core._program import ObjectCodeFormat from cuda.core._resource_handles cimport ( LibraryHandle, KernelHandle, @@ -569,7 +570,7 @@ cdef class Kernel: CodeTypeT = bytes | bytearray | str -cdef tuple _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library") +cdef tuple _supported_code_type = tuple(ObjectCodeFormat.__members__.values()) cdef class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -599,7 +600,7 @@ cdef class ObjectCode: # _h_library is assigned during _lazy_load_module self._h_library = LibraryHandle() # Empty handle - self._code_type = code_type + self._code_type = str(code_type) self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping self._name = name if name else "" @@ -629,7 +630,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.CUBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -647,7 +648,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.PTX, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -665,7 +666,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.LTOIR, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -683,7 +684,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.FATBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -701,7 +702,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.OBJECT, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -719,7 +720,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.LIBRARY, name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. @@ -758,7 +759,7 @@ cdef class ObjectCode: """ self._lazy_load_module() - supported_code_types = ("cubin", "ptx", "fatbin") + supported_code_types = (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX, ObjectCodeFormat.FATBIN) if self._code_type not in supported_code_types: raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})') try: diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index cfc66451c86..f7040919d11 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -39,6 +39,7 @@ from cuda.core._utils.cuda_utils import ( is_sequence, ) from cuda.core._utils.version import binding_version, driver_version +from cuda.core.typing import ObjectCodeFormat, CompilerBackend, PCHStatus, SourceType __all__ = ["Program", "ProgramOptions"] @@ -67,14 +68,13 @@ cdef class Program: code : str | bytes | bytearray The source code to compile. For C++ and PTX, must be a string. For NVVM IR, can be str, bytes, or bytearray. - code_type : str + code_type : SourceType | str The type of source code. Must be one of ``"c++"``, ``"ptx"``, or ``"nvvm"``. options : :class:`ProgramOptions`, optional Options to customize the compilation process. 
""" - - def __init__(self, code: str | bytes | bytearray, code_type: str, options: ProgramOptions | None = None): - Program_init(self, code, code_type, options) + def __init__(self, code: str | bytes | bytearray, code_type: SourceType | str, options: ProgramOptions | None = None): + Program_init(self, code, str(code_type), options) def close(self): """Destroy this program.""" @@ -85,13 +85,13 @@ cdef class Program: self._h_nvvm.reset() def compile( - self, target_type: str, name_expressions: tuple | list = (), logs = None + self, target_type: ObjectCodeFormat | str, name_expressions: tuple | list = (), logs = None ) -> ObjectCode: """Compile the program to the specified target type. Parameters ---------- - target_type : str + target_type : ObjectCodeFormat | str The compilation target. Must be one of ``"ptx"``, ``"cubin"``, or ``"ltoir"``. name_expressions : tuple | list, optional Sequence of name expressions to make accessible in the compiled code. @@ -104,10 +104,10 @@ cdef class Program: :class:`~cuda.core.ObjectCode` The compiled object code. """ - return Program_compile(self, target_type, name_expressions, logs) + return Program_compile(self, str(target_type), name_expressions, logs) @property - def pch_status(self) -> str | None: + def pch_status(self) -> PCHStatus | None: """PCH creation outcome from the most recent :meth:`compile` call. Possible values: @@ -130,12 +130,14 @@ cdef class Program: use the NVRTC backend. For PTX and NVVM programs this property always returns ``None``. 
""" - return self._pch_status + if self._pch_status is None: + return None + return PCHStatus(self._pch_status) @property - def backend(self) -> str: - """Return this Program instance's underlying backend.""" - return self._backend + def backend(self) -> CompilerBackend: + """Return this Program instance's underlying :class:`CompilerBackend`.""" + return CompilerBackend(self._backend) @property def handle(self) -> ProgramHandleT: @@ -435,7 +437,7 @@ class ProgramOptions: def _prepare_nvvm_options(self, as_bytes: bool = True) -> list[bytes] | list[str]: return _prepare_nvvm_options_impl(self, as_bytes) - def as_bytes(self, backend: str, target_type: str | None = None) -> list[bytes]: + def as_bytes(self, backend: CompilerBackend | str, target_type: ObjectCodeFormat | str | None = None) -> list[bytes]: """Convert program options to bytes format for the specified backend. This method transforms the program options into a format suitable for the @@ -444,9 +446,9 @@ class ProgramOptions: Parameters ---------- - backend : str + backend : CompilerBackend | str The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm". - target_type : str, optional + target_type : ObjectCodeFormat | str, optional The compilation target type (e.g., "ptx", "cubin", "ltoir"). Some backends require additional options based on the target type. 
@@ -467,7 +469,7 @@ class ProgramOptions: >>> options = ProgramOptions(arch="sm_80", debug=True) >>> nvrtc_options = options.as_bytes("nvrtc") """ - backend = backend.lower() + backend = str(backend).lower() if backend == "nvrtc": return self._prepare_nvrtc_options() elif backend == "nvvm": @@ -639,7 +641,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL)) self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog) self._nvrtc_code = code_bytes - self._backend = "NVRTC" + self._backend = str(CompilerBackend.NVRTC) self._linker = None elif code_type == "ptx": @@ -649,7 +651,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=_translate_program_options(options) ) - self._backend = self._linker.backend + self._backend = str(self._linker.backend) elif code_type == "nvvm": _get_nvvm_module() # Validate NVVM availability @@ -683,12 +685,11 @@ cdef inline int Program_init(Program self, object code, str code_type, object op if options.use_libdevice: self._use_libdevice = True - self._backend = "NVVM" + self._backend = str(CompilerBackend.NVVM) self._linker = None else: - supported_code_types = ("c++", "ptx", "nvvm") - assert code_type not in supported_code_types, f"{code_type=}" + supported_code_types = tuple(x.value for x in SourceType) if options.use_libdevice: raise ValueError("use_libdevice is only supported by the NVVM backend") raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") @@ -780,23 +781,18 @@ cdef bint _has_nvrtc_pch_apis(): return _nvrtc_pch_apis_cached -cdef str _PCH_STATUS_CREATED = "created" -cdef str _PCH_STATUS_NOT_ATTEMPTED = "not_attempted" -cdef str _PCH_STATUS_FAILED = "failed" - - -cdef str _read_pch_status(cynvrtc.nvrtcProgram prog): +cdef object _read_pch_status(cynvrtc.nvrtcProgram prog): """Query nvrtcGetPCHCreateStatus and translate to a 
high-level string.""" cdef cynvrtc.nvrtcResult err with nogil: err = cynvrtc.nvrtcGetPCHCreateStatus(prog) if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS: - return _PCH_STATUS_CREATED + return PCHStatus.CREATED if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED: return None # sentinel: caller should auto-retry if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED: - return _PCH_STATUS_NOT_ATTEMPTED - return _PCH_STATUS_FAILED + return PCHStatus.NOT_ATTEMPTED + return PCHStatus.FAILED cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs): @@ -822,7 +818,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) from e if status is not None: - self._pch_status = status + self._pch_status = str(status) return result # Heap exhausted — auto-resize and retry with a fresh program @@ -844,7 +840,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) status = _read_pch_status(retry_prog) - self._pch_status = status if status is not None else _PCH_STATUS_FAILED + self._pch_status = status if status is not None else str(PCHStatus.FAILED) return result @@ -904,10 +900,10 @@ cdef object Program_compile_nvvm(Program self, str target_type, object logs): # Supported target types per backend cdef dict SUPPORTED_TARGETS = { - "NVRTC": ("ptx", "cubin", "ltoir"), - "NVVM": ("ptx", "ltoir"), - "nvJitLink": ("cubin", "ptx"), - "driver": ("cubin", "ptx"), + CompilerBackend.NVRTC: (ObjectCodeFormat.PTX, ObjectCodeFormat.CUBIN, ObjectCodeFormat.LTOIR), + CompilerBackend.NVVM: (ObjectCodeFormat.PTX, ObjectCodeFormat.LTOIR), + CompilerBackend.NVJITLINK: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), + CompilerBackend.DRIVER: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), } diff --git a/cuda_core/cuda/core/graph/_graph_definition.pyx b/cuda_core/cuda/core/graph/_graph_definition.pyx index 9a08232c556..5e4fa60d055 100644 --- 
a/cuda_core/cuda/core/graph/_graph_definition.pyx +++ b/cuda_core/cuda/core/graph/_graph_definition.pyx @@ -27,6 +27,8 @@ from dataclasses import dataclass from cuda.core._utils.cuda_utils import driver +from cuda.core.typing import GraphMemoryType + __all__ = ['GraphCondition', 'GraphAllocOptions', 'GraphDefinition'] @@ -78,7 +80,7 @@ class GraphAllocOptions: device : int or Device, optional The device on which to allocate memory. If None (default), uses the current CUDA context's device. - memory_type : str, optional + memory_type : GraphMemoryType | str, optional Type of memory to allocate. One of: - ``"device"`` (default): Pinned device memory, optimal for GPU kernels. @@ -101,7 +103,7 @@ class GraphAllocOptions: """ device: int | "Device" | None = None - memory_type: str = "device" + memory_type: GraphMemoryType = GraphMemoryType.DEVICE peer_access: list | None = None diff --git a/cuda_core/cuda/core/graph/_graph_node.pyx b/cuda_core/cuda/core/graph/_graph_node.pyx index 36401776600..c9d1786caa6 100644 --- a/cuda_core/cuda/core/graph/_graph_node.pyx +++ b/cuda_core/cuda/core/graph/_graph_node.pyx @@ -689,7 +689,7 @@ cdef inline AllocNode GN_alloc(GraphNode self, size_t size, object options): cdef str memory_type = "device" if options is not None and options.memory_type is not None: - memory_type = options.memory_type + memory_type = str(options.memory_type) c_memset(&alloc_params, 0, sizeof(alloc_params)) alloc_params.poolProps.handleTypes = cydriver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE diff --git a/cuda_core/cuda/core/graph/_subclasses.pyx b/cuda_core/cuda/core/graph/_subclasses.pyx index 25b648bacef..35009218479 100644 --- a/cuda_core/cuda/core/graph/_subclasses.pyx +++ b/cuda_core/cuda/core/graph/_subclasses.pyx @@ -33,6 +33,7 @@ from cuda.core._utils.cuda_utils cimport HANDLE_RETURN from cuda.core.graph._utils cimport _is_py_host_trampoline from cuda.core._utils.cuda_utils import driver, handle_return +from cuda.core.typing import 
GraphConditionalType __all__ = [ 'AllocNode', @@ -169,8 +170,8 @@ cdef class AllocNode(GraphNode): The number of bytes allocated. device_id : int The device on which the allocation was made. - memory_type : str - The type of memory allocated (``"device"``, ``"host"``, or ``"managed"``). + memory_type : GraphMemoryType | str + The type of memory allocated. peer_access : tuple of int Device IDs that have read-write access to this allocation. options : GraphAllocOptions @@ -698,8 +699,8 @@ cdef class ConditionalNode(GraphNode): return self._condition @property - def cond_type(self) -> str | None: - """The conditional type as a string: 'if', 'while', or 'switch'. + def cond_type(self) -> GraphConditionalType | None: + """The conditional type: GraphConditionalType.IF, .WHILE, or .SWITCH Returns None when reconstructed from the driver pre-CUDA 13.2, as the conditional type cannot be determined. @@ -707,11 +708,11 @@ cdef class ConditionalNode(GraphNode): if self._condition is None: return None if self._cond_type == cydriver.CU_GRAPH_COND_TYPE_IF: - return "if" + return GraphConditionalType("if") elif self._cond_type == cydriver.CU_GRAPH_COND_TYPE_WHILE: - return "while" + return GraphConditionalType("while") else: - return "switch" + return GraphConditionalType("switch") @property def branches(self) -> tuple: diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index f3cc62fe546..9fbe001d0dd 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -170,6 +170,7 @@ _GPU_P2P_CAPS_INDEX_MAPPING = { GpuP2PCapsIndex.ATOMICS: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_ATOMICS, GpuP2PCapsIndex.PCI: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PCI, GpuP2PCapsIndex.PROP: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PROP, + GpuP2PCapsIndex.UNKNOWN: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_UNKNOWN, } diff --git a/cuda_core/cuda/core/system/_temperature.pxi b/cuda_core/cuda/core/system/_temperature.pxi index 
b322df4a591..d890ba0d128 100644 --- a/cuda_core/cuda/core/system/_temperature.pxi +++ b/cuda_core/cuda/core/system/_temperature.pxi @@ -71,6 +71,7 @@ _THERMAL_CONTROLLER_MAPPING = { nvml.ThermalController.NVSYSCON_E551: ThermalController.NVSYSCON_E551, nvml.ThermalController.MAX6649R: ThermalController.MAX6649R, nvml.ThermalController.ADT7473S: ThermalController.ADT7473S, + nvml.ThermalController.UNKNOWN: ThermalController.UNKNOWN, } diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index a66ab1881fb..33405657b24 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -2,12 +2,103 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Public type aliases and protocols used in cuda.core API signatures.""" +"""Public type aliases, protocols, and enumerations used in cuda.core API signatures.""" + +try: + from enum import StrEnum +except ImportError: + from backports.strenum import StrEnum from cuda.core._memory._buffer import DevicePointerT from cuda.core._stream import IsStreamT __all__ = [ + "CompilerBackend", "DevicePointerT", + "GraphConditionalType", + "GraphMemoryType", "IsStreamT", + "ManagedMemoryLocationType", + "ObjectCodeFormat", + "PCHStatus", + "SourceType", + "VirtualMemoryAccessType", + "VirtualMemoryAllocationType", + "VirtualMemoryGranularityType", + "VirtualMemoryHandleType", + "VirtualMemoryLocationType", ] + + +class SourceType(StrEnum): + CXX = "c++" + PTX = "ptx" + NVVM = "nvvm" + + +class ObjectCodeFormat(StrEnum): + PTX = "ptx" + CUBIN = "cubin" + LTOIR = "ltoir" + FATBIN = "fatbin" + OBJECT = "object" + LIBRARY = "library" + + +class CompilerBackend(StrEnum): + NVRTC = "NVRTC" + NVVM = "NVVM" + NVJITLINK = "nvJitLink" + DRIVER = "driver" + + +class PCHStatus(StrEnum): + CREATED = "created" + NOT_ATTEMPTED = "not_attempted" + FAILED = "failed" + + +class GraphConditionalType(StrEnum): + IF = "if" + WHILE = "while" + SWITCH = "switch" + + +class GraphMemoryType(StrEnum): + DEVICE = "device" + HOST = 
"host" + MANAGED = "managed" + + +class ManagedMemoryLocationType(StrEnum): + DEVICE = "device" + HOST = "host" + HOST_NUMA = "host_numa" + + +class VirtualMemoryHandleType(StrEnum): + POSIX_FD = "posix_fd" + WIN32_KMT = "win32_kmt" + FABRIC = "fabric" + + +class VirtualMemoryLocationType(StrEnum): + DEVICE = "device" + HOST = "host" + HOST_NUMA = "host_numa" + HOST_NUMA_CURRENT = "host_numa_current" + + +class VirtualMemoryGranularityType(StrEnum): + MINIMUM = "minimum" + RECOMMENDED = "recommended" + + +class VirtualMemoryAccessType(StrEnum): + READ_WRITE = "rw" + READ = "r" + + +class VirtualMemoryAllocationType(StrEnum): + PINNED = "pinned" + MANAGED = "managed" diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index a3bb4f1395c..725b211798e 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,16 +16,23 @@ CUDA runtime .. autosummary:: :toctree: generated/ - typing.DevicePointerT - _memory._virtual_memory_resource.VirtualMemoryAllocationTypeT - _memory._virtual_memory_resource.VirtualMemoryLocationTypeT - _memory._virtual_memory_resource.VirtualMemoryGranularityT - _memory._virtual_memory_resource.VirtualMemoryAccessTypeT - _memory._virtual_memory_resource.VirtualMemoryHandleTypeT _module.KernelAttributes _module.KernelOccupancy - _module.ParamInfo _module.MaxPotentialBlockSizeOccupancyResult + _module.ParamInfo + typing.CompilerBackend + typing.DevicePointerT + typing.GraphConditionalType + typing.GraphMemoryType + typing.ManagedMemoryLocationType + typing.ObjectCodeFormat + typing.PCHStatus + typing.SourceType + typing.VirtualMemoryAccessType + typing.VirtualMemoryAllocationType + typing.VirtualMemoryGranularityType + typing.VirtualMemoryHandleType + typing.VirtualMemoryLocationType :template: autosummary/cyclass.rst diff --git a/cuda_core/tests/graph/test_graph_definition.py b/cuda_core/tests/graph/test_graph_definition.py index e85645e0305..a5d8ff1f34d 100644 --- 
a/cuda_core/tests/graph/test_graph_definition.py +++ b/cuda_core/tests/graph/test_graph_definition.py @@ -33,6 +33,7 @@ SwitchNode, WhileNode, ) +from cuda.core.typing import GraphConditionalType, GraphMemoryType ALLOC_SIZE = 1024 @@ -275,7 +276,7 @@ def _build_alloc_node(g): def _build_alloc_managed_node(g): _skip_if_no_managed_mempool() device_id = Device().device_id - options = GraphAllocOptions(memory_type="managed") + options = GraphAllocOptions(memory_type=GraphMemoryType.MANAGED) entry = g.allocate(ALLOC_SIZE) node = entry.allocate(ALLOC_SIZE, options) return node, { @@ -421,7 +422,7 @@ def _build_if_then_node(g): node = g.if_then(condition) return node, { "condition": condition, - "cond_type": "if", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "if", "branches": lambda v: isinstance(v, tuple) and len(v) == 1, "then": lambda v: isinstance(v, GraphDefinition), } @@ -432,7 +433,7 @@ def _build_if_else_node(g): node = g.if_else(condition) return node, { "condition": condition, - "cond_type": "if", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "if", "branches": lambda v: isinstance(v, tuple) and len(v) == 2, "then": lambda v: isinstance(v, GraphDefinition), "else_": lambda v: isinstance(v, GraphDefinition), @@ -444,7 +445,7 @@ def _build_while_loop_node(g): node = g.while_loop(condition) return node, { "condition": condition, - "cond_type": "while", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "while", "branches": lambda v: isinstance(v, tuple) and len(v) == 1, "body": lambda v: isinstance(v, GraphDefinition), } @@ -455,7 +456,7 @@ def _build_switch_node(g): node = g.switch(condition, 3) return node, { "condition": condition, - "cond_type": "switch", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "switch", "branches": lambda v: isinstance(v, tuple) and len(v) == 3, } diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index 
9c70c9f6042..bf65048e853 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -11,10 +11,13 @@ import inspect import pkgutil import sys +from typing import Any import pytest import cuda.core +import cuda.core.typing +from cuda.bindings import driver from cuda.core import system if sys.version_info >= (3, 11): @@ -38,7 +41,71 @@ # in binding_unmapped appears as either a key or a value of the mapping dict, # and conversely that every str_enum member not in str_enum_unmapped also # appears. -_CASES = [] + +_CASES: list[tuple[Any, StrEnum, dict | None, set[str], set[str]]] = [ + ( + driver.CUgraphConditionalNodeType, + cuda.core.typing.GraphConditionalType, + None, + set(), + set(), + ), + ( + driver.CUmemLocationType, + cuda.core.typing.ManagedMemoryLocationType, + None, + # We have some explicitly unsupported memory location types + { + "CU_MEM_LOCATION_TYPE_NONE", + "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT", + "CU_MEM_LOCATION_TYPE_INVISIBLE", + "CU_MEM_LOCATION_TYPE_MAX", + "CU_MEM_LOCATION_TYPE_INVALID", + }, + set(), + ), + ( + driver.CUmemAccess_flags, + cuda.core.typing.VirtualMemoryAccessType, + cuda.core.VirtualMemoryResourceOptions._access_flags, + {"CU_MEM_ACCESS_FLAGS_PROT_NONE", "CU_MEM_ACCESS_FLAGS_PROT_MAX"}, + set(), + ), + ( + driver.CUmemAllocationType, + cuda.core.typing.VirtualMemoryAllocationType, + cuda.core.VirtualMemoryResourceOptions._allocation_type, + {"CU_MEM_ALLOCATION_TYPE_INVALID", "CU_MEM_ALLOCATION_TYPE_MAX"}, + set(), + ), + ( + driver.CUmemAllocationGranularity_flags, + cuda.core.typing.VirtualMemoryGranularityType, + cuda.core.VirtualMemoryResourceOptions._granularity, + set(), + set(), + ), + ( + driver.CUmemAllocationHandleType, + cuda.core.typing.VirtualMemoryHandleType, + cuda.core.VirtualMemoryResourceOptions._handle_types, + {"CU_MEM_HANDLE_TYPE_NONE", "CU_MEM_HANDLE_TYPE_WIN32", "CU_MEM_HANDLE_TYPE_MAX"}, + {"GENERIC"}, + ), + ( + driver.CUmemLocationType, + 
cuda.core.typing.VirtualMemoryLocationType, + None, + # We have some explicitly unsupported memory location types + { + "CU_MEM_LOCATION_TYPE_NONE", + "CU_MEM_LOCATION_TYPE_INVISIBLE", + "CU_MEM_LOCATION_TYPE_MAX", + "CU_MEM_LOCATION_TYPE_INVALID", + }, + set(), + ), +] if system.CUDA_BINDINGS_NVML_IS_COMPATIBLE: # Populated below only when NVML bindings are compatible, so that importing @@ -70,7 +137,7 @@ _device._GPU_P2P_STATUS_MAPPING, # Both the typo'd (SUPPORED) and corrected (SUPPORTED) spellings # share the same integer value; the mapping covers both via aliases - set(), + {"P2P_STATUS_CHIPSET_NOT_SUPPORED"}, set(), ), ( @@ -116,11 +183,8 @@ nvml.ThermalController, _device.ThermalController, _device._THERMAL_CONTROLLER_MAPPING, - # NONE and UNKNOWN are both handled by the .get() fallback that - # returns ThermalController.UNKNOWN when the value is not in the mapping - {"NONE", "UNKNOWN"}, - # UNKNOWN is the default returned by .get() for unrecognised controllers - {"UNKNOWN"}, + {"NONE"}, + {"NONE"}, ), ( nvml.ThermalTarget, @@ -156,11 +220,8 @@ nvml.GpuP2PCapsIndex, _device.GpuP2PCapsIndex, _device._GPU_P2P_CAPS_INDEX_MAPPING, - # UNKNOWN is returned by the driver when an index is unrecognised; - # it is not a capability the caller selects - {"P2P_CAPS_INDEX_UNKNOWN"}, - # UNKNOWN is a driver-side fallback, not a caller-selectable index - {"UNKNOWN"}, + set(), + set(), ), ( nvml.GpuTopologyLevel, @@ -208,7 +269,18 @@ # StrEnum subclasses that intentionally have no associated cuda_binding. # Add classes here (with a comment explaining why) when a new StrEnum is # introduced that wraps something other than a cuda_binding enum. 
-_UNBOUND_STR_ENUMS: frozenset[type] = frozenset() +_UNBOUND_STR_ENUMS: set[StrEnum] = { + cuda.core.typing.ObjectCodeFormat, + cuda.core.typing.CompilerBackend, + # This one enum coordinates values in two cuda_binding enums: + # CUmemAllocationType and CUmemLocationType + cuda.core.typing.GraphMemoryType, + # This should support all of the PCH-related values in nvrtcResult, but + # there is no easy way to check that since they are mixed in with other + # unrelated things + cuda.core.typing.PCHStatus, + cuda.core.typing.SourceType, +} @pytest.mark.parametrize( @@ -223,18 +295,33 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ mapping (or be listed in the per-entry str_enum_unmapped set). """ required = set(binding.__members__) - binding_unmapped - # Compare by integer value so that enum aliases (two names, one integer) - # are treated as covered when the canonical member appears in the mapping. - covered_values = frozenset(int(m) for m in (*mapping.keys(), *mapping.values()) if isinstance(m, binding)) - missing = {name for name in required if int(binding.__members__[name]) not in covered_values} - assert not missing, f"{binding.__name__} has members not covered by the wrapper mapping: {missing}" + if mapping is not None: + # Compare by integer value so that enum aliases (two names, one integer) + # are treated as covered when the canonical member appears in the mapping. + covered_values = frozenset(int(m) for m in (*mapping.keys(), *mapping.values()) if isinstance(m, binding)) + missing = {name for name in required if int(binding.__members__[name]) not in covered_values} + assert not missing, f"{binding.__name__} has members not covered by the wrapper mapping: {missing}" # Reverse check: every StrEnum member must also appear in the mapping. 
if str_enum is not None: - required_str = set(str_enum.__members__) - str_enum_unmapped - covered_str = {m.name for m in (*mapping.keys(), *mapping.values()) if isinstance(m, str_enum)} - missing_str = required_str - covered_str - assert not missing_str, f"{str_enum.__name__} has members not covered by the wrapper mapping: {missing_str}" + if mapping is not None: + required_str = set(str_enum.__members__) - str_enum_unmapped + covered_str = {m.name for m in (*mapping.keys(), *mapping.values()) if isinstance(m, str_enum)} + missing_str = required_str - covered_str + assert not missing_str, f"{str_enum.__name__} has members not covered by the wrapper mapping: {missing_str}" + + # For checking a StrEnum against a cuda_binding enum directly, without a + # mapping, the best we can do is count them, since it's reasonable that + # they have been renamed for clarity. + required_count = len(required) + covered_str_enum = set(str_enum.__members__) - str_enum_unmapped + covered_count = len(covered_str_enum) + if required_count != covered_count: + raise AssertionError( + f"{str_enum.__name__} has {covered_count} members, but expected {required_count} based on {binding.__name__} " + "after accounting for unmapped members. This may indicate that some members are missing from the wrapper, " + "or that some wrapper members do not correspond to actual binding members." + ) def test_all_str_enums_in_cases(): @@ -268,8 +355,9 @@ def discover_str_enums() -> set[type]: covered = {x[1] for x in _CASES if x[1] is not None} uncovered = discover_str_enums() - covered - _UNBOUND_STR_ENUMS + uncovered_names = sorted({c.__qualname__ for c in uncovered}) assert not uncovered, ( f"StrEnum subclasses in cuda.core not covered by _CASES: " - f"{sorted(c.__qualname__ for c in uncovered)}\n" + f"{uncovered_names}\n" "Add a _CASES entry for each, or add to _UNBOUND_STR_ENUMS if it does not wrap a cuda_binding enum." 
) diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index b3461f5a371..775089de907 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -26,6 +26,7 @@ ) from cuda.core._memory._legacy import _SynchronousMemoryResource from cuda.core._utils.cuda_utils import CUDAError +from cuda.core.typing import ObjectCodeFormat, SourceType def test_launch_config_init(init_cuda): @@ -126,8 +127,8 @@ def test_launch_config_native_conversion(init_cuda): def test_launch_invalid_values(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, "c++") - mod = program.compile("cubin") + program = Program(code, SourceType.CXX) + mod = program.compile(ObjectCodeFormat.CUBIN) stream = Device().create_stream() ker = mod.get_kernel("my_kernel") diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index fb99895616d..3ae6960fc29 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -46,6 +46,14 @@ from cuda.core._dlpack import DLDeviceType from cuda.core._memory import IPCBufferDescriptor from cuda.core._utils.cuda_utils import CUDAError, handle_return +from cuda.core.typing import ( + ManagedMemoryLocationType, + VirtualMemoryAccessType, + VirtualMemoryAllocationType, + VirtualMemoryGranularityType, + VirtualMemoryHandleType, + VirtualMemoryLocationType, +) from cuda.core.utils import StridedMemoryView POOL_SIZE = 2097152 # 2MB size @@ -134,19 +142,19 @@ def is_host_accessible(self) -> bool: def test_package_contents(): expected = [ "Buffer", - "MemoryResource", "DeviceMemoryResource", "DeviceMemoryResourceOptions", "GraphMemoryResource", - "IPCBufferDescriptor", "IPCAllocationHandle", + "IPCBufferDescriptor", "LegacyPinnedMemoryResource", "ManagedMemoryResource", "ManagedMemoryResourceOptions", - "PinnedMemoryResourceOptions", + "MemoryResource", "PinnedMemoryResource", - "VirtualMemoryResourceOptions", + "PinnedMemoryResourceOptions", 
"VirtualMemoryResource", + "VirtualMemoryResourceOptions", ] d = {} exec("from cuda.core._memory import *", d) # noqa: S102 @@ -800,14 +808,14 @@ def test_vmm_allocator_policy_configuration(): # Test with custom VMM config custom_config = VirtualMemoryResourceOptions( - allocation_type="pinned", - location_type="device", - granularity="minimum", + allocation_type=VirtualMemoryAllocationType.PINNED, + location_type=VirtualMemoryLocationType.DEVICE, + granularity=VirtualMemoryGranularityType.MINIMUM, gpu_direct_rdma=True, - handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", + handle_type=VirtualMemoryHandleType.POSIX_FD if not IS_WINDOWS else VirtualMemoryHandleType.WIN32_KMT, peers=(), - self_access="rw", - peer_access="rw", + self_access=VirtualMemoryAccessType.READ_WRITE, + peer_access=VirtualMemoryAccessType.READ_WRITE, ) vmm_mr = VirtualMemoryResource(device, config=custom_config) @@ -1090,7 +1098,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda): # Explicit style opts = ManagedMemoryResourceOptions( preferred_location=device.device_id, - preferred_location_type="device", + preferred_location_type=ManagedMemoryLocationType.DEVICE, ) mr = create_managed_memory_resource_or_skip(opts) assert mr.preferred_location == ("device", device.device_id) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 992ce336555..d729e2fde8d 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -13,6 +13,7 @@ from cuda.core._module import Kernel, ObjectCode from cuda.core._program import Program, ProgramOptions from cuda.core._utils.cuda_utils import CUDAError, handle_return +from cuda.core.typing import CompilerBackend, PCHStatus pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",) @@ -241,6 +242,7 @@ def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) assert program.backend == 
"NVRTC" + assert isinstance(program.backend, CompilerBackend) program.compile("ptx") program.close() @@ -281,6 +283,7 @@ def test_cpp_program_pch_auto_creates(init_cuda, tmp_path): assert program.pch_status is None # not compiled yet program.compile("ptx") assert program.pch_status in ("created", "not_attempted", "failed") + assert isinstance(program.pch_status, PCHStatus) program.close() @@ -681,7 +684,7 @@ def test_cpp_program_with_extra_sources(): def test_program_options_as_bytes_nvrtc(): """Test ProgramOptions.as_bytes() for NVRTC backend""" options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) - nvrtc_options = options.as_bytes("nvrtc") + nvrtc_options = options.as_bytes(CompilerBackend.NVRTC) assert isinstance(nvrtc_options, list) assert all(isinstance(opt, bytes) for opt in nvrtc_options) options_str = [opt.decode() for opt in nvrtc_options] From f97b30b52a12c8ec2d34011bf1005f1010511f09 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 11:52:46 -0400 Subject: [PATCH 2/5] Address renamings from PR comments --- cuda_core/cuda/core/_device.pyx | 6 +-- cuda_core/cuda/core/_launcher.pyx | 2 +- cuda_core/cuda/core/_linker.pyx | 12 ++--- cuda_core/cuda/core/_memory/_buffer.pyx | 14 +++--- .../core/_memory/_graph_memory_resource.pyx | 4 +- cuda_core/cuda/core/_memory/_legacy.py | 6 +-- cuda_core/cuda/core/_memory/_memory_pool.pyx | 4 +- cuda_core/cuda/core/_module.pyx | 18 +++---- cuda_core/cuda/core/_program.pyx | 48 +++++++++---------- cuda_core/cuda/core/_stream.pyx | 4 +- cuda_core/cuda/core/typing.py | 27 ++++++----- cuda_core/docs/source/api_private.rst | 12 ++--- cuda_core/docs/source/interoperability.rst | 2 +- cuda_core/tests/helpers/misc.py | 2 +- cuda_core/tests/test_enum_coverage.py | 18 +++---- cuda_core/tests/test_launcher.py | 6 +-- cuda_core/tests/test_program.py | 8 ++-- cuda_core/tests/test_typing_imports.py | 20 ++++---- 18 files changed, 106 insertions(+), 107 deletions(-) diff --git 
a/cuda_core/cuda/core/_device.pyx b/cuda_core/cuda/core/_device.pyx index c0d7f09ee44..4dd03e9eaaa 100644 --- a/cuda_core/cuda/core/_device.pyx +++ b/cuda_core/cuda/core/_device.pyx @@ -23,7 +23,7 @@ from cuda.core._resource_handles cimport ( as_cu, ) -from cuda.core._stream import IsStreamT, Stream, StreamOptions +from cuda.core._stream import IsStreamType, Stream, StreamOptions from cuda.core._utils.clear_error_support import assert_type from cuda.core._utils.cuda_utils import ( ComputeCapability, @@ -1268,7 +1268,7 @@ class Device: """ raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189") - def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream: + def create_stream(self, obj: IsStreamType | None = None, options: StreamOptions | None = None) -> Stream: """Create a :obj:`~_stream.Stream` object. New stream objects can be created in two different ways: @@ -1285,7 +1285,7 @@ class Device: Parameters ---------- - obj : :obj:`~_stream.IsStreamT`, optional + obj : :obj:`~_stream.IsStreamType`, optional Any object supporting the ``__cuda_stream__`` protocol. options : :obj:`~_stream.StreamOptions`, optional Customizable dataclass for stream creation options. diff --git a/cuda_core/cuda/core/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx index 87d18f2b881..e6a07ad28e6 100644 --- a/cuda_core/cuda/core/_launcher.pyx +++ b/cuda_core/cuda/core/_launcher.pyx @@ -20,7 +20,7 @@ from cuda.core._stream import Stream from math import prod -def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kernel: Kernel, *kernel_args): +def launch(stream: Stream | GraphBuilder | IsStreamType, config: LaunchConfig, kernel: Kernel, *kernel_args): """Launches a :obj:`~_module.Kernel` object with launch-time configuration. 
diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx index 1a28c6ed3aa..2a3d0b514c8 100644 --- a/cuda_core/cuda/core/_linker.pyx +++ b/cuda_core/cuda/core/_linker.pyx @@ -39,7 +39,7 @@ from cuda.core._utils.cuda_utils import ( driver, is_sequence, ) -from cuda.core.typing import CompilerBackend +from cuda.core.typing import CompilerBackendType ctypedef const char* const_char_ptr ctypedef void* void_ptr @@ -71,12 +71,12 @@ cdef class Linker: def __init__(self, *object_codes: ObjectCode, options: "LinkerOptions" = None): Linker_init(self, object_codes, options) - def link(self, target_type: ObjectCodeFormat | str) -> ObjectCode: + def link(self, target_type: ObjectCodeFormatType | str) -> ObjectCode: """Link the provided object codes into a single output of the specified target type. Parameters ---------- - target_type : ObjectCodeFormat | str + target_type : ObjectCodeFormatType | str The type of the target output. Must be either "cubin" or "ptx". Returns @@ -169,9 +169,9 @@ cdef class Linker: return as_py(self._culink_handle) @property - def backend(self) -> CompilerBackend: - """Return this Linker instance's underlying :class:`CompilerBackend`.""" - return CompilerBackend.NVJITLINK if self._use_nvjitlink else CompilerBackend.DRIVER + def backend(self) -> CompilerBackendType: + """Return this Linker instance's underlying :class:`CompilerBackendType`.""" + return CompilerBackendType.NVJITLINK if self._use_nvjitlink else CompilerBackendType.DRIVER # ============================================================================= diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index a56657a3564..c4e8b5a1121 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -65,7 +65,7 @@ register_mr_dealloc_callback(_mr_dealloc_callback) __all__ = ['Buffer', 'MemoryResource'] -DevicePointerT = driver.CUdeviceptr | int | None +DevicePointerType = driver.CUdeviceptr 
| int | None """ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting :attr:`Buffer.handle`. @@ -97,7 +97,7 @@ cdef class Buffer: @classmethod def _init( - cls, ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None, + cls, ptr: DevicePointerType, size_t size, mr: MemoryResource | None = None, ipc_descriptor: IPCBufferDescriptor | None = None, owner : object | None = None ): @@ -132,14 +132,14 @@ cdef class Buffer: @staticmethod def from_handle( - ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None, + ptr: DevicePointerType, size_t size, mr: MemoryResource | None = None, owner: object | None = None, ) -> Buffer: """Create a new :class:`Buffer` object from a pointer. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` Allocated buffer handle object size : int Memory size of the buffer @@ -347,7 +347,7 @@ cdef class Buffer: return self._mem_attrs.device_id @property - def handle(self) -> DevicePointerT: + def handle(self) -> DevicePointerType: """Return the buffer handle object. .. caution:: @@ -515,12 +515,12 @@ cdef class MemoryResource: """ raise TypeError("MemoryResource.allocate must be implemented by subclasses.") - def deallocate(self, ptr: DevicePointerT, size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: DevicePointerType, size_t size, stream: Stream | GraphBuilder | None = None): """Deallocate a buffer previously allocated by this resource. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. 
diff --git a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx index e04f25f1581..2180276ed87 100644 --- a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -111,7 +111,7 @@ cdef class cyGraphMemoryResource(MemoryResource): stream = Stream_accept(stream) if stream is not None else default_stream() return GMR_allocate(self, size, stream) - def deallocate(self, ptr: "DevicePointerT", size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: "DevicePointerType", size_t size, stream: Stream | GraphBuilder | None = None): """ Deallocate a buffer of the requested size. See documentation for :obj:`~_memory.MemoryResource`. """ diff --git a/cuda_core/cuda/core/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py index 24ce88487ea..036b89abdcc 100644 --- a/cuda_core/cuda/core/_memory/_legacy.py +++ b/cuda_core/cuda/core/_memory/_legacy.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from cuda.core._memory._buffer import DevicePointerT + from cuda.core._memory._buffer import DevicePointerType from cuda.core._memory._buffer import Buffer, MemoryResource from cuda.core._utils.cuda_utils import ( @@ -53,12 +53,12 @@ def allocate(self, size, stream=None) -> Buffer: ptr = 0 return Buffer._init(ptr, size, self) - def deallocate(self, ptr: DevicePointerT, size, stream): + def deallocate(self, ptr: DevicePointerType, size, stream): """Deallocate a buffer previously allocated by this resource. 
Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx index f8f3b683d12..4e0f99d4529 100644 --- a/cuda_core/cuda/core/_memory/_memory_pool.pyx +++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx @@ -144,12 +144,12 @@ cdef class _MemPool(MemoryResource): stream = Stream_accept(stream) if stream is not None else default_stream() return _MP_allocate(self, size, stream) - def deallocate(self, ptr: "DevicePointerT", size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: "DevicePointerType", size_t size, stream: Stream | GraphBuilder | None = None): """Deallocate a buffer previously allocated by this resource. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. 
diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index 4156e4d7806..fee979b6130 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -12,7 +12,7 @@ from cuda.core._device import Device from cuda.core._launch_config cimport LaunchConfig from cuda.core._launch_config import LaunchConfig from cuda.core._stream cimport Stream -from cuda.core._program import ObjectCodeFormat +from cuda.core._program import ObjectCodeFormatType from cuda.core._resource_handles cimport ( LibraryHandle, KernelHandle, @@ -570,7 +570,7 @@ cdef class Kernel: CodeTypeT = bytes | bytearray | str -cdef tuple _supported_code_type = tuple(ObjectCodeFormat.__members__.values()) +cdef tuple _supported_code_type = tuple(ObjectCodeFormatType.__members__.values()) cdef class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -630,7 +630,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.CUBIN, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.CUBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -648,7 +648,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.PTX, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.PTX, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -666,7 +666,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, ObjectCodeFormat.LTOIR, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.LTOIR, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -684,7 +684,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.FATBIN, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.FATBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -702,7 +702,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.OBJECT, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.OBJECT, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -720,7 +720,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.LIBRARY, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.LIBRARY, name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. 
@@ -759,7 +759,7 @@ cdef class ObjectCode: """ self._lazy_load_module() - supported_code_types = (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX, ObjectCodeFormat.FATBIN) + supported_code_types = (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX, ObjectCodeFormatType.FATBIN) if self._code_type not in supported_code_types: raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})') try: diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index f7040919d11..12d52198b2a 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -39,7 +39,7 @@ from cuda.core._utils.cuda_utils import ( is_sequence, ) from cuda.core._utils.version import binding_version, driver_version -from cuda.core.typing import ObjectCodeFormat, CompilerBackend, PCHStatus, SourceType +from cuda.core.typing import ObjectCodeFormatType, CompilerBackendType, PCHStatusType, SourceCodeType __all__ = ["Program", "ProgramOptions"] @@ -68,12 +68,12 @@ cdef class Program: code : str | bytes | bytearray The source code to compile. For C++ and PTX, must be a string. For NVVM IR, can be str, bytes, or bytearray. - code_type : SourceType | str + code_type : SourceCodeType | str The type of source code. Must be one of ``"c++"``, ``"ptx"``, or ``"nvvm"``. options : :class:`ProgramOptions`, optional Options to customize the compilation process. 
""" - def __init__(self, code: str | bytes | bytearray, code_type: SourceType | str, options: ProgramOptions | None = None): + def __init__(self, code: str | bytes | bytearray, code_type: SourceCodeType | str, options: ProgramOptions | None = None): Program_init(self, code, str(code_type), options) def close(self): @@ -85,13 +85,13 @@ cdef class Program: self._h_nvvm.reset() def compile( - self, target_type: ObjectCodeFormat | str, name_expressions: tuple | list = (), logs = None + self, target_type: ObjectCodeFormatType | str, name_expressions: tuple | list = (), logs = None ) -> ObjectCode: """Compile the program to the specified target type. Parameters ---------- - target_type : ObjectCodeFormat | str + target_type : ObjectCodeFormatType | str The compilation target. Must be one of ``"ptx"``, ``"cubin"``, or ``"ltoir"``. name_expressions : tuple | list, optional Sequence of name expressions to make accessible in the compiled code. @@ -107,7 +107,7 @@ cdef class Program: return Program_compile(self, str(target_type), name_expressions, logs) @property - def pch_status(self) -> PCHStatus | None: + def pch_status(self) -> PCHStatusType | None: """PCH creation outcome from the most recent :meth:`compile` call. 
Possible values: @@ -132,12 +132,12 @@ cdef class Program: """ if self._pch_status is None: return None - return PCHStatus(self._pch_status) + return PCHStatusType(self._pch_status) @property - def backend(self) -> CompilerBackend: - """Return this Program instance's underlying :class:`CompilerBackend`.""" - return CompilerBackend(self._backend) + def backend(self) -> CompilerBackendType: + """Return this Program instance's underlying :class:`CompilerBackendType`.""" + return CompilerBackendType(self._backend) @property def handle(self) -> ProgramHandleT: @@ -437,7 +437,7 @@ class ProgramOptions: def _prepare_nvvm_options(self, as_bytes: bool = True) -> list[bytes] | list[str]: return _prepare_nvvm_options_impl(self, as_bytes) - def as_bytes(self, backend: CompilerBackend | str, target_type: ObjectCodeFormat | str | None = None) -> list[bytes]: + def as_bytes(self, backend: CompilerBackendType | str, target_type: ObjectCodeFormatType | str | None = None) -> list[bytes]: """Convert program options to bytes format for the specified backend. This method transforms the program options into a format suitable for the @@ -446,9 +446,9 @@ class ProgramOptions: Parameters ---------- - backend : CompilerBackend | str + backend : CompilerBackendType | str The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm". - target_type : ObjectCodeFormat | str, optional + target_type : ObjectCodeFormatType | str, optional The compilation target type (e.g., "ptx", "cubin", "ltoir"). Some backends require additional options based on the target type. 
@@ -641,7 +641,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL)) self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog) self._nvrtc_code = code_bytes - self._backend = str(CompilerBackend.NVRTC) + self._backend = str(CompilerBackendType.NVRTC) self._linker = None elif code_type == "ptx": @@ -685,11 +685,11 @@ cdef inline int Program_init(Program self, object code, str code_type, object op if options.use_libdevice: self._use_libdevice = True - self._backend = str(CompilerBackend.NVVM) + self._backend = str(CompilerBackendType.NVVM) self._linker = None else: - supported_code_types = tuple(x.value for x in SourceType) + supported_code_types = tuple(x.value for x in SourceCodeType) if options.use_libdevice: raise ValueError("use_libdevice is only supported by the NVVM backend") raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") @@ -787,12 +787,12 @@ cdef object _read_pch_status(cynvrtc.nvrtcProgram prog): with nogil: err = cynvrtc.nvrtcGetPCHCreateStatus(prog) if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS: - return PCHStatus.CREATED + return PCHStatusType.CREATED if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED: return None # sentinel: caller should auto-retry if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED: - return PCHStatus.NOT_ATTEMPTED - return PCHStatus.FAILED + return PCHStatusType.NOT_ATTEMPTED + return PCHStatusType.FAILED cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs): @@ -840,7 +840,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) status = _read_pch_status(retry_prog) - self._pch_status = status if status is not None else str(PCHStatus.FAILED) + self._pch_status = status if status is not None else str(PCHStatusType.FAILED) return result @@ -900,10 +900,10 @@ cdef object Program_compile_nvvm(Program self, str target_type, 
object logs): # Supported target types per backend cdef dict SUPPORTED_TARGETS = { - CompilerBackend.NVRTC: (ObjectCodeFormat.PTX, ObjectCodeFormat.CUBIN, ObjectCodeFormat.LTOIR), - CompilerBackend.NVVM: (ObjectCodeFormat.PTX, ObjectCodeFormat.LTOIR), - CompilerBackend.NVJITLINK: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), - CompilerBackend.DRIVER: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), + CompilerBackendType.NVRTC: (ObjectCodeFormatType.PTX, ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.LTOIR), + CompilerBackendType.NVVM: (ObjectCodeFormatType.PTX, ObjectCodeFormatType.LTOIR), + CompilerBackendType.NVJITLINK: (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX), + CompilerBackendType.DRIVER: (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX), } diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index fdb617f0325..97a95cdb861 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -58,7 +58,7 @@ cdef class StreamOptions: priority: int | None = None -class IsStreamT(Protocol): +class IsStreamType(Protocol): def __cuda_stream__(self) -> tuple[int, int]: """ For any Python object that is meant to be interpreted as a CUDA stream, the intent @@ -113,7 +113,7 @@ cdef class Stream: return Stream._from_handle(cls, get_per_thread_stream()) @classmethod - def _init(cls, obj: IsStreamT | None = None, options=None, device_id: int = None, + def _init(cls, obj: IsStreamType | None = None, options=None, device_id: int = None, ctx: Context = None): cdef StreamHandle h_stream cdef cydriver.CUstream borrowed diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 33405657b24..12829e070db 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -9,19 +9,19 @@ except ImportError: from backports.strenum import StrEnum -from cuda.core._memory._buffer import DevicePointerT -from cuda.core._stream import IsStreamT +from cuda.core._memory._buffer import 
DevicePointerType +from cuda.core._stream import IsStreamType __all__ = [ - "CompilerBackend", - "DevicePointerT", + "CompilerBackendType", + "DevicePointerType", "GraphConditionalType", "GraphMemoryType", - "IsStreamT", + "IsStreamType", "ManagedMemoryLocationType", - "ObjectCodeFormat", - "PCHStatus", - "SourceType", + "ObjectCodeFormatType", + "PCHStatusType", + "SourceCodeType", "VirtualMemoryAccessType", "VirtualMemoryAllocationType", "VirtualMemoryGranularityType", @@ -30,13 +30,13 @@ ] -class SourceType(StrEnum): +class SourceCodeType(StrEnum): CXX = "c++" PTX = "ptx" NVVM = "nvvm" -class ObjectCodeFormat(StrEnum): +class ObjectCodeFormatType(StrEnum): PTX = "ptx" CUBIN = "cubin" LTOIR = "ltoir" @@ -45,14 +45,14 @@ class ObjectCodeFormat(StrEnum): LIBRARY = "library" -class CompilerBackend(StrEnum): +class CompilerBackendType(StrEnum): NVRTC = "NVRTC" NVVM = "NVVM" NVJITLINK = "nvJitLink" DRIVER = "driver" -class PCHStatus(StrEnum): +class PCHStatusType(StrEnum): CREATED = "created" NOT_ATTEMPTED = "not_attempted" FAILED = "failed" @@ -102,3 +102,6 @@ class VirtualMemoryAccessType(StrEnum): class VirtualMemoryAllocationType(StrEnum): PINNED = "pinned" MANAGED = "managed" + + +del StrEnum diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index 725b211798e..1df64f91d4b 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -20,14 +20,14 @@ CUDA runtime _module.KernelOccupancy _module.MaxPotentialBlockSizeOccupancyResult _module.ParamInfo - typing.CompilerBackend - typing.DevicePointerT + typing.CompilerBackendType + typing.DevicePointerType typing.GraphConditionalType typing.GraphMemoryType typing.ManagedMemoryLocationType - typing.ObjectCodeFormat - typing.PCHStatus - typing.SourceType + typing.ObjectCodeFormatType + typing.PCHStatusType + typing.SourceCodeType typing.VirtualMemoryAccessType typing.VirtualMemoryAllocationType typing.VirtualMemoryGranularityType @@ -57,7 +57,7 
@@ CUDA protocols :toctree: generated/ :template: protocol.rst - typing.IsStreamT + typing.IsStreamType NVML ---- diff --git a/cuda_core/docs/source/interoperability.rst b/cuda_core/docs/source/interoperability.rst index ae109bbad0b..4aac89d13df 100644 --- a/cuda_core/docs/source/interoperability.rst +++ b/cuda_core/docs/source/interoperability.rst @@ -35,7 +35,7 @@ in Python. While we encourage new Python projects to start using streams (and ot CUDA types) from ``cuda.core``, we understand that there are already several projects exposing their own stream types. -To address this issue, we propose the :attr:`~_stream.IsStreamT.__cuda_stream__` protocol +To address this issue, we propose the :attr:`~_stream.IsStreamType.__cuda_stream__` protocol (currently version 0) as follows: For any Python objects that are meant to be interpreted as a stream, they should add a ``__cuda_stream__`` *method* that returns a 2-tuple: The version number (``0``) and the address of ``cudaStream_t`` (both as Python ``int``): diff --git a/cuda_core/tests/helpers/misc.py b/cuda_core/tests/helpers/misc.py index 89bc175a97d..ec879755cd2 100644 --- a/cuda_core/tests/helpers/misc.py +++ b/cuda_core/tests/helpers/misc.py @@ -16,7 +16,7 @@ def try_create_condition(g, default_value=1): class StreamWrapper: """ - A wrapper around Stream for testing IsStreamT conversions. + A wrapper around Stream for testing IsStreamType conversions. 
""" def __init__(self, stream): diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index bf65048e853..024f71378f4 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -71,13 +71,6 @@ {"CU_MEM_ACCESS_FLAGS_PROT_NONE", "CU_MEM_ACCESS_FLAGS_PROT_MAX"}, set(), ), - ( - driver.CUmemAllocationType, - cuda.core.typing.VirtualMemoryAllocationType, - cuda.core.VirtualMemoryResourceOptions._allocation_type, - {"CU_MEM_ALLOCATION_TYPE_INVALID", "CU_MEM_ALLOCATION_TYPE_MAX"}, - set(), - ), ( driver.CUmemAllocationGranularity_flags, cuda.core.typing.VirtualMemoryGranularityType, @@ -270,16 +263,18 @@ # Add classes here (with a comment explaining why) when a new StrEnum is # introduced that wraps something other than a cuda_binding enum. _UNBOUND_STR_ENUMS: set[StrEnum] = { - cuda.core.typing.ObjectCodeFormat, - cuda.core.typing.CompilerBackend, + cuda.core.typing.ObjectCodeFormatType, + cuda.core.typing.CompilerBackendType, # This one enum coordinates values in two cuda_binding enums: # CUmemAllocationType and CUmemLocationType cuda.core.typing.GraphMemoryType, # This should support all of the PCH-related values in nvrtcResult, but # there is no easy way to check that since they are mixed in with other # unrelated things - cuda.core.typing.PCHStatus, - cuda.core.typing.SourceType, + cuda.core.typing.PCHStatusType, + cuda.core.typing.SourceCodeType, + # This enum is dynamic depending on the version of CTK installed. + cuda.core.typing.VirtualMemoryAllocationType, } @@ -324,6 +319,7 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ ) +@pytest.mark.skipif(sys.version_info < (3, 11), reason="Requires Python 3.11+ for StrEnum") def test_all_str_enums_in_cases(): """Every StrEnum subclass in cuda.core must appear in _CASES or _UNBOUND_STR_ENUMS. 
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index 775089de907..f4858cdaef7 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -26,7 +26,7 @@ ) from cuda.core._memory._legacy import _SynchronousMemoryResource from cuda.core._utils.cuda_utils import CUDAError -from cuda.core.typing import ObjectCodeFormat, SourceType +from cuda.core.typing import ObjectCodeFormatType, SourceCodeType def test_launch_config_init(init_cuda): @@ -127,8 +127,8 @@ def test_launch_config_native_conversion(init_cuda): def test_launch_invalid_values(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, SourceType.CXX) - mod = program.compile(ObjectCodeFormat.CUBIN) + program = Program(code, SourceCodeType.CXX) + mod = program.compile(ObjectCodeFormatType.CUBIN) stream = Device().create_stream() ker = mod.get_kernel("my_kernel") diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index d729e2fde8d..eda78bd0fd6 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -13,7 +13,7 @@ from cuda.core._module import Kernel, ObjectCode from cuda.core._program import Program, ProgramOptions from cuda.core._utils.cuda_utils import CUDAError, handle_return -from cuda.core.typing import CompilerBackend, PCHStatus +from cuda.core.typing import CompilerBackendType, PCHStatusType pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",) @@ -242,7 +242,7 @@ def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) assert program.backend == "NVRTC" - assert isinstance(program.backend, CompilerBackend) + assert isinstance(program.backend, CompilerBackendType) program.compile("ptx") program.close() @@ -283,7 +283,7 @@ def test_cpp_program_pch_auto_creates(init_cuda, tmp_path): assert program.pch_status is None # not compiled yet program.compile("ptx") 
assert program.pch_status in ("created", "not_attempted", "failed") - assert isinstance(program.pch_status, PCHStatus) + assert isinstance(program.pch_status, PCHStatusType) program.close() @@ -684,7 +684,7 @@ def test_cpp_program_with_extra_sources(): def test_program_options_as_bytes_nvrtc(): """Test ProgramOptions.as_bytes() for NVRTC backend""" options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) - nvrtc_options = options.as_bytes(CompilerBackend.NVRTC) + nvrtc_options = options.as_bytes(CompilerBackendType.NVRTC) assert isinstance(nvrtc_options, list) assert all(isinstance(opt, bytes) for opt in nvrtc_options) options_str = [opt.decode() for opt in nvrtc_options] diff --git a/cuda_core/tests/test_typing_imports.py b/cuda_core/tests/test_typing_imports.py index c05e3ae3b37..a7edd2d5069 100644 --- a/cuda_core/tests/test_typing_imports.py +++ b/cuda_core/tests/test_typing_imports.py @@ -8,22 +8,22 @@ def test_typing_module_imports(): """All type aliases and protocols are importable from cuda.core.typing.""" from cuda.core.typing import ( - DevicePointerT, - IsStreamT, + DevicePointerType, + IsStreamType, ) - assert DevicePointerT is not None - assert IsStreamT is not None + assert DevicePointerType is not None + assert IsStreamType is not None def test_typing_matches_private_definitions(): """cuda.core.typing re-exports match the original private definitions.""" - from cuda.core._memory._buffer import DevicePointerT as _DevicePointerT - from cuda.core._stream import IsStreamT as _IsStreamT + from cuda.core._memory._buffer import DevicePointerType as _DevicePointerT + from cuda.core._stream import IsStreamType as _IsStreamT from cuda.core.typing import ( - DevicePointerT, - IsStreamT, + DevicePointerType, + IsStreamType, ) - assert DevicePointerT is _DevicePointerT - assert IsStreamT is _IsStreamT + assert DevicePointerType is _DevicePointerT + assert IsStreamType is _IsStreamT From 9d50fec8b0879c11f94a04ad7606671d5a3fe8e8 Mon Sep 17 
00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 12:20:41 -0400 Subject: [PATCH 3/5] Add release notes --- cuda_core/docs/source/release/1.0.0-notes.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cuda_core/docs/source/release/1.0.0-notes.rst b/cuda_core/docs/source/release/1.0.0-notes.rst index 3f61a30ec1e..360d95e36f2 100644 --- a/cuda_core/docs/source/release/1.0.0-notes.rst +++ b/cuda_core/docs/source/release/1.0.0-notes.rst @@ -113,6 +113,10 @@ Breaking changes ``CUgraphConditionalHandle`` value. Previously, ``.handle`` had to be extracted explicitly. +- Consistent naming of type annotation helpers + (`#2016 `__): + - :obj:`cuda.core.typing.DevicePointerT` -> :obj:`cuda.core.typing.DevicePointerType` + - :obj:`cuda.core.typing.IsStreamT` -> :obj:`cuda.core.typing.IsStreamType` Fixes and enhancements ----------------------- @@ -128,3 +132,22 @@ Fixes and enhancements stream and the consumer stream, matching the DLPack synchronization contract. Requires PyTorch >= 2.3. (`#749 `__) + +- Enums are now available in places where a small number of string values are + accepted or returned. You may continue to use the string values, or use + enumerations for better linting and type-checking. 
+ (`#2016 `__) + The new enums are: + + - :class:`cuda.core.typing.CompilerBackendType` + - :class:`cuda.core.typing.GraphConditionalType` + - :class:`cuda.core.typing.GraphMemoryType` + - :class:`cuda.core.typing.ManagedMemoryLocationType` + - :class:`cuda.core.typing.ObjectCodeFormatType` + - :class:`cuda.core.typing.PCHStatusType` + - :class:`cuda.core.typing.SourceCodeType` + - :class:`cuda.core.typing.VirtualMemoryAccessType` + - :class:`cuda.core.typing.VirtualMemoryAllocationType` + - :class:`cuda.core.typing.VirtualMemoryGranularityType` + - :class:`cuda.core.typing.VirtualMemoryHandleType` + - :class:`cuda.core.typing.VirtualMemoryLocationType` From 07b2bdf33727186b76706c845d90329ee3591a1d Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 13:53:53 -0400 Subject: [PATCH 4/5] Fix missing enum test --- cuda_core/tests/test_enum_coverage.py | 38 ++++++++++----------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index 024f71378f4..a02ca8f15f9 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -7,9 +7,7 @@ # mapping dicts at import time, so it runs on any CI host that has a # compatible cuda.bindings version. -import importlib import inspect -import pkgutil import sys from typing import Any @@ -25,6 +23,8 @@ else: from backports.strenum import StrEnum +_MODULES = [cuda.core.typing] + # Each entry is: # (cuda_binding_enum, str_enum, mapping_dict, binding_unmapped, str_enum_unmapped) # @@ -320,7 +320,11 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ @pytest.mark.skipif(sys.version_info < (3, 11), reason="Requires Python 3.11+ for StrEnum") -def test_all_str_enums_in_cases(): +@pytest.mark.parametrize( + "module", + _MODULES, +) +def test_all_str_enums_in_cases(module): """Every StrEnum subclass in cuda.core must appear in _CASES or _UNBOUND_STR_ENUMS. 
This ensures that when a new StrEnum wrapper is added to cuda.core, the @@ -328,29 +332,15 @@ def test_all_str_enums_in_cases(): declare it as unbound in _UNBOUND_STR_ENUMS). """ - def discover_str_enums() -> set[type]: - """Walk all submodules of cuda.core and return every StrEnum subclass found.""" - found: set[type] = set() - for _, modname, _ in pkgutil.walk_packages( - path=cuda.core.__path__, - prefix=cuda.core.__name__ + ".", - onerror=lambda _: None, - ): - try: - mod = importlib.import_module(modname) - except Exception: # noqa - continue - try: - members = inspect.getmembers(mod, inspect.isclass) - except Exception: # noqa - continue - for _, obj in members: - if obj is not StrEnum and issubclass(obj, StrEnum): - found.add(obj) - return found + found = set() + + members = inspect.getmembers(module, inspect.isclass) + for _, obj in members: + if obj is not StrEnum and issubclass(obj, StrEnum): + found.add(obj) covered = {x[1] for x in _CASES if x[1] is not None} - uncovered = discover_str_enums() - covered - _UNBOUND_STR_ENUMS + uncovered = found - covered - _UNBOUND_STR_ENUMS uncovered_names = sorted({c.__qualname__ for c in uncovered}) assert not uncovered, ( f"StrEnum subclasses in cuda.core not covered by _CASES: " From 73c458902deb159690427a0916cff40a48ac6b68 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 15:22:50 -0400 Subject: [PATCH 5/5] Fix docstring --- cuda_core/cuda/core/typing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 36a301d4766..54418f7d84b 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -32,11 +32,9 @@ ] +# A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting +# :attr:`Buffer.handle`. DevicePointerType = driver.CUdeviceptr | int | None -DevicePointerType.__doc__ = """ -A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting -:attr:`Buffer.handle`. 
-""" ProcessStateType = _Literal["running", "locked", "checkpointed", "failed"]