From 4d1ddb4bff40518750fccbc72864ab7dfbab7603 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 10:39:37 -0400 Subject: [PATCH 1/5] Fix #1995: Use StrEnum for enum-like strings --- cuda_core/cuda/core/_linker.pyx | 13 +- .../core/_memory/_managed_memory_resource.pyx | 12 +- .../core/_memory/_virtual_memory_resource.py | 70 ++++----- cuda_core/cuda/core/_module.pyx | 19 +-- cuda_core/cuda/core/_program.pyx | 68 +++++---- .../cuda/core/graph/_graph_definition.pyx | 6 +- cuda_core/cuda/core/graph/_graph_node.pyx | 2 +- cuda_core/cuda/core/graph/_subclasses.pyx | 15 +- cuda_core/cuda/core/system/_device.pyx | 1 + cuda_core/cuda/core/system/_temperature.pxi | 1 + cuda_core/cuda/core/typing.py | 93 +++++++++++- cuda_core/docs/source/api_private.rst | 21 ++- .../tests/graph/test_graph_definition.py | 11 +- cuda_core/tests/test_enum_coverage.py | 134 +++++++++++++++--- cuda_core/tests/test_launcher.py | 5 +- cuda_core/tests/test_memory.py | 30 ++-- cuda_core/tests/test_program.py | 5 +- 17 files changed, 357 insertions(+), 149 deletions(-) diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx index e89e780b34c..1a28c6ed3aa 100644 --- a/cuda_core/cuda/core/_linker.pyx +++ b/cuda_core/cuda/core/_linker.pyx @@ -39,6 +39,7 @@ from cuda.core._utils.cuda_utils import ( driver, is_sequence, ) +from cuda.core.typing import CompilerBackend ctypedef const char* const_char_ptr ctypedef void* void_ptr @@ -70,12 +71,12 @@ cdef class Linker: def __init__(self, *object_codes: ObjectCode, options: "LinkerOptions" = None): Linker_init(self, object_codes, options) - def link(self, target_type) -> ObjectCode: + def link(self, target_type: ObjectCodeFormat | str) -> ObjectCode: """Link the provided object codes into a single output of the specified target type. Parameters ---------- - target_type : str + target_type : ObjectCodeFormat | str The type of the target output. Must be either "cubin" or "ptx". 
Returns @@ -88,7 +89,7 @@ cdef class Linker: Ensure that input object codes were compiled with appropriate flags for linking (e.g., relocatable device code enabled). """ - return Linker_link(self, target_type) + return Linker_link(self, str(target_type)) def get_error_log(self) -> str: """Get the error log generated by the linker. @@ -168,9 +169,9 @@ cdef class Linker: return as_py(self._culink_handle) @property - def backend(self) -> str: - """Return this Linker instance's underlying backend.""" - return "nvJitLink" if self._use_nvjitlink else "driver" + def backend(self) -> CompilerBackend: + """Return this Linker instance's underlying :class:`CompilerBackend`.""" + return CompilerBackend.NVJITLINK if self._use_nvjitlink else CompilerBackend.DRIVER # ============================================================================= diff --git a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx index 205d3c77545..f37a4f18ee1 100644 --- a/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_managed_memory_resource.pyx @@ -16,6 +16,8 @@ from dataclasses import dataclass import threading import warnings +from cuda.core.typing import ManagedMemoryLocationType + __all__ = ['ManagedMemoryResource', 'ManagedMemoryResourceOptions'] @@ -30,7 +32,7 @@ cdef class ManagedMemoryResourceOptions: meaning depends on ``preferred_location_type``. (Default to ``None``) - preferred_location_type : ``"device"`` | ``"host"`` | ``"host_numa"`` | None, optional + preferred_location_type : ManagedMemoryLocationType | str | None, optional Controls how ``preferred_location`` is interpreted. 
When set to ``None`` (the default), legacy behavior is used: @@ -54,7 +56,7 @@ cdef class ManagedMemoryResourceOptions: (Default to ``None``) """ preferred_location: int | None = None - preferred_location_type: str | None = None + preferred_location_type: ManagedMemoryLocationType | str | None = None cdef class ManagedMemoryResource(_MemPool): @@ -97,7 +99,7 @@ cdef class ManagedMemoryResource(_MemPool): return -1 @property - def preferred_location(self) -> tuple | None: + def preferred_location(self) -> tuple[ManagedMemoryLocationType, int | None] | None: """The preferred location for managed memory allocations. Returns ``None`` if no preferred location is set (driver decides), @@ -108,8 +110,8 @@ cdef class ManagedMemoryResource(_MemPool): if self._pref_loc_type is None: return None if self._pref_loc_type == "host": - return ("host", None) - return (self._pref_loc_type, self._pref_loc_id) + return (ManagedMemoryLocationType.HOST, None) + return (ManagedMemoryLocationType(self._pref_loc_type), self._pref_loc_id) @property def is_device_accessible(self) -> bool: diff --git a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py index 7d952e102fd..a60436a4305 100644 --- a/cuda_core/cuda/core/_memory/_virtual_memory_resource.py +++ b/cuda_core/cuda/core/_memory/_virtual_memory_resource.py @@ -1,11 +1,11 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations from dataclasses import dataclass, field -from typing import TYPE_CHECKING, Iterable, Literal +from typing import TYPE_CHECKING, Iterable if TYPE_CHECKING: from cuda.core._stream import Stream @@ -21,15 +21,16 @@ _check_driver_error as raise_if_driver_error, ) from cuda.core._utils.version import binding_version +from cuda.core.typing import ( + VirtualMemoryAccessType, + VirtualMemoryAllocationType, + VirtualMemoryGranularityType, + VirtualMemoryHandleType, + VirtualMemoryLocationType, +) __all__ = ["VirtualMemoryResource", "VirtualMemoryResourceOptions"] -VirtualMemoryHandleTypeT = Literal["posix_fd", "generic", "win32_kmt", "fabric"] | None -VirtualMemoryLocationTypeT = Literal["device", "host", "host_numa", "host_numa_current"] -VirtualMemoryGranularityT = Literal["minimum", "recommended"] -VirtualMemoryAccessTypeT = Literal["rw", "r"] | None -VirtualMemoryAllocationTypeT = Literal["pinned", "managed"] - @dataclass class VirtualMemoryResourceOptions: @@ -38,18 +39,18 @@ class VirtualMemoryResourceOptions: Attributes ---------- - allocation_type: :obj:`~_memory.VirtualMemoryAllocationTypeT` + allocation_type: :obj:`~_memory.VirtualMemoryAllocationType` | str Controls the type of allocation. - location_type: :obj:`~_memory.VirtualMemoryLocationTypeT` + location_type: :obj:`~_memory.VirtualMemoryLocationType` | str Controls the location of the allocation. - handle_type: :obj:`~_memory.VirtualMemoryHandleTypeT` + handle_type: :obj:`~_memory.VirtualMemoryHandleType` | str Export handle type for the physical allocation. Use ``"posix_fd"`` on Linux if you plan to import/export the allocation (required for cuMemRetainAllocationHandle). Use `None` if you don't need an exportable handle. gpu_direct_rdma: bool Hint that the allocation should be GDR-capable (if supported). 
- granularity: :obj:`~_memory.VirtualMemoryGranularityT` + granularity: :obj:`~_memory.VirtualMemoryGranularityType` | str Controls granularity query and size rounding. addr_hint: int A (optional) virtual address hint to try to reserve at. Setting it to 0 lets the CUDA driver decide. @@ -57,50 +58,53 @@ class VirtualMemoryResourceOptions: Alignment for the VA reservation. If `None`, use the queried granularity. peers: Iterable[int] Extra device IDs that should be granted access in addition to ``device``. - self_access: :obj:`~_memory.VirtualMemoryAccessTypeT` + self_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str Access flags for the owning device. - peer_access: :obj:`~_memory.VirtualMemoryAccessTypeT` + peer_access: :obj:`~_memory.VirtualMemoryAccessType` | None | str Access flags for peers. """ - # Human-friendly strings; normalized in __post_init__ - allocation_type: VirtualMemoryAllocationTypeT = "pinned" - location_type: VirtualMemoryLocationTypeT = "device" - handle_type: VirtualMemoryHandleTypeT = "posix_fd" - granularity: VirtualMemoryGranularityT = "recommended" + allocation_type: VirtualMemoryAllocationType = VirtualMemoryAllocationType.PINNED + location_type: VirtualMemoryLocationType = VirtualMemoryLocationType.DEVICE + handle_type: VirtualMemoryHandleType = VirtualMemoryHandleType.POSIX_FD + granularity: VirtualMemoryGranularityType = VirtualMemoryGranularityType.RECOMMENDED gpu_direct_rdma: bool = False addr_hint: int | None = 0 addr_align: int | None = None peers: Iterable[int] = field(default_factory=tuple) - self_access: VirtualMemoryAccessTypeT = "rw" - peer_access: VirtualMemoryAccessTypeT = "rw" + self_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE + peer_access: VirtualMemoryAccessType = VirtualMemoryAccessType.READ_WRITE _a = driver.CUmemAccess_flags - _access_flags = {"rw": _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, "r": _a.CU_MEM_ACCESS_FLAGS_PROT_READ, None: 0} # noqa: RUF012 + _access_flags = { # noqa: 
RUF012 + VirtualMemoryAccessType.READ_WRITE: _a.CU_MEM_ACCESS_FLAGS_PROT_READWRITE, + VirtualMemoryAccessType.READ: _a.CU_MEM_ACCESS_FLAGS_PROT_READ, + None: 0, + } _h = driver.CUmemAllocationHandleType _handle_types = { # noqa: RUF012 None: _h.CU_MEM_HANDLE_TYPE_NONE, - "posix_fd": _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, - "win32_kmt": _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, - "fabric": _h.CU_MEM_HANDLE_TYPE_FABRIC, + VirtualMemoryHandleType.POSIX_FD: _h.CU_MEM_HANDLE_TYPE_POSIX_FILE_DESCRIPTOR, + VirtualMemoryHandleType.WIN32_KMT: _h.CU_MEM_HANDLE_TYPE_WIN32_KMT, + VirtualMemoryHandleType.FABRIC: _h.CU_MEM_HANDLE_TYPE_FABRIC, } _g = driver.CUmemAllocationGranularity_flags _granularity = { # noqa: RUF012 - "recommended": _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, - "minimum": _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM, + VirtualMemoryGranularityType.RECOMMENDED: _g.CU_MEM_ALLOC_GRANULARITY_RECOMMENDED, + VirtualMemoryGranularityType.MINIMUM: _g.CU_MEM_ALLOC_GRANULARITY_MINIMUM, } _l = driver.CUmemLocationType _location_type = { # noqa: RUF012 - "device": _l.CU_MEM_LOCATION_TYPE_DEVICE, - "host": _l.CU_MEM_LOCATION_TYPE_HOST, - "host_numa": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, - "host_numa_current": _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, + VirtualMemoryLocationType.DEVICE: _l.CU_MEM_LOCATION_TYPE_DEVICE, + VirtualMemoryLocationType.HOST: _l.CU_MEM_LOCATION_TYPE_HOST, + VirtualMemoryLocationType.HOST_NUMA: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA, + VirtualMemoryLocationType.HOST_NUMA_CURRENT: _l.CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT, } _t = driver.CUmemAllocationType # CUDA 13+ exposes MANAGED in CUmemAllocationType; older 12.x does not - _allocation_type = {"pinned": _t.CU_MEM_ALLOCATION_TYPE_PINNED} # noqa: RUF012 + _allocation_type = {VirtualMemoryAllocationType.PINNED: _t.CU_MEM_ALLOCATION_TYPE_PINNED} # noqa: RUF012 if binding_version() >= (13, 0, 0): - _allocation_type["managed"] = _t.CU_MEM_ALLOCATION_TYPE_MANAGED + _allocation_type[VirtualMemoryAllocationType.MANAGED] = 
_t.CU_MEM_ALLOCATION_TYPE_MANAGED @staticmethod def _access_to_flags(spec: str): diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index d6c9481b82f..4156e4d7806 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -12,6 +12,7 @@ from cuda.core._device import Device from cuda.core._launch_config cimport LaunchConfig from cuda.core._launch_config import LaunchConfig from cuda.core._stream cimport Stream +from cuda.core._program import ObjectCodeFormat from cuda.core._resource_handles cimport ( LibraryHandle, KernelHandle, @@ -569,7 +570,7 @@ cdef class Kernel: CodeTypeT = bytes | bytearray | str -cdef tuple _supported_code_type = ("cubin", "ptx", "ltoir", "fatbin", "object", "library") +cdef tuple _supported_code_type = tuple(ObjectCodeFormat.__members__.values()) cdef class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -599,7 +600,7 @@ cdef class ObjectCode: # _h_library is assigned during _lazy_load_module self._h_library = LibraryHandle() # Empty handle - self._code_type = code_type + self._code_type = str(code_type) self._module = module self._sym_map = {} if symbol_mapping is None else symbol_mapping self._name = name if name else "" @@ -629,7 +630,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "cubin", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.CUBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -647,7 +648,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, "ptx", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.PTX, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -665,7 +666,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "ltoir", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.LTOIR, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -683,7 +684,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "fatbin", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.FATBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -701,7 +702,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, "object", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.OBJECT, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -719,7 +720,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, "library", name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormat.LIBRARY, name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. @@ -758,7 +759,7 @@ cdef class ObjectCode: """ self._lazy_load_module() - supported_code_types = ("cubin", "ptx", "fatbin") + supported_code_types = (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX, ObjectCodeFormat.FATBIN) if self._code_type not in supported_code_types: raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})') try: diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index cfc66451c86..f7040919d11 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -39,6 +39,7 @@ from cuda.core._utils.cuda_utils import ( is_sequence, ) from cuda.core._utils.version import binding_version, driver_version +from cuda.core.typing import ObjectCodeFormat, CompilerBackend, PCHStatus, SourceType __all__ = ["Program", "ProgramOptions"] @@ -67,14 +68,13 @@ cdef class Program: code : str | bytes | bytearray The source code to compile. For C++ and PTX, must be a string. For NVVM IR, can be str, bytes, or bytearray. - code_type : str + code_type : SourceType | str The type of source code. Must be one of ``"c++"``, ``"ptx"``, or ``"nvvm"``. options : :class:`ProgramOptions`, optional Options to customize the compilation process. 
""" - - def __init__(self, code: str | bytes | bytearray, code_type: str, options: ProgramOptions | None = None): - Program_init(self, code, code_type, options) + def __init__(self, code: str | bytes | bytearray, code_type: SourceType | str, options: ProgramOptions | None = None): + Program_init(self, code, str(code_type), options) def close(self): """Destroy this program.""" @@ -85,13 +85,13 @@ cdef class Program: self._h_nvvm.reset() def compile( - self, target_type: str, name_expressions: tuple | list = (), logs = None + self, target_type: ObjectCodeFormat | str, name_expressions: tuple | list = (), logs = None ) -> ObjectCode: """Compile the program to the specified target type. Parameters ---------- - target_type : str + target_type : ObjectCodeFormat | str The compilation target. Must be one of ``"ptx"``, ``"cubin"``, or ``"ltoir"``. name_expressions : tuple | list, optional Sequence of name expressions to make accessible in the compiled code. @@ -104,10 +104,10 @@ cdef class Program: :class:`~cuda.core.ObjectCode` The compiled object code. """ - return Program_compile(self, target_type, name_expressions, logs) + return Program_compile(self, str(target_type), name_expressions, logs) @property - def pch_status(self) -> str | None: + def pch_status(self) -> PCHStatus | None: """PCH creation outcome from the most recent :meth:`compile` call. Possible values: @@ -130,12 +130,14 @@ cdef class Program: use the NVRTC backend. For PTX and NVVM programs this property always returns ``None``. 
""" - return self._pch_status + if self._pch_status is None: + return None + return PCHStatus(self._pch_status) @property - def backend(self) -> str: - """Return this Program instance's underlying backend.""" - return self._backend + def backend(self) -> CompilerBackend: + """Return this Program instance's underlying :class:`CompilerBackend`.""" + return CompilerBackend(self._backend) @property def handle(self) -> ProgramHandleT: @@ -435,7 +437,7 @@ class ProgramOptions: def _prepare_nvvm_options(self, as_bytes: bool = True) -> list[bytes] | list[str]: return _prepare_nvvm_options_impl(self, as_bytes) - def as_bytes(self, backend: str, target_type: str | None = None) -> list[bytes]: + def as_bytes(self, backend: CompilerBackend | str, target_type: ObjectCodeFormat | str | None = None) -> list[bytes]: """Convert program options to bytes format for the specified backend. This method transforms the program options into a format suitable for the @@ -444,9 +446,9 @@ class ProgramOptions: Parameters ---------- - backend : str + backend : CompilerBackend | str The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm". - target_type : str, optional + target_type : ObjectCodeFormat | str, optional The compilation target type (e.g., "ptx", "cubin", "ltoir"). Some backends require additional options based on the target type. 
@@ -467,7 +469,7 @@ class ProgramOptions: >>> options = ProgramOptions(arch="sm_80", debug=True) >>> nvrtc_options = options.as_bytes("nvrtc") """ - backend = backend.lower() + backend = str(backend).lower() if backend == "nvrtc": return self._prepare_nvrtc_options() elif backend == "nvvm": @@ -639,7 +641,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL)) self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog) self._nvrtc_code = code_bytes - self._backend = "NVRTC" + self._backend = str(CompilerBackend.NVRTC) self._linker = None elif code_type == "ptx": @@ -649,7 +651,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op self._linker = Linker( ObjectCode._init(code.encode(), code_type), options=_translate_program_options(options) ) - self._backend = self._linker.backend + self._backend = str(self._linker.backend) elif code_type == "nvvm": _get_nvvm_module() # Validate NVVM availability @@ -683,12 +685,11 @@ cdef inline int Program_init(Program self, object code, str code_type, object op if options.use_libdevice: self._use_libdevice = True - self._backend = "NVVM" + self._backend = str(CompilerBackend.NVVM) self._linker = None else: - supported_code_types = ("c++", "ptx", "nvvm") - assert code_type not in supported_code_types, f"{code_type=}" + supported_code_types = tuple(x.value for x in SourceType) if options.use_libdevice: raise ValueError("use_libdevice is only supported by the NVVM backend") raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") @@ -780,23 +781,18 @@ cdef bint _has_nvrtc_pch_apis(): return _nvrtc_pch_apis_cached -cdef str _PCH_STATUS_CREATED = "created" -cdef str _PCH_STATUS_NOT_ATTEMPTED = "not_attempted" -cdef str _PCH_STATUS_FAILED = "failed" - - -cdef str _read_pch_status(cynvrtc.nvrtcProgram prog): +cdef object _read_pch_status(cynvrtc.nvrtcProgram prog): """Query nvrtcGetPCHCreateStatus and translate to a 
high-level string.""" cdef cynvrtc.nvrtcResult err with nogil: err = cynvrtc.nvrtcGetPCHCreateStatus(prog) if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS: - return _PCH_STATUS_CREATED + return PCHStatus.CREATED if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED: return None # sentinel: caller should auto-retry if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED: - return _PCH_STATUS_NOT_ATTEMPTED - return _PCH_STATUS_FAILED + return PCHStatus.NOT_ATTEMPTED + return PCHStatus.FAILED cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs): @@ -822,7 +818,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) from e if status is not None: - self._pch_status = status + self._pch_status = str(status) return result # Heap exhausted — auto-resize and retry with a fresh program @@ -844,7 +840,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) status = _read_pch_status(retry_prog) - self._pch_status = status if status is not None else _PCH_STATUS_FAILED + self._pch_status = status if status is not None else str(PCHStatus.FAILED) return result @@ -904,10 +900,10 @@ cdef object Program_compile_nvvm(Program self, str target_type, object logs): # Supported target types per backend cdef dict SUPPORTED_TARGETS = { - "NVRTC": ("ptx", "cubin", "ltoir"), - "NVVM": ("ptx", "ltoir"), - "nvJitLink": ("cubin", "ptx"), - "driver": ("cubin", "ptx"), + CompilerBackend.NVRTC: (ObjectCodeFormat.PTX, ObjectCodeFormat.CUBIN, ObjectCodeFormat.LTOIR), + CompilerBackend.NVVM: (ObjectCodeFormat.PTX, ObjectCodeFormat.LTOIR), + CompilerBackend.NVJITLINK: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), + CompilerBackend.DRIVER: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), } diff --git a/cuda_core/cuda/core/graph/_graph_definition.pyx b/cuda_core/cuda/core/graph/_graph_definition.pyx index 9a08232c556..5e4fa60d055 100644 --- 
a/cuda_core/cuda/core/graph/_graph_definition.pyx +++ b/cuda_core/cuda/core/graph/_graph_definition.pyx @@ -27,6 +27,8 @@ from dataclasses import dataclass from cuda.core._utils.cuda_utils import driver +from cuda.core.typing import GraphMemoryType + __all__ = ['GraphCondition', 'GraphAllocOptions', 'GraphDefinition'] @@ -78,7 +80,7 @@ class GraphAllocOptions: device : int or Device, optional The device on which to allocate memory. If None (default), uses the current CUDA context's device. - memory_type : str, optional + memory_type : GraphMemoryType | str, optional Type of memory to allocate. One of: - ``"device"`` (default): Pinned device memory, optimal for GPU kernels. @@ -101,7 +103,7 @@ class GraphAllocOptions: """ device: int | "Device" | None = None - memory_type: str = "device" + memory_type: GraphMemoryType = GraphMemoryType.DEVICE peer_access: list | None = None diff --git a/cuda_core/cuda/core/graph/_graph_node.pyx b/cuda_core/cuda/core/graph/_graph_node.pyx index 36401776600..c9d1786caa6 100644 --- a/cuda_core/cuda/core/graph/_graph_node.pyx +++ b/cuda_core/cuda/core/graph/_graph_node.pyx @@ -689,7 +689,7 @@ cdef inline AllocNode GN_alloc(GraphNode self, size_t size, object options): cdef str memory_type = "device" if options is not None and options.memory_type is not None: - memory_type = options.memory_type + memory_type = str(options.memory_type) c_memset(&alloc_params, 0, sizeof(alloc_params)) alloc_params.poolProps.handleTypes = cydriver.CUmemAllocationHandleType.CU_MEM_HANDLE_TYPE_NONE diff --git a/cuda_core/cuda/core/graph/_subclasses.pyx b/cuda_core/cuda/core/graph/_subclasses.pyx index 25b648bacef..35009218479 100644 --- a/cuda_core/cuda/core/graph/_subclasses.pyx +++ b/cuda_core/cuda/core/graph/_subclasses.pyx @@ -33,6 +33,7 @@ from cuda.core._utils.cuda_utils cimport HANDLE_RETURN from cuda.core.graph._utils cimport _is_py_host_trampoline from cuda.core._utils.cuda_utils import driver, handle_return +from cuda.core.typing import 
GraphConditionalType __all__ = [ 'AllocNode', @@ -169,8 +170,8 @@ cdef class AllocNode(GraphNode): The number of bytes allocated. device_id : int The device on which the allocation was made. - memory_type : str - The type of memory allocated (``"device"``, ``"host"``, or ``"managed"``). + memory_type : GraphMemoryType | str + The type of memory allocated. peer_access : tuple of int Device IDs that have read-write access to this allocation. options : GraphAllocOptions @@ -698,8 +699,8 @@ cdef class ConditionalNode(GraphNode): return self._condition @property - def cond_type(self) -> str | None: - """The conditional type as a string: 'if', 'while', or 'switch'. + def cond_type(self) -> GraphConditionalType | None: + """The conditional type: GraphConditionalType.IF, .WHILE, or .SWITCH Returns None when reconstructed from the driver pre-CUDA 13.2, as the conditional type cannot be determined. @@ -707,11 +708,11 @@ cdef class ConditionalNode(GraphNode): if self._condition is None: return None if self._cond_type == cydriver.CU_GRAPH_COND_TYPE_IF: - return "if" + return GraphConditionalType("if") elif self._cond_type == cydriver.CU_GRAPH_COND_TYPE_WHILE: - return "while" + return GraphConditionalType("while") else: - return "switch" + return GraphConditionalType("switch") @property def branches(self) -> tuple: diff --git a/cuda_core/cuda/core/system/_device.pyx b/cuda_core/cuda/core/system/_device.pyx index f3cc62fe546..9fbe001d0dd 100644 --- a/cuda_core/cuda/core/system/_device.pyx +++ b/cuda_core/cuda/core/system/_device.pyx @@ -170,6 +170,7 @@ _GPU_P2P_CAPS_INDEX_MAPPING = { GpuP2PCapsIndex.ATOMICS: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_ATOMICS, GpuP2PCapsIndex.PCI: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PCI, GpuP2PCapsIndex.PROP: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_PROP, + GpuP2PCapsIndex.UNKNOWN: nvml.GpuP2PCapsIndex.P2P_CAPS_INDEX_UNKNOWN, } diff --git a/cuda_core/cuda/core/system/_temperature.pxi b/cuda_core/cuda/core/system/_temperature.pxi index 
b322df4a591..d890ba0d128 100644 --- a/cuda_core/cuda/core/system/_temperature.pxi +++ b/cuda_core/cuda/core/system/_temperature.pxi @@ -71,6 +71,7 @@ _THERMAL_CONTROLLER_MAPPING = { nvml.ThermalController.NVSYSCON_E551: ThermalController.NVSYSCON_E551, nvml.ThermalController.MAX6649R: ThermalController.MAX6649R, nvml.ThermalController.ADT7473S: ThermalController.ADT7473S, + nvml.ThermalController.UNKNOWN: ThermalController.UNKNOWN, } diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index a66ab1881fb..33405657b24 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -2,12 +2,103 @@ # # SPDX-License-Identifier: Apache-2.0 -"""Public type aliases and protocols used in cuda.core API signatures.""" +"""Public type aliases, protocols, and enumerations used in cuda.core API signatures.""" + +try: + from enum import StrEnum +except ImportError: + from backports.strenum import StrEnum from cuda.core._memory._buffer import DevicePointerT from cuda.core._stream import IsStreamT __all__ = [ + "CompilerBackend", "DevicePointerT", + "GraphConditionalType", + "GraphMemoryType", "IsStreamT", + "ManagedMemoryLocationType", + "ObjectCodeFormat", + "PCHStatus", + "SourceType", + "VirtualMemoryAccessType", + "VirtualMemoryAllocationType", + "VirtualMemoryGranularityType", + "VirtualMemoryHandleType", + "VirtualMemoryLocationType", ] + + +class SourceType(StrEnum): + CXX = "c++" + PTX = "ptx" + NVVM = "nvvm" + + +class ObjectCodeFormat(StrEnum): + PTX = "ptx" + CUBIN = "cubin" + LTOIR = "ltoir" + FATBIN = "fatbin" + OBJECT = "object" + LIBRARY = "library" + + +class CompilerBackend(StrEnum): + NVRTC = "NVRTC" + NVVM = "NVVM" + NVJITLINK = "nvJitLink" + DRIVER = "driver" + + +class PCHStatus(StrEnum): + CREATED = "created" + NOT_ATTEMPTED = "not_attempted" + FAILED = "failed" + + +class GraphConditionalType(StrEnum): + IF = "if" + WHILE = "while" + SWITCH = "switch" + + +class GraphMemoryType(StrEnum): + DEVICE = "device" + HOST = 
"host" + MANAGED = "managed" + + +class ManagedMemoryLocationType(StrEnum): + DEVICE = "device" + HOST = "host" + HOST_NUMA = "host_numa" + + +class VirtualMemoryHandleType(StrEnum): + POSIX_FD = "posix_fd" + WIN32_KMT = "win32_kmt" + FABRIC = "fabric" + + +class VirtualMemoryLocationType(StrEnum): + DEVICE = "device" + HOST = "host" + HOST_NUMA = "host_numa" + HOST_NUMA_CURRENT = "host_numa_current" + + +class VirtualMemoryGranularityType(StrEnum): + MINIMUM = "minimum" + RECOMMENDED = "recommended" + + +class VirtualMemoryAccessType(StrEnum): + READ_WRITE = "rw" + READ = "r" + + +class VirtualMemoryAllocationType(StrEnum): + PINNED = "pinned" + MANAGED = "managed" diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index a3bb4f1395c..725b211798e 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -16,16 +16,23 @@ CUDA runtime .. autosummary:: :toctree: generated/ - typing.DevicePointerT - _memory._virtual_memory_resource.VirtualMemoryAllocationTypeT - _memory._virtual_memory_resource.VirtualMemoryLocationTypeT - _memory._virtual_memory_resource.VirtualMemoryGranularityT - _memory._virtual_memory_resource.VirtualMemoryAccessTypeT - _memory._virtual_memory_resource.VirtualMemoryHandleTypeT _module.KernelAttributes _module.KernelOccupancy - _module.ParamInfo _module.MaxPotentialBlockSizeOccupancyResult + _module.ParamInfo + typing.CompilerBackend + typing.DevicePointerT + typing.GraphConditionalType + typing.GraphMemoryType + typing.ManagedMemoryLocationType + typing.ObjectCodeFormat + typing.PCHStatus + typing.SourceType + typing.VirtualMemoryAccessType + typing.VirtualMemoryAllocationType + typing.VirtualMemoryGranularityType + typing.VirtualMemoryHandleType + typing.VirtualMemoryLocationType :template: autosummary/cyclass.rst diff --git a/cuda_core/tests/graph/test_graph_definition.py b/cuda_core/tests/graph/test_graph_definition.py index e85645e0305..a5d8ff1f34d 100644 --- 
a/cuda_core/tests/graph/test_graph_definition.py +++ b/cuda_core/tests/graph/test_graph_definition.py @@ -33,6 +33,7 @@ SwitchNode, WhileNode, ) +from cuda.core.typing import GraphConditionalType, GraphMemoryType ALLOC_SIZE = 1024 @@ -275,7 +276,7 @@ def _build_alloc_node(g): def _build_alloc_managed_node(g): _skip_if_no_managed_mempool() device_id = Device().device_id - options = GraphAllocOptions(memory_type="managed") + options = GraphAllocOptions(memory_type=GraphMemoryType.MANAGED) entry = g.allocate(ALLOC_SIZE) node = entry.allocate(ALLOC_SIZE, options) return node, { @@ -421,7 +422,7 @@ def _build_if_then_node(g): node = g.if_then(condition) return node, { "condition": condition, - "cond_type": "if", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "if", "branches": lambda v: isinstance(v, tuple) and len(v) == 1, "then": lambda v: isinstance(v, GraphDefinition), } @@ -432,7 +433,7 @@ def _build_if_else_node(g): node = g.if_else(condition) return node, { "condition": condition, - "cond_type": "if", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "if", "branches": lambda v: isinstance(v, tuple) and len(v) == 2, "then": lambda v: isinstance(v, GraphDefinition), "else_": lambda v: isinstance(v, GraphDefinition), @@ -444,7 +445,7 @@ def _build_while_loop_node(g): node = g.while_loop(condition) return node, { "condition": condition, - "cond_type": "while", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "while", "branches": lambda v: isinstance(v, tuple) and len(v) == 1, "body": lambda v: isinstance(v, GraphDefinition), } @@ -455,7 +456,7 @@ def _build_switch_node(g): node = g.switch(condition, 3) return node, { "condition": condition, - "cond_type": "switch", + "cond_type": lambda v: isinstance(v, GraphConditionalType) and v == "switch", "branches": lambda v: isinstance(v, tuple) and len(v) == 3, } diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index 
9c70c9f6042..bf65048e853 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -11,10 +11,13 @@ import inspect import pkgutil import sys +from typing import Any import pytest import cuda.core +import cuda.core.typing +from cuda.bindings import driver from cuda.core import system if sys.version_info >= (3, 11): @@ -38,7 +41,71 @@ # in binding_unmapped appears as either a key or a value of the mapping dict, # and conversely that every str_enum member not in str_enum_unmapped also # appears. -_CASES = [] + +_CASES: list[tuple[Any, StrEnum, dict | None, set[str], set[str]]] = [ + ( + driver.CUgraphConditionalNodeType, + cuda.core.typing.GraphConditionalType, + None, + set(), + set(), + ), + ( + driver.CUmemLocationType, + cuda.core.typing.ManagedMemoryLocationType, + None, + # We have some explicitly unsupported memory location types + { + "CU_MEM_LOCATION_TYPE_NONE", + "CU_MEM_LOCATION_TYPE_HOST_NUMA_CURRENT", + "CU_MEM_LOCATION_TYPE_INVISIBLE", + "CU_MEM_LOCATION_TYPE_MAX", + "CU_MEM_LOCATION_TYPE_INVALID", + }, + set(), + ), + ( + driver.CUmemAccess_flags, + cuda.core.typing.VirtualMemoryAccessType, + cuda.core.VirtualMemoryResourceOptions._access_flags, + {"CU_MEM_ACCESS_FLAGS_PROT_NONE", "CU_MEM_ACCESS_FLAGS_PROT_MAX"}, + set(), + ), + ( + driver.CUmemAllocationType, + cuda.core.typing.VirtualMemoryAllocationType, + cuda.core.VirtualMemoryResourceOptions._allocation_type, + {"CU_MEM_ALLOCATION_TYPE_INVALID", "CU_MEM_ALLOCATION_TYPE_MAX"}, + set(), + ), + ( + driver.CUmemAllocationGranularity_flags, + cuda.core.typing.VirtualMemoryGranularityType, + cuda.core.VirtualMemoryResourceOptions._granularity, + set(), + set(), + ), + ( + driver.CUmemAllocationHandleType, + cuda.core.typing.VirtualMemoryHandleType, + cuda.core.VirtualMemoryResourceOptions._handle_types, + {"CU_MEM_HANDLE_TYPE_NONE", "CU_MEM_HANDLE_TYPE_WIN32", "CU_MEM_HANDLE_TYPE_MAX"}, + {"GENERIC"}, + ), + ( + driver.CUmemLocationType, + 
cuda.core.typing.VirtualMemoryLocationType, + None, + # We have some explicitly unsupported memory location types + { + "CU_MEM_LOCATION_TYPE_NONE", + "CU_MEM_LOCATION_TYPE_INVISIBLE", + "CU_MEM_LOCATION_TYPE_MAX", + "CU_MEM_LOCATION_TYPE_INVALID", + }, + set(), + ), +] if system.CUDA_BINDINGS_NVML_IS_COMPATIBLE: # Populated below only when NVML bindings are compatible, so that importing @@ -70,7 +137,7 @@ _device._GPU_P2P_STATUS_MAPPING, # Both the typo'd (SUPPORED) and corrected (SUPPORTED) spellings # share the same integer value; the mapping covers both via aliases - set(), + {"P2P_STATUS_CHIPSET_NOT_SUPPORED"}, set(), ), ( @@ -116,11 +183,8 @@ nvml.ThermalController, _device.ThermalController, _device._THERMAL_CONTROLLER_MAPPING, - # NONE and UNKNOWN are both handled by the .get() fallback that - # returns ThermalController.UNKNOWN when the value is not in the mapping - {"NONE", "UNKNOWN"}, - # UNKNOWN is the default returned by .get() for unrecognised controllers - {"UNKNOWN"}, + {"NONE"}, + {"NONE"}, ), ( nvml.ThermalTarget, @@ -156,11 +220,8 @@ nvml.GpuP2PCapsIndex, _device.GpuP2PCapsIndex, _device._GPU_P2P_CAPS_INDEX_MAPPING, - # UNKNOWN is returned by the driver when an index is unrecognised; - # it is not a capability the caller selects - {"P2P_CAPS_INDEX_UNKNOWN"}, - # UNKNOWN is a driver-side fallback, not a caller-selectable index - {"UNKNOWN"}, + set(), + set(), ), ( nvml.GpuTopologyLevel, @@ -208,7 +269,18 @@ # StrEnum subclasses that intentionally have no associated cuda_binding. # Add classes here (with a comment explaining why) when a new StrEnum is # introduced that wraps something other than a cuda_binding enum. 
-_UNBOUND_STR_ENUMS: frozenset[type] = frozenset() +_UNBOUND_STR_ENUMS: set[StrEnum] = { + cuda.core.typing.ObjectCodeFormat, + cuda.core.typing.CompilerBackend, + # This one enum coordinates values in two cuda_binding enums: + # CUmemAllocationType and CUmemLocationType + cuda.core.typing.GraphMemoryType, + # This should support all of the PCH-related values in nvrtcResult, but + # there is no easy way to check that since they are mixed in with other + # unrelated things + cuda.core.typing.PCHStatus, + cuda.core.typing.SourceType, +} @pytest.mark.parametrize( @@ -223,18 +295,33 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ mapping (or be listed in the per-entry str_enum_unmapped set). """ required = set(binding.__members__) - binding_unmapped - # Compare by integer value so that enum aliases (two names, one integer) - # are treated as covered when the canonical member appears in the mapping. - covered_values = frozenset(int(m) for m in (*mapping.keys(), *mapping.values()) if isinstance(m, binding)) - missing = {name for name in required if int(binding.__members__[name]) not in covered_values} - assert not missing, f"{binding.__name__} has members not covered by the wrapper mapping: {missing}" + if mapping is not None: + # Compare by integer value so that enum aliases (two names, one integer) + # are treated as covered when the canonical member appears in the mapping. + covered_values = frozenset(int(m) for m in (*mapping.keys(), *mapping.values()) if isinstance(m, binding)) + missing = {name for name in required if int(binding.__members__[name]) not in covered_values} + assert not missing, f"{binding.__name__} has members not covered by the wrapper mapping: {missing}" # Reverse check: every StrEnum member must also appear in the mapping. 
if str_enum is not None: - required_str = set(str_enum.__members__) - str_enum_unmapped - covered_str = {m.name for m in (*mapping.keys(), *mapping.values()) if isinstance(m, str_enum)} - missing_str = required_str - covered_str - assert not missing_str, f"{str_enum.__name__} has members not covered by the wrapper mapping: {missing_str}" + if mapping is not None: + required_str = set(str_enum.__members__) - str_enum_unmapped + covered_str = {m.name for m in (*mapping.keys(), *mapping.values()) if isinstance(m, str_enum)} + missing_str = required_str - covered_str + assert not missing_str, f"{str_enum.__name__} has members not covered by the wrapper mapping: {missing_str}" + + # For checking a StrEnum against a cuda_binding enum directly, without a + # mapping, the best we can do is count them, since it's reasonable that + # they have been renamed for clarity. + required_count = len(required) + covered_str_enum = set(str_enum.__members__) - str_enum_unmapped + covered_count = len(covered_str_enum) + if required_count != covered_count: + raise AssertionError( + f"{str_enum.__name__} has {covered_count} members, but expected {required_count} based on {binding.__name__} " + "after accounting for unmapped members. This may indicate that some members are missing from the wrapper, " + "or that some wrapper members do not correspond to actual binding members." + ) def test_all_str_enums_in_cases(): @@ -268,8 +355,9 @@ def discover_str_enums() -> set[type]: covered = {x[1] for x in _CASES if x[1] is not None} uncovered = discover_str_enums() - covered - _UNBOUND_STR_ENUMS + uncovered_names = sorted({c.__qualname__ for c in uncovered}) assert not uncovered, ( f"StrEnum subclasses in cuda.core not covered by _CASES: " - f"{sorted(c.__qualname__ for c in uncovered)}\n" + f"{uncovered_names}\n" "Add a _CASES entry for each, or add to _UNBOUND_STR_ENUMS if it does not wrap a cuda_binding enum." 
) diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index b3461f5a371..775089de907 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -26,6 +26,7 @@ ) from cuda.core._memory._legacy import _SynchronousMemoryResource from cuda.core._utils.cuda_utils import CUDAError +from cuda.core.typing import ObjectCodeFormat, SourceType def test_launch_config_init(init_cuda): @@ -126,8 +127,8 @@ def test_launch_config_native_conversion(init_cuda): def test_launch_invalid_values(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, "c++") - mod = program.compile("cubin") + program = Program(code, SourceType.CXX) + mod = program.compile(ObjectCodeFormat.CUBIN) stream = Device().create_stream() ker = mod.get_kernel("my_kernel") diff --git a/cuda_core/tests/test_memory.py b/cuda_core/tests/test_memory.py index fb99895616d..3ae6960fc29 100644 --- a/cuda_core/tests/test_memory.py +++ b/cuda_core/tests/test_memory.py @@ -46,6 +46,14 @@ from cuda.core._dlpack import DLDeviceType from cuda.core._memory import IPCBufferDescriptor from cuda.core._utils.cuda_utils import CUDAError, handle_return +from cuda.core.typing import ( + ManagedMemoryLocationType, + VirtualMemoryAccessType, + VirtualMemoryAllocationType, + VirtualMemoryGranularityType, + VirtualMemoryHandleType, + VirtualMemoryLocationType, +) from cuda.core.utils import StridedMemoryView POOL_SIZE = 2097152 # 2MB size @@ -134,19 +142,19 @@ def is_host_accessible(self) -> bool: def test_package_contents(): expected = [ "Buffer", - "MemoryResource", "DeviceMemoryResource", "DeviceMemoryResourceOptions", "GraphMemoryResource", - "IPCBufferDescriptor", "IPCAllocationHandle", + "IPCBufferDescriptor", "LegacyPinnedMemoryResource", "ManagedMemoryResource", "ManagedMemoryResourceOptions", - "PinnedMemoryResourceOptions", + "MemoryResource", "PinnedMemoryResource", - "VirtualMemoryResourceOptions", + "PinnedMemoryResourceOptions", 
"VirtualMemoryResource", + "VirtualMemoryResourceOptions", ] d = {} exec("from cuda.core._memory import *", d) # noqa: S102 @@ -800,14 +808,14 @@ def test_vmm_allocator_policy_configuration(): # Test with custom VMM config custom_config = VirtualMemoryResourceOptions( - allocation_type="pinned", - location_type="device", - granularity="minimum", + allocation_type=VirtualMemoryAllocationType.PINNED, + location_type=VirtualMemoryLocationType.DEVICE, + granularity=VirtualMemoryGranularityType.MINIMUM, gpu_direct_rdma=True, - handle_type="posix_fd" if not IS_WINDOWS else "win32_kmt", + handle_type=VirtualMemoryHandleType.POSIX_FD if not IS_WINDOWS else VirtualMemoryHandleType.WIN32_KMT, peers=(), - self_access="rw", - peer_access="rw", + self_access=VirtualMemoryAccessType.READ_WRITE, + peer_access=VirtualMemoryAccessType.READ_WRITE, ) vmm_mr = VirtualMemoryResource(device, config=custom_config) @@ -1090,7 +1098,7 @@ def test_managed_memory_resource_preferred_location_device(init_cuda): # Explicit style opts = ManagedMemoryResourceOptions( preferred_location=device.device_id, - preferred_location_type="device", + preferred_location_type=ManagedMemoryLocationType.DEVICE, ) mr = create_managed_memory_resource_or_skip(opts) assert mr.preferred_location == ("device", device.device_id) diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index 992ce336555..d729e2fde8d 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -13,6 +13,7 @@ from cuda.core._module import Kernel, ObjectCode from cuda.core._program import Program, ProgramOptions from cuda.core._utils.cuda_utils import CUDAError, handle_return +from cuda.core.typing import CompilerBackend, PCHStatus pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",) @@ -241,6 +242,7 @@ def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) assert program.backend == 
"NVRTC" + assert isinstance(program.backend, CompilerBackend) program.compile("ptx") program.close() @@ -281,6 +283,7 @@ def test_cpp_program_pch_auto_creates(init_cuda, tmp_path): assert program.pch_status is None # not compiled yet program.compile("ptx") assert program.pch_status in ("created", "not_attempted", "failed") + assert isinstance(program.pch_status, PCHStatus) program.close() @@ -681,7 +684,7 @@ def test_cpp_program_with_extra_sources(): def test_program_options_as_bytes_nvrtc(): """Test ProgramOptions.as_bytes() for NVRTC backend""" options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) - nvrtc_options = options.as_bytes("nvrtc") + nvrtc_options = options.as_bytes(CompilerBackend.NVRTC) assert isinstance(nvrtc_options, list) assert all(isinstance(opt, bytes) for opt in nvrtc_options) options_str = [opt.decode() for opt in nvrtc_options] From f97b30b52a12c8ec2d34011bf1005f1010511f09 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 11:52:46 -0400 Subject: [PATCH 2/5] Address renamings from PR comments --- cuda_core/cuda/core/_device.pyx | 6 +-- cuda_core/cuda/core/_launcher.pyx | 2 +- cuda_core/cuda/core/_linker.pyx | 12 ++--- cuda_core/cuda/core/_memory/_buffer.pyx | 14 +++--- .../core/_memory/_graph_memory_resource.pyx | 4 +- cuda_core/cuda/core/_memory/_legacy.py | 6 +-- cuda_core/cuda/core/_memory/_memory_pool.pyx | 4 +- cuda_core/cuda/core/_module.pyx | 18 +++---- cuda_core/cuda/core/_program.pyx | 48 +++++++++---------- cuda_core/cuda/core/_stream.pyx | 4 +- cuda_core/cuda/core/typing.py | 27 ++++++----- cuda_core/docs/source/api_private.rst | 12 ++--- cuda_core/docs/source/interoperability.rst | 2 +- cuda_core/tests/helpers/misc.py | 2 +- cuda_core/tests/test_enum_coverage.py | 18 +++---- cuda_core/tests/test_launcher.py | 6 +-- cuda_core/tests/test_program.py | 8 ++-- cuda_core/tests/test_typing_imports.py | 20 ++++---- 18 files changed, 106 insertions(+), 107 deletions(-) diff --git 
a/cuda_core/cuda/core/_device.pyx b/cuda_core/cuda/core/_device.pyx index c0d7f09ee44..4dd03e9eaaa 100644 --- a/cuda_core/cuda/core/_device.pyx +++ b/cuda_core/cuda/core/_device.pyx @@ -23,7 +23,7 @@ from cuda.core._resource_handles cimport ( as_cu, ) -from cuda.core._stream import IsStreamT, Stream, StreamOptions +from cuda.core._stream import IsStreamType, Stream, StreamOptions from cuda.core._utils.clear_error_support import assert_type from cuda.core._utils.cuda_utils import ( ComputeCapability, @@ -1268,7 +1268,7 @@ class Device: """ raise NotImplementedError("WIP: https://github.com/NVIDIA/cuda-python/issues/189") - def create_stream(self, obj: IsStreamT | None = None, options: StreamOptions | None = None) -> Stream: + def create_stream(self, obj: IsStreamType | None = None, options: StreamOptions | None = None) -> Stream: """Create a :obj:`~_stream.Stream` object. New stream objects can be created in two different ways: @@ -1285,7 +1285,7 @@ class Device: Parameters ---------- - obj : :obj:`~_stream.IsStreamT`, optional + obj : :obj:`~_stream.IsStreamType`, optional Any object supporting the ``__cuda_stream__`` protocol. options : :obj:`~_stream.StreamOptions`, optional Customizable dataclass for stream creation options. diff --git a/cuda_core/cuda/core/_launcher.pyx b/cuda_core/cuda/core/_launcher.pyx index 87d18f2b881..e6a07ad28e6 100644 --- a/cuda_core/cuda/core/_launcher.pyx +++ b/cuda_core/cuda/core/_launcher.pyx @@ -20,7 +20,7 @@ from cuda.core._stream import Stream from math import prod -def launch(stream: Stream | GraphBuilder | IsStreamT, config: LaunchConfig, kernel: Kernel, *kernel_args): +def launch(stream: Stream | GraphBuilder | IsStreamType, config: LaunchConfig, kernel: Kernel, *kernel_args): """Launches a :obj:`~_module.Kernel` object with launch-time configuration. 
diff --git a/cuda_core/cuda/core/_linker.pyx b/cuda_core/cuda/core/_linker.pyx index 1a28c6ed3aa..2a3d0b514c8 100644 --- a/cuda_core/cuda/core/_linker.pyx +++ b/cuda_core/cuda/core/_linker.pyx @@ -39,7 +39,7 @@ from cuda.core._utils.cuda_utils import ( driver, is_sequence, ) -from cuda.core.typing import CompilerBackend +from cuda.core.typing import CompilerBackendType ctypedef const char* const_char_ptr ctypedef void* void_ptr @@ -71,12 +71,12 @@ cdef class Linker: def __init__(self, *object_codes: ObjectCode, options: "LinkerOptions" = None): Linker_init(self, object_codes, options) - def link(self, target_type: ObjectCodeFormat | str) -> ObjectCode: + def link(self, target_type: ObjectCodeFormatType | str) -> ObjectCode: """Link the provided object codes into a single output of the specified target type. Parameters ---------- - target_type : ObjectCodeFormat | str + target_type : ObjectCodeFormatType | str The type of the target output. Must be either "cubin" or "ptx". Returns @@ -169,9 +169,9 @@ cdef class Linker: return as_py(self._culink_handle) @property - def backend(self) -> CompilerBackend: - """Return this Linker instance's underlying :class:`CompilerBackend`.""" - return CompilerBackend.NVJITLINK if self._use_nvjitlink else CompilerBackend.DRIVER + def backend(self) -> CompilerBackendType: + """Return this Linker instance's underlying :class:`CompilerBackendType`.""" + return CompilerBackendType.NVJITLINK if self._use_nvjitlink else CompilerBackendType.DRIVER # ============================================================================= diff --git a/cuda_core/cuda/core/_memory/_buffer.pyx b/cuda_core/cuda/core/_memory/_buffer.pyx index a56657a3564..c4e8b5a1121 100644 --- a/cuda_core/cuda/core/_memory/_buffer.pyx +++ b/cuda_core/cuda/core/_memory/_buffer.pyx @@ -65,7 +65,7 @@ register_mr_dealloc_callback(_mr_dealloc_callback) __all__ = ['Buffer', 'MemoryResource'] -DevicePointerT = driver.CUdeviceptr | int | None +DevicePointerType = driver.CUdeviceptr 
| int | None """ A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting :attr:`Buffer.handle`. @@ -97,7 +97,7 @@ cdef class Buffer: @classmethod def _init( - cls, ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None, + cls, ptr: DevicePointerType, size_t size, mr: MemoryResource | None = None, ipc_descriptor: IPCBufferDescriptor | None = None, owner : object | None = None ): @@ -132,14 +132,14 @@ cdef class Buffer: @staticmethod def from_handle( - ptr: DevicePointerT, size_t size, mr: MemoryResource | None = None, + ptr: DevicePointerType, size_t size, mr: MemoryResource | None = None, owner: object | None = None, ) -> Buffer: """Create a new :class:`Buffer` object from a pointer. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` Allocated buffer handle object size : int Memory size of the buffer @@ -347,7 +347,7 @@ cdef class Buffer: return self._mem_attrs.device_id @property - def handle(self) -> DevicePointerT: + def handle(self) -> DevicePointerType: """Return the buffer handle object. .. caution:: @@ -515,12 +515,12 @@ cdef class MemoryResource: """ raise TypeError("MemoryResource.allocate must be implemented by subclasses.") - def deallocate(self, ptr: DevicePointerT, size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: DevicePointerType, size_t size, stream: Stream | GraphBuilder | None = None): """Deallocate a buffer previously allocated by this resource. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. 
diff --git a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx index e04f25f1581..2180276ed87 100644 --- a/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx +++ b/cuda_core/cuda/core/_memory/_graph_memory_resource.pyx @@ -1,4 +1,4 @@ -# SPDX-FileCopyrightText: Copyright (c) 2024-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# SPDX-FileCopyrightText: Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # SPDX-License-Identifier: Apache-2.0 @@ -111,7 +111,7 @@ cdef class cyGraphMemoryResource(MemoryResource): stream = Stream_accept(stream) if stream is not None else default_stream() return GMR_allocate(self, size, stream) - def deallocate(self, ptr: "DevicePointerT", size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: "DevicePointerType", size_t size, stream: Stream | GraphBuilder | None = None): """ Deallocate a buffer of the requested size. See documentation for :obj:`~_memory.MemoryResource`. """ diff --git a/cuda_core/cuda/core/_memory/_legacy.py b/cuda_core/cuda/core/_memory/_legacy.py index 24ce88487ea..036b89abdcc 100644 --- a/cuda_core/cuda/core/_memory/_legacy.py +++ b/cuda_core/cuda/core/_memory/_legacy.py @@ -7,7 +7,7 @@ from typing import TYPE_CHECKING if TYPE_CHECKING: - from cuda.core._memory._buffer import DevicePointerT + from cuda.core._memory._buffer import DevicePointerType from cuda.core._memory._buffer import Buffer, MemoryResource from cuda.core._utils.cuda_utils import ( @@ -53,12 +53,12 @@ def allocate(self, size, stream=None) -> Buffer: ptr = 0 return Buffer._init(ptr, size, self) - def deallocate(self, ptr: DevicePointerT, size, stream): + def deallocate(self, ptr: DevicePointerType, size, stream): """Deallocate a buffer previously allocated by this resource. 
Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. diff --git a/cuda_core/cuda/core/_memory/_memory_pool.pyx b/cuda_core/cuda/core/_memory/_memory_pool.pyx index f8f3b683d12..4e0f99d4529 100644 --- a/cuda_core/cuda/core/_memory/_memory_pool.pyx +++ b/cuda_core/cuda/core/_memory/_memory_pool.pyx @@ -144,12 +144,12 @@ cdef class _MemPool(MemoryResource): stream = Stream_accept(stream) if stream is not None else default_stream() return _MP_allocate(self, size, stream) - def deallocate(self, ptr: "DevicePointerT", size_t size, stream: Stream | GraphBuilder | None = None): + def deallocate(self, ptr: "DevicePointerType", size_t size, stream: Stream | GraphBuilder | None = None): """Deallocate a buffer previously allocated by this resource. Parameters ---------- - ptr : :obj:`~_memory.DevicePointerT` + ptr : :obj:`~_memory.DevicePointerType` The pointer or handle to the buffer to deallocate. size : int The size of the buffer to deallocate, in bytes. 
diff --git a/cuda_core/cuda/core/_module.pyx b/cuda_core/cuda/core/_module.pyx index 4156e4d7806..fee979b6130 100644 --- a/cuda_core/cuda/core/_module.pyx +++ b/cuda_core/cuda/core/_module.pyx @@ -12,7 +12,7 @@ from cuda.core._device import Device from cuda.core._launch_config cimport LaunchConfig from cuda.core._launch_config import LaunchConfig from cuda.core._stream cimport Stream -from cuda.core._program import ObjectCodeFormat +from cuda.core._program import ObjectCodeFormatType from cuda.core._resource_handles cimport ( LibraryHandle, KernelHandle, @@ -570,7 +570,7 @@ cdef class Kernel: CodeTypeT = bytes | bytearray | str -cdef tuple _supported_code_type = tuple(ObjectCodeFormat.__members__.values()) +cdef tuple _supported_code_type = tuple(ObjectCodeFormatType.__members__.values()) cdef class ObjectCode: """Represent a compiled program to be loaded onto the device. @@ -630,7 +630,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.CUBIN, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.CUBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ptx(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -648,7 +648,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.PTX, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.PTX, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_ltoir(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -666,7 +666,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). 
""" - return ObjectCode._init(module, ObjectCodeFormat.LTOIR, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.LTOIR, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_fatbin(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -684,7 +684,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.FATBIN, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.FATBIN, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_object(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -702,7 +702,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.OBJECT, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.OBJECT, name=name, symbol_mapping=symbol_mapping) @staticmethod def from_library(module: bytes | str, *, name: str = "", symbol_mapping: dict | None = None) -> ObjectCode: @@ -720,7 +720,7 @@ cdef class ObjectCode: should be mapped to the mangled names before trying to retrieve them (default to no mappings). """ - return ObjectCode._init(module, ObjectCodeFormat.LIBRARY, name=name, symbol_mapping=symbol_mapping) + return ObjectCode._init(module, ObjectCodeFormatType.LIBRARY, name=name, symbol_mapping=symbol_mapping) # TODO: do we want to unload in a finalizer? Probably not.. 
@@ -759,7 +759,7 @@ cdef class ObjectCode: """ self._lazy_load_module() - supported_code_types = (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX, ObjectCodeFormat.FATBIN) + supported_code_types = (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX, ObjectCodeFormatType.FATBIN) if self._code_type not in supported_code_types: raise RuntimeError(f'Unsupported code type "{self._code_type}" ({supported_code_types=})') try: diff --git a/cuda_core/cuda/core/_program.pyx b/cuda_core/cuda/core/_program.pyx index f7040919d11..12d52198b2a 100644 --- a/cuda_core/cuda/core/_program.pyx +++ b/cuda_core/cuda/core/_program.pyx @@ -39,7 +39,7 @@ from cuda.core._utils.cuda_utils import ( is_sequence, ) from cuda.core._utils.version import binding_version, driver_version -from cuda.core.typing import ObjectCodeFormat, CompilerBackend, PCHStatus, SourceType +from cuda.core.typing import ObjectCodeFormatType, CompilerBackendType, PCHStatusType, SourceCodeType __all__ = ["Program", "ProgramOptions"] @@ -68,12 +68,12 @@ cdef class Program: code : str | bytes | bytearray The source code to compile. For C++ and PTX, must be a string. For NVVM IR, can be str, bytes, or bytearray. - code_type : SourceType | str + code_type : SourceCodeType | str The type of source code. Must be one of ``"c++"``, ``"ptx"``, or ``"nvvm"``. options : :class:`ProgramOptions`, optional Options to customize the compilation process. 
""" - def __init__(self, code: str | bytes | bytearray, code_type: SourceType | str, options: ProgramOptions | None = None): + def __init__(self, code: str | bytes | bytearray, code_type: SourceCodeType | str, options: ProgramOptions | None = None): Program_init(self, code, str(code_type), options) def close(self): @@ -85,13 +85,13 @@ cdef class Program: self._h_nvvm.reset() def compile( - self, target_type: ObjectCodeFormat | str, name_expressions: tuple | list = (), logs = None + self, target_type: ObjectCodeFormatType | str, name_expressions: tuple | list = (), logs = None ) -> ObjectCode: """Compile the program to the specified target type. Parameters ---------- - target_type : ObjectCodeFormat | str + target_type : ObjectCodeFormatType | str The compilation target. Must be one of ``"ptx"``, ``"cubin"``, or ``"ltoir"``. name_expressions : tuple | list, optional Sequence of name expressions to make accessible in the compiled code. @@ -107,7 +107,7 @@ cdef class Program: return Program_compile(self, str(target_type), name_expressions, logs) @property - def pch_status(self) -> PCHStatus | None: + def pch_status(self) -> PCHStatusType | None: """PCH creation outcome from the most recent :meth:`compile` call. 
Possible values: @@ -132,12 +132,12 @@ cdef class Program: """ if self._pch_status is None: return None - return PCHStatus(self._pch_status) + return PCHStatusType(self._pch_status) @property - def backend(self) -> CompilerBackend: - """Return this Program instance's underlying :class:`CompilerBackend`.""" - return CompilerBackend(self._backend) + def backend(self) -> CompilerBackendType: + """Return this Program instance's underlying :class:`CompilerBackendType`.""" + return CompilerBackendType(self._backend) @property def handle(self) -> ProgramHandleT: @@ -437,7 +437,7 @@ class ProgramOptions: def _prepare_nvvm_options(self, as_bytes: bool = True) -> list[bytes] | list[str]: return _prepare_nvvm_options_impl(self, as_bytes) - def as_bytes(self, backend: CompilerBackend | str, target_type: ObjectCodeFormat | str | None = None) -> list[bytes]: + def as_bytes(self, backend: CompilerBackendType | str, target_type: ObjectCodeFormatType | str | None = None) -> list[bytes]: """Convert program options to bytes format for the specified backend. This method transforms the program options into a format suitable for the @@ -446,9 +446,9 @@ class ProgramOptions: Parameters ---------- - backend : CompilerBackend | str + backend : CompilerBackendType | str The compiler backend to prepare options for. Must be either "nvrtc" or "nvvm". - target_type : ObjectCodeFormat | str, optional + target_type : ObjectCodeFormatType | str, optional The compilation target type (e.g., "ptx", "cubin", "ltoir"). Some backends require additional options based on the target type. 
@@ -641,7 +641,7 @@ cdef inline int Program_init(Program self, object code, str code_type, object op &nvrtc_prog, code_ptr, name_ptr, 0, NULL, NULL)) self._h_nvrtc = create_nvrtc_program_handle(nvrtc_prog) self._nvrtc_code = code_bytes - self._backend = str(CompilerBackend.NVRTC) + self._backend = str(CompilerBackendType.NVRTC) self._linker = None elif code_type == "ptx": @@ -685,11 +685,11 @@ cdef inline int Program_init(Program self, object code, str code_type, object op if options.use_libdevice: self._use_libdevice = True - self._backend = str(CompilerBackend.NVVM) + self._backend = str(CompilerBackendType.NVVM) self._linker = None else: - supported_code_types = tuple(x.value for x in SourceType) + supported_code_types = tuple(x.value for x in SourceCodeType) if options.use_libdevice: raise ValueError("use_libdevice is only supported by the NVVM backend") raise RuntimeError(f"Unsupported {code_type=} ({supported_code_types=})") @@ -787,12 +787,12 @@ cdef object _read_pch_status(cynvrtc.nvrtcProgram prog): with nogil: err = cynvrtc.nvrtcGetPCHCreateStatus(prog) if err == cynvrtc.nvrtcResult.NVRTC_SUCCESS: - return PCHStatus.CREATED + return PCHStatusType.CREATED if err == cynvrtc.nvrtcResult.NVRTC_ERROR_PCH_CREATE_HEAP_EXHAUSTED: return None # sentinel: caller should auto-retry if err == cynvrtc.nvrtcResult.NVRTC_ERROR_NO_PCH_CREATE_ATTEMPTED: - return PCHStatus.NOT_ATTEMPTED - return PCHStatus.FAILED + return PCHStatusType.NOT_ATTEMPTED + return PCHStatusType.FAILED cdef object Program_compile_nvrtc(Program self, str target_type, object name_expressions, object logs): @@ -840,7 +840,7 @@ cdef object Program_compile_nvrtc(Program self, str target_type, object name_exp ) status = _read_pch_status(retry_prog) - self._pch_status = status if status is not None else str(PCHStatus.FAILED) + self._pch_status = status if status is not None else str(PCHStatusType.FAILED) return result @@ -900,10 +900,10 @@ cdef object Program_compile_nvvm(Program self, str target_type, 
object logs): # Supported target types per backend cdef dict SUPPORTED_TARGETS = { - CompilerBackend.NVRTC: (ObjectCodeFormat.PTX, ObjectCodeFormat.CUBIN, ObjectCodeFormat.LTOIR), - CompilerBackend.NVVM: (ObjectCodeFormat.PTX, ObjectCodeFormat.LTOIR), - CompilerBackend.NVJITLINK: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), - CompilerBackend.DRIVER: (ObjectCodeFormat.CUBIN, ObjectCodeFormat.PTX), + CompilerBackendType.NVRTC: (ObjectCodeFormatType.PTX, ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.LTOIR), + CompilerBackendType.NVVM: (ObjectCodeFormatType.PTX, ObjectCodeFormatType.LTOIR), + CompilerBackendType.NVJITLINK: (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX), + CompilerBackendType.DRIVER: (ObjectCodeFormatType.CUBIN, ObjectCodeFormatType.PTX), } diff --git a/cuda_core/cuda/core/_stream.pyx b/cuda_core/cuda/core/_stream.pyx index fdb617f0325..97a95cdb861 100644 --- a/cuda_core/cuda/core/_stream.pyx +++ b/cuda_core/cuda/core/_stream.pyx @@ -58,7 +58,7 @@ cdef class StreamOptions: priority: int | None = None -class IsStreamT(Protocol): +class IsStreamType(Protocol): def __cuda_stream__(self) -> tuple[int, int]: """ For any Python object that is meant to be interpreted as a CUDA stream, the intent @@ -113,7 +113,7 @@ cdef class Stream: return Stream._from_handle(cls, get_per_thread_stream()) @classmethod - def _init(cls, obj: IsStreamT | None = None, options=None, device_id: int = None, + def _init(cls, obj: IsStreamType | None = None, options=None, device_id: int = None, ctx: Context = None): cdef StreamHandle h_stream cdef cydriver.CUstream borrowed diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 33405657b24..12829e070db 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -9,19 +9,19 @@ except ImportError: from backports.strenum import StrEnum -from cuda.core._memory._buffer import DevicePointerT -from cuda.core._stream import IsStreamT +from cuda.core._memory._buffer import 
DevicePointerType +from cuda.core._stream import IsStreamType __all__ = [ - "CompilerBackend", - "DevicePointerT", + "CompilerBackendType", + "DevicePointerType", "GraphConditionalType", "GraphMemoryType", - "IsStreamT", + "IsStreamType", "ManagedMemoryLocationType", - "ObjectCodeFormat", - "PCHStatus", - "SourceType", + "ObjectCodeFormatType", + "PCHStatusType", + "SourceCodeType", "VirtualMemoryAccessType", "VirtualMemoryAllocationType", "VirtualMemoryGranularityType", @@ -30,13 +30,13 @@ ] -class SourceType(StrEnum): +class SourceCodeType(StrEnum): CXX = "c++" PTX = "ptx" NVVM = "nvvm" -class ObjectCodeFormat(StrEnum): +class ObjectCodeFormatType(StrEnum): PTX = "ptx" CUBIN = "cubin" LTOIR = "ltoir" @@ -45,14 +45,14 @@ class ObjectCodeFormat(StrEnum): LIBRARY = "library" -class CompilerBackend(StrEnum): +class CompilerBackendType(StrEnum): NVRTC = "NVRTC" NVVM = "NVVM" NVJITLINK = "nvJitLink" DRIVER = "driver" -class PCHStatus(StrEnum): +class PCHStatusType(StrEnum): CREATED = "created" NOT_ATTEMPTED = "not_attempted" FAILED = "failed" @@ -102,3 +102,6 @@ class VirtualMemoryAccessType(StrEnum): class VirtualMemoryAllocationType(StrEnum): PINNED = "pinned" MANAGED = "managed" + + +del StrEnum diff --git a/cuda_core/docs/source/api_private.rst b/cuda_core/docs/source/api_private.rst index 725b211798e..1df64f91d4b 100644 --- a/cuda_core/docs/source/api_private.rst +++ b/cuda_core/docs/source/api_private.rst @@ -20,14 +20,14 @@ CUDA runtime _module.KernelOccupancy _module.MaxPotentialBlockSizeOccupancyResult _module.ParamInfo - typing.CompilerBackend - typing.DevicePointerT + typing.CompilerBackendType + typing.DevicePointerType typing.GraphConditionalType typing.GraphMemoryType typing.ManagedMemoryLocationType - typing.ObjectCodeFormat - typing.PCHStatus - typing.SourceType + typing.ObjectCodeFormatType + typing.PCHStatusType + typing.SourceCodeType typing.VirtualMemoryAccessType typing.VirtualMemoryAllocationType typing.VirtualMemoryGranularityType @@ -57,7 +57,7 
@@ CUDA protocols :toctree: generated/ :template: protocol.rst - typing.IsStreamT + typing.IsStreamType NVML ---- diff --git a/cuda_core/docs/source/interoperability.rst b/cuda_core/docs/source/interoperability.rst index ae109bbad0b..4aac89d13df 100644 --- a/cuda_core/docs/source/interoperability.rst +++ b/cuda_core/docs/source/interoperability.rst @@ -35,7 +35,7 @@ in Python. While we encourage new Python projects to start using streams (and ot CUDA types) from ``cuda.core``, we understand that there are already several projects exposing their own stream types. -To address this issue, we propose the :attr:`~_stream.IsStreamT.__cuda_stream__` protocol +To address this issue, we propose the :attr:`~_stream.IsStreamType.__cuda_stream__` protocol (currently version 0) as follows: For any Python objects that are meant to be interpreted as a stream, they should add a ``__cuda_stream__`` *method* that returns a 2-tuple: The version number (``0``) and the address of ``cudaStream_t`` (both as Python ``int``): diff --git a/cuda_core/tests/helpers/misc.py b/cuda_core/tests/helpers/misc.py index 89bc175a97d..ec879755cd2 100644 --- a/cuda_core/tests/helpers/misc.py +++ b/cuda_core/tests/helpers/misc.py @@ -16,7 +16,7 @@ def try_create_condition(g, default_value=1): class StreamWrapper: """ - A wrapper around Stream for testing IsStreamT conversions. + A wrapper around Stream for testing IsStreamType conversions. 
""" def __init__(self, stream): diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index bf65048e853..024f71378f4 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -71,13 +71,6 @@ {"CU_MEM_ACCESS_FLAGS_PROT_NONE", "CU_MEM_ACCESS_FLAGS_PROT_MAX"}, set(), ), - ( - driver.CUmemAllocationType, - cuda.core.typing.VirtualMemoryAllocationType, - cuda.core.VirtualMemoryResourceOptions._allocation_type, - {"CU_MEM_ALLOCATION_TYPE_INVALID", "CU_MEM_ALLOCATION_TYPE_MAX"}, - set(), - ), ( driver.CUmemAllocationGranularity_flags, cuda.core.typing.VirtualMemoryGranularityType, @@ -270,16 +263,18 @@ # Add classes here (with a comment explaining why) when a new StrEnum is # introduced that wraps something other than a cuda_binding enum. _UNBOUND_STR_ENUMS: set[StrEnum] = { - cuda.core.typing.ObjectCodeFormat, - cuda.core.typing.CompilerBackend, + cuda.core.typing.ObjectCodeFormatType, + cuda.core.typing.CompilerBackendType, # This one enum coordinates values in two cuda_binding enums: # CUmemAllocationType and CUmemLocationType cuda.core.typing.GraphMemoryType, # This should support all of the PCH-related values in nvrtcResult, but # there is no easy way to check that since they are mixed in with other # unrelated things - cuda.core.typing.PCHStatus, - cuda.core.typing.SourceType, + cuda.core.typing.PCHStatusType, + cuda.core.typing.SourceCodeType, + # This enum is dynamic depending on the version of CTK installed. + cuda.core.typing.VirtualMemoryAllocationType, } @@ -324,6 +319,7 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ ) +@pytest.mark.skipif(sys.version_info < (3, 11), reason="Requires Python 3.11+ for StrEnum") def test_all_str_enums_in_cases(): """Every StrEnum subclass in cuda.core must appear in _CASES or _UNBOUND_STR_ENUMS. 
diff --git a/cuda_core/tests/test_launcher.py b/cuda_core/tests/test_launcher.py index 775089de907..f4858cdaef7 100644 --- a/cuda_core/tests/test_launcher.py +++ b/cuda_core/tests/test_launcher.py @@ -26,7 +26,7 @@ ) from cuda.core._memory._legacy import _SynchronousMemoryResource from cuda.core._utils.cuda_utils import CUDAError -from cuda.core.typing import ObjectCodeFormat, SourceType +from cuda.core.typing import ObjectCodeFormatType, SourceCodeType def test_launch_config_init(init_cuda): @@ -127,8 +127,8 @@ def test_launch_config_native_conversion(init_cuda): def test_launch_invalid_values(init_cuda): code = 'extern "C" __global__ void my_kernel() {}' - program = Program(code, SourceType.CXX) - mod = program.compile(ObjectCodeFormat.CUBIN) + program = Program(code, SourceCodeType.CXX) + mod = program.compile(ObjectCodeFormatType.CUBIN) stream = Device().create_stream() ker = mod.get_kernel("my_kernel") diff --git a/cuda_core/tests/test_program.py b/cuda_core/tests/test_program.py index d729e2fde8d..eda78bd0fd6 100644 --- a/cuda_core/tests/test_program.py +++ b/cuda_core/tests/test_program.py @@ -13,7 +13,7 @@ from cuda.core._module import Kernel, ObjectCode from cuda.core._program import Program, ProgramOptions from cuda.core._utils.cuda_utils import CUDAError, handle_return -from cuda.core.typing import CompilerBackend, PCHStatus +from cuda.core.typing import CompilerBackendType, PCHStatusType pytest_plugins = ("cuda_python_test_helpers.nvvm_bitcode",) @@ -242,7 +242,7 @@ def test_cpp_program_with_various_options(init_cuda, options): code = 'extern "C" __global__ void my_kernel() {}' program = Program(code, "c++", options) assert program.backend == "NVRTC" - assert isinstance(program.backend, CompilerBackend) + assert isinstance(program.backend, CompilerBackendType) program.compile("ptx") program.close() @@ -283,7 +283,7 @@ def test_cpp_program_pch_auto_creates(init_cuda, tmp_path): assert program.pch_status is None # not compiled yet program.compile("ptx") 
assert program.pch_status in ("created", "not_attempted", "failed") - assert isinstance(program.pch_status, PCHStatus) + assert isinstance(program.pch_status, PCHStatusType) program.close() @@ -684,7 +684,7 @@ def test_cpp_program_with_extra_sources(): def test_program_options_as_bytes_nvrtc(): """Test ProgramOptions.as_bytes() for NVRTC backend""" options = ProgramOptions(arch="sm_80", debug=True, lineinfo=True, ftz=True) - nvrtc_options = options.as_bytes(CompilerBackend.NVRTC) + nvrtc_options = options.as_bytes(CompilerBackendType.NVRTC) assert isinstance(nvrtc_options, list) assert all(isinstance(opt, bytes) for opt in nvrtc_options) options_str = [opt.decode() for opt in nvrtc_options] diff --git a/cuda_core/tests/test_typing_imports.py b/cuda_core/tests/test_typing_imports.py index c05e3ae3b37..a7edd2d5069 100644 --- a/cuda_core/tests/test_typing_imports.py +++ b/cuda_core/tests/test_typing_imports.py @@ -8,22 +8,22 @@ def test_typing_module_imports(): """All type aliases and protocols are importable from cuda.core.typing.""" from cuda.core.typing import ( - DevicePointerT, - IsStreamT, + DevicePointerType, + IsStreamType, ) - assert DevicePointerT is not None - assert IsStreamT is not None + assert DevicePointerType is not None + assert IsStreamType is not None def test_typing_matches_private_definitions(): """cuda.core.typing re-exports match the original private definitions.""" - from cuda.core._memory._buffer import DevicePointerT as _DevicePointerT - from cuda.core._stream import IsStreamT as _IsStreamT + from cuda.core._memory._buffer import DevicePointerType as _DevicePointerT + from cuda.core._stream import IsStreamType as _IsStreamT from cuda.core.typing import ( - DevicePointerT, - IsStreamT, + DevicePointerType, + IsStreamType, ) - assert DevicePointerT is _DevicePointerT - assert IsStreamT is _IsStreamT + assert DevicePointerType is _DevicePointerT + assert IsStreamType is _IsStreamT From 9d50fec8b0879c11f94a04ad7606671d5a3fe8e8 Mon Sep 17 
00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 12:20:41 -0400 Subject: [PATCH 3/5] Add release notes --- cuda_core/docs/source/release/1.0.0-notes.rst | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/cuda_core/docs/source/release/1.0.0-notes.rst b/cuda_core/docs/source/release/1.0.0-notes.rst index 3f61a30ec1e..360d95e36f2 100644 --- a/cuda_core/docs/source/release/1.0.0-notes.rst +++ b/cuda_core/docs/source/release/1.0.0-notes.rst @@ -113,6 +113,10 @@ Breaking changes ``CUgraphConditionalHandle`` value. Previously, ``.handle`` had to be extracted explicitly. +- Consistent naming of type annotation helpers + (`#2016 `__): + - :obj:`cuda.core.typing.DevicePointerT` -> :obj:`cuda.core.typing.DevicePointerType` + - :obj:`cuda.core.typing.IsStreamT` -> :obj:`cuda.core.typing.IsStreamType` Fixes and enhancements ----------------------- @@ -128,3 +132,22 @@ Fixes and enhancements stream and the consumer stream, matching the DLPack synchronization contract. Requires PyTorch >= 2.3. (`#749 `__) + +- Enums are now available in places where a small number of string values are + accepted or returned. You may continue to use the string values, or use + enumerations for better linting and type-checking. 
+ (`#2016 `__) + The new enums are: + + - :class:`cuda.core.typing.CompilerBackendType` + - :class:`cuda.core.typing.GraphConditionalType` + - :class:`cuda.core.typing.GraphMemoryType` + - :class:`cuda.core.typing.ManagedMemoryLocationType` + - :class:`cuda.core.typing.ObjectCodeFormatType` + - :class:`cuda.core.typing.PCHStatusType` + - :class:`cuda.core.typing.SourceCodeType` + - :class:`cuda.core.typing.VirtualMemoryAccessType` + - :class:`cuda.core.typing.VirtualMemoryAllocationType` + - :class:`cuda.core.typing.VirtualMemoryGranularityType` + - :class:`cuda.core.typing.VirtualMemoryHandleType` + - :class:`cuda.core.typing.VirtualMemoryLocationType` From 07b2bdf33727186b76706c845d90329ee3591a1d Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 13:53:53 -0400 Subject: [PATCH 4/5] Fix missing enum test --- cuda_core/tests/test_enum_coverage.py | 38 ++++++++++----------------- 1 file changed, 14 insertions(+), 24 deletions(-) diff --git a/cuda_core/tests/test_enum_coverage.py b/cuda_core/tests/test_enum_coverage.py index 024f71378f4..a02ca8f15f9 100644 --- a/cuda_core/tests/test_enum_coverage.py +++ b/cuda_core/tests/test_enum_coverage.py @@ -7,9 +7,7 @@ # mapping dicts at import time, so it runs on any CI host that has a # compatible cuda.bindings version. -import importlib import inspect -import pkgutil import sys from typing import Any @@ -25,6 +23,8 @@ else: from backports.strenum import StrEnum +_MODULES = [cuda.core.typing] + # Each entry is: # (cuda_binding_enum, str_enum, mapping_dict, binding_unmapped, str_enum_unmapped) # @@ -320,7 +320,11 @@ def test_wrapper_covers_all_binding_members(binding, str_enum, mapping, binding_ @pytest.mark.skipif(sys.version_info < (3, 11), reason="Requires Python 3.11+ for StrEnum") -def test_all_str_enums_in_cases(): +@pytest.mark.parametrize( + "module", + _MODULES, +) +def test_all_str_enums_in_cases(module): """Every StrEnum subclass in cuda.core must appear in _CASES or _UNBOUND_STR_ENUMS. 
This ensures that when a new StrEnum wrapper is added to cuda.core, the @@ -328,29 +332,15 @@ def test_all_str_enums_in_cases(): declare it as unbound in _UNBOUND_STR_ENUMS). """ - def discover_str_enums() -> set[type]: - """Walk all submodules of cuda.core and return every StrEnum subclass found.""" - found: set[type] = set() - for _, modname, _ in pkgutil.walk_packages( - path=cuda.core.__path__, - prefix=cuda.core.__name__ + ".", - onerror=lambda _: None, - ): - try: - mod = importlib.import_module(modname) - except Exception: # noqa - continue - try: - members = inspect.getmembers(mod, inspect.isclass) - except Exception: # noqa - continue - for _, obj in members: - if obj is not StrEnum and issubclass(obj, StrEnum): - found.add(obj) - return found + found = set() + + members = inspect.getmembers(module, inspect.isclass) + for _, obj in members: + if obj is not StrEnum and issubclass(obj, StrEnum): + found.add(obj) covered = {x[1] for x in _CASES if x[1] is not None} - uncovered = discover_str_enums() - covered - _UNBOUND_STR_ENUMS + uncovered = found - covered - _UNBOUND_STR_ENUMS uncovered_names = sorted({c.__qualname__ for c in uncovered}) assert not uncovered, ( f"StrEnum subclasses in cuda.core not covered by _CASES: " From 73c458902deb159690427a0916cff40a48ac6b68 Mon Sep 17 00:00:00 2001 From: Michael Droettboom Date: Tue, 5 May 2026 15:22:50 -0400 Subject: [PATCH 5/5] Fix docstring --- cuda_core/cuda/core/typing.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/cuda_core/cuda/core/typing.py b/cuda_core/cuda/core/typing.py index 36a301d4766..54418f7d84b 100644 --- a/cuda_core/cuda/core/typing.py +++ b/cuda_core/cuda/core/typing.py @@ -32,11 +32,9 @@ ] +# A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting +# :attr:`Buffer.handle`. DevicePointerType = driver.CUdeviceptr | int | None -DevicePointerType.__doc__ = """ -A type union of :obj:`~driver.CUdeviceptr`, `int` and `None` for hinting -:attr:`Buffer.handle`. 
-""" ProcessStateType = _Literal["running", "locked", "checkpointed", "failed"]