tenferro_gpu/
lib.rs

1//! GPU backend implementations for tenferro tensors.
2//!
3//! # Examples
4//!
5//! ```rust
6//! #[cfg(feature = "cuda")]
7//! use tenferro_gpu::{cuda_devices, CudaBackend, CudaDeviceError};
8//!
9//! #[cfg(feature = "cuda")]
10//! fn first_cuda_backend() -> Result<Option<CudaBackend>, CudaDeviceError> {
11//!     let devices = cuda_devices()?;
12//!     let Some(device) = devices.first() else {
13//!         return Ok(None);
14//!     };
15//!     Ok(Some(CudaBackend::new(device.id())?))
16//! }
17//!
18//! // This ordinary doctest checks the discovery-based selection API without
19//! // requiring CUDA hardware at test time.
20//! #[cfg(feature = "cuda")]
21//! let _example: fn() -> Result<Option<CudaBackend>, CudaDeviceError> = first_cuda_backend;
22//! ```
23
24#[cfg(feature = "cuda")]
25use std::any::Any;
26
27#[cfg(feature = "cuda")]
28mod cubecl;
29#[cfg(any(feature = "cuda", feature = "webgpu"))]
30mod event_domain_admission;
31#[cfg(any(feature = "cuda", feature = "webgpu"))]
32mod event_retirement;
33#[cfg(any(feature = "cuda", feature = "webgpu"))]
34mod kernels;
35#[cfg(any(feature = "cuda", feature = "webgpu"))]
36mod native_permutation;
37#[cfg(feature = "webgpu")]
38mod webgpu;
39
40#[cfg(feature = "cuda")]
41pub use cubecl::{
42    cuda_capabilities, cuda_devices, cuda_runtime_engine_registration, cuda_runtime_hardware_class,
43    device_ptr, download_tensor, gpu_available, upload_tensor, with_cuda_exec_session, CudaBackend,
44    CudaDeviceError, CudaDeviceId, CudaDeviceInfo, CudaExecSession, CudaRuntime,
45    CudaRuntimeIdentity,
46};
47#[cfg(feature = "cuda")]
48#[doc(hidden)]
49pub use cubecl::{CudaExtensionCache, CudaExtensionCacheGuard};
50#[cfg(feature = "webgpu")]
51pub use webgpu::{
52    download_webgpu_tensor, upload_webgpu_tensor, webgpu_available, webgpu_runtime_engine_id,
53    webgpu_runtime_engine_registration, webgpu_runtime_engine_registration_with_id,
54    webgpu_runtime_hardware_class, with_webgpu_exec_session, AppleContext, AppleTransferStats,
55    WebGpuBackend, WebGpuExecSession, WebGpuRuntime, WebGpuRuntimeIdentity,
56};
57
/// Narrow owner-scoped WebGPU handle interop for extension crates.
///
/// Glob re-exports the contents of `crate::webgpu::interop`. The module is
/// compiled only when the `webgpu` feature is enabled and is hidden from
/// rustdoc output.
#[cfg(feature = "webgpu")]
#[doc(hidden)]
pub mod webgpu_interop {
    pub use crate::webgpu::interop::*;
}
64
/// CUDA interop re-exports, compiled only when the `cuda` feature is enabled
/// and hidden from rustdoc output.
///
/// NOTE(review): presumably the CUDA counterpart of `webgpu_interop`, meant
/// for extension crates — confirm the intended audience.
#[cfg(feature = "cuda")]
#[doc(hidden)]
pub mod cuda_interop {
    // Everything exposed by the CubeCL interop submodule.
    pub use crate::cubecl::interop::*;
    // The extension cache types are also re-exported (hidden) at the crate root.
    pub use crate::cubecl::{CudaExtensionCache, CudaExtensionCacheGuard};
}
71
72#[cfg(any(feature = "cuda", feature = "webgpu"))]
73use tenferro_tensor::*;
74
/// Crate-private alias that makes the contents of `tenferro_tensor::backend`
/// reachable as `crate::backend::*` when the `cuda` feature is enabled.
///
/// NOTE(review): presumably exists so the CUDA modules can use crate-relative
/// paths — confirm against their imports.
#[cfg(feature = "cuda")]
pub(crate) mod backend {
    pub use tenferro_tensor::backend::*;
}
79
/// Crate-private alias that makes the contents of `tenferro_tensor::config`
/// reachable as `crate::config::*` when the `cuda` feature is enabled.
///
/// NOTE(review): presumably exists so the CUDA modules can use crate-relative
/// paths — confirm against their imports.
#[cfg(feature = "cuda")]
pub(crate) mod config {
    pub use tenferro_tensor::config::*;
}
84
/// Crate-private alias that makes the contents of `tenferro_tensor::types`
/// reachable as `crate::types::*` when the `cuda` feature is enabled.
///
/// It additionally exposes this crate's own `CubeclBuffer` under the same
/// path, so `crate::types::CubeclBuffer` resolves alongside the tensor types.
#[cfg(feature = "cuda")]
pub(crate) mod types {
    // Crate-local buffer type, kept at `pub(crate)` visibility.
    pub(crate) use crate::CubeclBuffer;
    pub use tenferro_tensor::types::*;
}
90
/// CubeCL-managed GPU buffer stored behind tensor backend-buffer trait objects.
///
/// `T` is a type-level tag only: it appears solely inside the `PhantomData`
/// marker, so no value of `T` is stored in the struct.
#[cfg(feature = "cuda")]
pub(crate) struct CubeclBuffer<T> {
    /// CubeCL runtime server handle backing this buffer.
    handle: cubecl_runtime::server::Handle,
    /// Length reported by `element_len` and by `BackendBuffer::len`.
    len: usize,
    /// Device ordinal supplied when the buffer was constructed.
    device_ordinal: usize,
    /// Zero-sized marker tying the buffer to its element type `T`.
    pub(crate) _marker: std::marker::PhantomData<T>,
}

// `Clone` is implemented by hand instead of derived: `#[derive(Clone)]` would
// add a `T: Clone` bound even though no `T` is stored here. This keeps the
// impl unbounded, matching the hand-written `Debug` impl for this type.
#[cfg(feature = "cuda")]
impl<T> Clone for CubeclBuffer<T> {
    fn clone(&self) -> Self {
        Self {
            handle: self.handle.clone(),
            len: self.len,
            device_ordinal: self.device_ordinal,
            _marker: std::marker::PhantomData,
        }
    }
}
100
/// Debug output for [`CubeclBuffer`] listing only `len` and `device_ordinal`.
///
/// The handle and the phantom marker are left out of the output, and the impl
/// places no `Debug` bound on `T`.
#[cfg(feature = "cuda")]
impl<T> std::fmt::Debug for CubeclBuffer<T> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let Self {
            len,
            device_ordinal,
            ..
        } = self;

        let mut builder = f.debug_struct("CubeclBuffer");
        builder.field("len", len);
        builder.field("device_ordinal", device_ordinal);
        builder.finish()
    }
}
110
/// Constructor and crate-internal accessors for [`CubeclBuffer`].
#[cfg(feature = "cuda")]
impl<T> CubeclBuffer<T> {
    /// Bundles a runtime handle with its length and device ordinal.
    ///
    /// The arguments are stored as given; nothing is validated here.
    pub(crate) fn new(
        handle: cubecl_runtime::server::Handle,
        len: usize,
        device_ordinal: usize,
    ) -> Self {
        let _marker = std::marker::PhantomData::<T>;
        Self {
            handle,
            len,
            device_ordinal,
            _marker,
        }
    }

    /// Returns the device ordinal this buffer was constructed with.
    pub(crate) fn device_ordinal(&self) -> usize {
        self.device_ordinal
    }

    /// Returns the stored length, i.e. the `len` passed to [`CubeclBuffer::new`].
    pub(crate) fn element_len(&self) -> usize {
        self.len
    }

    /// Borrows the underlying runtime handle.
    pub(crate) fn handle(&self) -> &cubecl_runtime::server::Handle {
        let Self { handle, .. } = self;
        handle
    }
}
138
/// Exposes [`CubeclBuffer`] through the `BackendBuffer` trait for any element
/// type that is `Send + Sync + 'static`.
#[cfg(feature = "cuda")]
impl<T> BackendBuffer<T> for CubeclBuffer<T>
where
    T: Send + Sync + 'static,
{
    /// Returns `self` as `&dyn Any` so callers can downcast to the concrete type.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Returns the stored length of the buffer.
    fn len(&self) -> usize {
        self.len
    }

    /// Identifies the backend family of this buffer; always `"cubecl"`.
    fn backend_family(&self) -> &'static str {
        "cubecl"
    }
}
tenferro_gpu/lib.rs

tenferro_gpu/
lib.rs