// tenferro_tensor/lib.rs

//! Dense tensor type with CPU/GPU support.
//!
//! This crate provides [`Tensor<T>`], a multi-dimensional array type composed of
//! shape, strides, and a device-aware [`DataBuffer`]. It supports:
//!
//! - **Zero-copy view operations**: [`Tensor::permute`], [`Tensor::broadcast`],
//!   [`Tensor::diagonal`], [`Tensor::select`], [`Tensor::narrow`] modify only
//!   metadata (dims/strides)
//! - **Data operations**: [`Tensor::contiguous`] / [`Tensor::into_contiguous`] copy
//!   data into a contiguous layout (the consuming variant avoids allocation when
//!   the tensor is already contiguous); [`Tensor::tril`] / [`Tensor::triu`] extract
//!   triangular parts
//! - **Factory functions**: [`Tensor::zeros`], [`Tensor::ones`], [`Tensor::eye`]
//! - **DLPack interop**: [`DataBuffer`] supports both Rust-owned (`Vec<T>`) and
//!   externally-owned memory (e.g., imported via DLPack) with automatic cleanup.
//!
//! # Memory layout
//!
//! [`Tensor`] stores explicit strides and is not tied to any particular memory
//! order. [`MemoryOrder`] is only used as a parameter when allocating new memory
//! (e.g., [`Tensor::zeros`], [`Tensor::contiguous`]).
//!
//! # No strided-rs dependency
//!
//! This crate does **not** depend on `strided-rs`. The strided-rs types
//! (`StridedView`, `StridedViewMut`) are backend implementation details
//! used only in `tenferro-prims`. To pass tensor data to prims backends,
//! use [`DataBuffer::as_slice`] combined with [`Tensor::dims`],
//! [`Tensor::strides`], and [`Tensor::offset`].
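//!
//! For example, a prims adapter might unpack a tensor into raw parts like this
//! (a sketch; `backend_kernel` is a hypothetical entry point, not a real API):
//!
//! ```ignore
//! let t = Tensor::<f64>::zeros(&[3, 4], LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! // dims/strides/offset fully describe the strided layout over the raw slice
//! backend_kernel(t.buffer().as_slice(), t.dims(), t.strides(), t.offset());
//! ```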
//!
//! # Examples
//!
//! ## Creating tensors
//!
//! ```ignore
//! use tenferro_tensor::{Tensor, MemoryOrder};
//! use tenferro_device::LogicalMemorySpace;
//!
//! // Zeros / ones
//! let a = Tensor::<f64>::zeros(&[3, 4], LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let b = Tensor::<f64>::ones(&[3, 4], LogicalMemorySpace::MainMemory, MemoryOrder::RowMajor);
//!
//! // From existing data (column-major: Julia convention)
//! let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
//! let m = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::ColumnMajor).unwrap();
//! // Logical layout:
//! //   [[1, 3, 5],
//! //    [2, 4, 6]]
//! ```
//!
//! ## Transpose and reshape
//!
//! ```ignore
//! // Continuing from the previous example: `m` is the 2×3 matrix above.
//! // Transpose a matrix (zero-copy, only strides change)
//! let mt = m.permute(&[1, 0]).unwrap();
//! assert_eq!(mt.dims(), &[3, 2]);
//!
//! // Reshape (requires contiguous data)
//! let flat = m.reshape(&[6]).unwrap();
//! assert_eq!(flat.dims(), &[6]);
//! ```
//!
//! ## Broadcasting
//!
//! ```ignore
//! // Column vector [3,1] broadcast to [3,4] for element-wise ops
//! let col = Tensor::<f64>::ones(&[3, 1], LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let expanded = col.broadcast(&[3, 4]).unwrap();
//! assert_eq!(expanded.dims(), &[3, 4]);
//! // No data is copied; stride along axis 1 is set to 0
//! ```
//!
//! ## TensorView — borrowed, zero-copy views
//!
//! [`TensorView`] is the borrowed counterpart to [`Tensor`], following the
//! `String` / `&str` pattern. View operations modify only metadata
//! (dims, strides, offset) and never copy data.
//!
//! ```ignore
//! // tensor_view() borrows the tensor — no data copy
//! // (`m` is the 2×3 matrix from the first example)
//! let tv = m.tensor_view();
//! assert_eq!(tv.dims(), m.dims());
//!
//! // permute: reorder dimensions (zero-copy, strides reordered)
//! let tv_t = tv.permute(&[1, 0]).unwrap();
//! assert_eq!(tv_t.dims(), &[3, 2]);
//!
//! // broadcast: expand size-1 dims (zero-copy, stride set to 0)
//! let col = Tensor::<f64>::from_slice(&[1.0, 2.0, 3.0], &[3, 1],
//!     MemoryOrder::ColumnMajor).unwrap();
//! let col_tv = col.tensor_view();
//! let expanded = col_tv.broadcast(&[3, 4]).unwrap();
//! assert_eq!(expanded.dims(), &[3, 4]);
//!
//! // diagonal: extract diagonal view (zero-copy, strides merged)
//! let sq = Tensor::<f64>::zeros(&[4, 4],
//!     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let sq_tv = sq.tensor_view();
//! let diag = sq_tv.diagonal(&[(0, 1)]).unwrap();
//! assert_eq!(diag.dims(), &[4]);
//!
//! // to_tensor() / contiguous(): materialize a view into owned Tensor
//! let owned = tv_t.to_tensor(MemoryOrder::ColumnMajor);
//! ```

use tenferro_algebra::{Conjugate, Scalar};
use tenferro_device::{ComputeDevice, LogicalMemorySpace, OpKind, Result};

/// Memory ordering for new allocations.
///
/// Specifies how elements are laid out in memory when creating new tensors
/// or copying data into a contiguous buffer. This is **not** stored on the
/// tensor itself — the tensor's [`strides`](Tensor::strides) fully describe
/// the memory layout.
///
/// - [`ColumnMajor`](MemoryOrder::ColumnMajor): First dimension is contiguous
///   (Fortran/Julia convention)
/// - [`RowMajor`](MemoryOrder::RowMajor): Last dimension is contiguous
///   (C/NumPy convention)
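///
/// # Examples
///
/// A sketch of the dense strides each order implies for a 2×3 tensor
/// (assuming the conventional layouts described above):
///
/// ```ignore
/// use tenferro_tensor::{Tensor, MemoryOrder};
///
/// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
/// // Column-major: first dimension contiguous → strides [1, 2]
/// let cm = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::ColumnMajor).unwrap();
/// assert_eq!(cm.strides(), &[1, 2]);
/// // Row-major: last dimension contiguous → strides [3, 1]
/// let rm = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::RowMajor).unwrap();
/// assert_eq!(rm.strides(), &[3, 1]);
/// ```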
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryOrder {
    /// Column-major (Fortran/Julia order). First dimension has stride 1.
    ColumnMajor,
    /// Row-major (C/NumPy order). Last dimension has stride 1.
    RowMajor,
}

// ============================================================================
// DataBuffer — unified owned/external storage
// ============================================================================

/// Data storage for tensor elements.
///
/// Abstracts over ownership: data may be Rust-owned ([`Vec<T>`]) or
/// externally-owned (e.g., imported via DLPack with a release callback).
/// Shape and stride metadata are NOT stored here — they live on
/// [`Tensor<T>`].
///
/// # Clone behavior
///
/// Cloning an externally-owned buffer performs a **deep copy** into a new
/// Rust-owned `Vec<T>`. The release callback cannot be cloned; the clone
/// is always Rust-owned.
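///
/// A sketch of the deep-copy behavior (the release callback here simply
/// drops the original `Vec`):
///
/// ```ignore
/// use tenferro_tensor::DataBuffer;
///
/// let data = vec![1.0, 2.0, 3.0];
/// let (ptr, len) = (data.as_ptr(), data.len());
/// let external = unsafe { DataBuffer::from_external(ptr, len, move || drop(data)) };
/// assert!(!external.is_owned());
///
/// // The clone deep-copies into a Rust-owned Vec.
/// let copy = external.clone();
/// assert!(copy.is_owned());
/// assert_eq!(copy.as_slice(), external.as_slice());
/// ```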
pub struct DataBuffer<T> {
    inner: BufferInner<T>,
}

/// Private ownership representation.
enum BufferInner<T> {
    /// Rust-owned contiguous data.
    Owned(Vec<T>),
    /// Externally-owned data with release callback.
    External {
        ptr: *const T,
        len: usize,
        /// Called on drop to notify the external owner.
        release: Option<Box<dyn FnOnce() + Send>>,
    },
}

// Safety: External buffer pointers are treated as Send/Sync since
// the external framework guarantees the data is valid for the lifetime
// of the DataBuffer. The release callback is Send.
unsafe impl<T: Send> Send for DataBuffer<T> {}
unsafe impl<T: Sync> Sync for DataBuffer<T> {}

impl<T: Copy> Clone for DataBuffer<T> {
    fn clone(&self) -> Self {
        match &self.inner {
            BufferInner::Owned(v) => DataBuffer {
                inner: BufferInner::Owned(v.clone()),
            },
            // Deep copy: can't clone the release callback.
            BufferInner::External { ptr, len, .. } => {
                let slice = unsafe { std::slice::from_raw_parts(*ptr, *len) };
                DataBuffer {
                    inner: BufferInner::Owned(slice.to_vec()),
                }
            }
        }
    }
}

impl<T> Drop for DataBuffer<T> {
    fn drop(&mut self) {
        if let BufferInner::External { release, .. } = &mut self.inner {
            if let Some(f) = release.take() {
                f();
            }
        }
    }
}

impl<T> DataBuffer<T> {
    /// Create a buffer from an owned `Vec<T>`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::DataBuffer;
    ///
    /// let buf = DataBuffer::from_vec(vec![1.0, 2.0, 3.0]);
    /// assert_eq!(buf.len(), 3);
    /// assert!(buf.is_owned());
    /// ```
    pub fn from_vec(v: Vec<T>) -> Self {
        DataBuffer {
            inner: BufferInner::Owned(v),
        }
    }

    /// Create a buffer from externally-owned data with a release callback.
    ///
    /// # Safety
    ///
    /// - `ptr` must point to a valid, properly aligned allocation of at
    ///   least `len` elements of type `T`.
    /// - The allocation must remain valid until the release callback is invoked
    ///   (which happens when this `DataBuffer` is dropped).
    /// - The release callback must correctly notify the external owner.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::DataBuffer;
    ///
    /// let data = vec![1.0, 2.0, 3.0];
    /// let ptr = data.as_ptr();
    /// let len = data.len();
    /// // Moving `data` into the closure does not move its heap allocation,
    /// // so `ptr` stays valid until the callback runs.
    /// let buf = unsafe {
    ///     DataBuffer::from_external(ptr, len, move || drop(data))
    /// };
    /// assert!(!buf.is_owned());
    /// ```
    pub unsafe fn from_external(
        ptr: *const T,
        len: usize,
        release: impl FnOnce() + Send + 'static,
    ) -> Self {
        DataBuffer {
            inner: BufferInner::External {
                ptr,
                len,
                release: Some(Box::new(release)),
            },
        }
    }

    /// Returns the raw data as a slice.
    pub fn as_slice(&self) -> &[T] {
        match &self.inner {
            BufferInner::Owned(v) => v.as_slice(),
            BufferInner::External { ptr, len, .. } => unsafe {
                std::slice::from_raw_parts(*ptr, *len)
            },
        }
    }

    /// Returns the raw data as a mutable slice, if Rust-owned.
    ///
    /// Returns `None` for externally-owned buffers (they are read-only
    /// through tenferro).
    pub fn as_mut_slice(&mut self) -> Option<&mut [T]> {
        match &mut self.inner {
            BufferInner::Owned(v) => Some(v.as_mut_slice()),
            BufferInner::External { .. } => None,
        }
    }

    /// Returns the number of elements in the buffer.
    pub fn len(&self) -> usize {
        match &self.inner {
            BufferInner::Owned(v) => v.len(),
            BufferInner::External { len, .. } => *len,
        }
    }

    /// Returns `true` if the buffer has no elements.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` if the buffer is Rust-owned (backed by `Vec<T>`).
    pub fn is_owned(&self) -> bool {
        matches!(self.inner, BufferInner::Owned(_))
    }

    /// Returns a raw pointer to the data.
    pub fn as_ptr(&self) -> *const T {
        match &self.inner {
            BufferInner::Owned(v) => v.as_ptr(),
            BufferInner::External { ptr, .. } => *ptr,
        }
    }
}

// ============================================================================
// Tensor<T>
// ============================================================================

/// Multi-dimensional dense tensor.
///
/// `Tensor<T>` is the primary data type in tenferro. It owns its data via
/// [`DataBuffer`] and carries shape, strides, and memory space information.
///
/// ## Zero-copy views
///
/// Operations like [`permute`](Tensor::permute), [`broadcast`](Tensor::broadcast),
/// and [`diagonal`](Tensor::diagonal) return new `Tensor` values that share the
/// same underlying data buffer, modifying only the dims/strides/offset metadata.
///
/// ## Accessing raw data
///
/// Use [`DataBuffer::as_slice`] via [`Tensor::buffer`] combined with
/// [`dims`](Tensor::dims), [`strides`](Tensor::strides), and
/// [`offset`](Tensor::offset) to construct backend-specific views
/// (e.g., `StridedView` in `tenferro-prims`).
///
/// ## GPU async support
///
/// The `event` field tracks pending GPU computation via
/// [`CompletionEvent`]. When a GPU operation produces a tensor, `event`
/// is set to `Some(...)`. Passing this tensor to another GPU operation
/// chains via stream dependencies without CPU synchronization. Methods
/// that access data from CPU call [`wait`](Tensor::wait) internally.
/// For CPU tensors, `event` is always `None` with zero overhead.
///
/// See `tenferro-einsum` crate docs for async chaining examples.
pub struct Tensor<T: Scalar> {
    buffer: DataBuffer<T>,
    dims: Vec<usize>,
    strides: Vec<isize>,
    offset: isize,
    /// The logical memory space where this tensor's data resides.
    logical_memory_space: LogicalMemorySpace,
    /// Optional preferred compute device override.
    preferred_compute_device: Option<ComputeDevice>,
    /// Pending GPU computation event.
    event: Option<CompletionEvent>,
}

/// Borrowed tensor view, lifetime-tied to the source [`Tensor`].
///
/// `TensorView` is the borrowed counterpart to [`Tensor`], following the
/// `String`/`&str` pattern. It references the source tensor's data buffer
/// without copying.
///
/// ## Public vs. internal views
///
/// Public API methods ([`Tensor::tensor_view`], etc.) call
/// [`Tensor::wait`] before constructing a view, so the returned
/// `TensorView` always has `event = None` — data is ready to read.
///
/// The crate-internal `as_operand_view()` skips the wait and
/// propagates the pending event, allowing accelerator operations to chain
/// without CPU synchronization.
pub struct TensorView<'a, T: Scalar> {
    data: &'a DataBuffer<T>,
    dims: Vec<usize>,
    strides: Vec<isize>,
    offset: isize,
    /// The logical memory space where the source tensor's data resides.
    logical_memory_space: LogicalMemorySpace,
    /// Optional preferred compute device override from the source tensor.
    preferred_compute_device: Option<ComputeDevice>,
    /// Pending event from the source tensor. Always `None` in public API.
    event: Option<&'a CompletionEvent>,
}

impl<'a, T: Scalar> TensorView<'a, T> {
    /// Returns the shape (size of each dimension).
    pub fn dims(&self) -> &[usize] {
        &self.dims
    }

    /// Returns the strides (in units of `T`).
    pub fn strides(&self) -> &[isize] {
        &self.strides
    }

    /// Returns the number of dimensions (rank).
    pub fn ndim(&self) -> usize {
        self.dims.len()
    }

    /// Returns the logical memory space where the source tensor's data resides.
    pub fn logical_memory_space(&self) -> LogicalMemorySpace {
        self.logical_memory_space
    }

    /// Returns the preferred compute device override, if set.
    pub fn preferred_compute_device(&self) -> Option<ComputeDevice> {
        self.preferred_compute_device
    }

    /// Returns a reference to the underlying data buffer.
    pub fn buffer(&self) -> &DataBuffer<T> {
        self.data
    }

    /// Returns the element offset into the data buffer.
    pub fn offset(&self) -> isize {
        self.offset
    }

    // ========================================================================
    // View operations (zero-copy)
    // ========================================================================

    /// Permute (reorder) the dimensions of this view.
    ///
    /// Returns a new `TensorView` with reordered dims and strides (zero-copy).
    ///
    /// # Errors
    ///
    /// Returns an error if `perm` is not a valid permutation of `0..ndim()`.
    pub fn permute(&self, _perm: &[usize]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Broadcast this view to a larger shape.
    ///
    /// Dimensions of size 1 are expanded to the target size (zero-copy
    /// via stride 0).
    ///
    /// # Errors
    ///
    /// Returns an error if `target_dims` is incompatible with the current shape.
    pub fn broadcast(&self, _target_dims: &[usize]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Extract a diagonal view by merging pairs of axes.
    ///
    /// # Errors
    ///
    /// Returns an error if any axis is out of range or paired dimensions
    /// have different sizes.
    pub fn diagonal(&self, _axes: &[(usize, usize)]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Select a single index along a dimension, removing that dimension.
    ///
    /// Returns a view with `ndim() - 1` dimensions. Zero-copy: adjusts
    /// offset and removes the selected dimension from dims/strides.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `index >= dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 4, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let tv = a.tensor_view();
    /// // Select batch index 5 → view of shape [3, 4]
    /// let mat = tv.select(2, 5).unwrap();
    /// assert_eq!(mat.dims(), &[3, 4]);
    /// ```
    pub fn select(&self, _dim: usize, _index: usize) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Narrow (slice) a dimension to a sub-range.
    ///
    /// Returns a view with the same number of dimensions, but
    /// `dims()[dim]` reduced to `length`. Zero-copy: only offset and
    /// dim size change.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `start + length > dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let tv = a.tensor_view();
    /// // Take columns 2..5 → view of shape [3, 3]
    /// let sub = tv.narrow(1, 2, 3).unwrap();
    /// assert_eq!(sub.dims(), &[3, 3]);
    /// ```
    pub fn narrow(&self, _dim: usize, _start: usize, _length: usize) -> Result<TensorView<'a, T>> {
        todo!()
    }

    // ========================================================================
    // Materialize (copy data into a new owned Tensor)
    // ========================================================================

    /// Copy this view into an owned [`Tensor`].
    pub fn to_tensor(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Return a contiguous copy of this view's data.
    pub fn contiguous(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Return a tensor with complex-conjugated elements from this view.
    ///
    /// For real types, returns a copy unchanged.
    pub fn conj(&self) -> Tensor<T>
    where
        T: Conjugate,
    {
        todo!()
    }
}

/// Placeholder for an accelerator synchronization event.
///
/// Tracks completion of asynchronous operations on accelerator devices
/// (GPU, FPGA, etc.), enabling operation chaining without CPU
/// synchronization. Will be replaced with an actual implementation
/// (e.g., CUDA/HIP event handle) when accelerator backends are added.
#[derive(Clone)]
pub struct CompletionEvent {
    _private: (),
}

impl<T: Scalar> Clone for Tensor<T> {
    fn clone(&self) -> Self {
        // Clone reads the buffer from CPU, so ensure any pending
        // computation has completed first.
        self.wait();
        Self {
            buffer: self.buffer.clone(),
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            // Cloned tensor starts with no pending event — the cloned buffer
            // is a snapshot taken after any pending computation completed.
            event: None,
        }
    }
}

impl<T: Scalar> Tensor<T> {
    // ========================================================================
    // Constructors
    // ========================================================================

    /// Create a tensor filled with zeros.
    ///
    /// # Arguments
    ///
    /// * `dims` — Shape of the tensor (e.g., `&[3, 4]` for a 3×4 matrix)
    /// * `memory_space` — Logical memory space for the allocation
    /// * `order` — Memory layout for the new allocation
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(
    ///     &[3, 4],
    ///     LogicalMemorySpace::MainMemory,
    ///     MemoryOrder::ColumnMajor,
    /// );
    /// ```
    pub fn zeros(_dims: &[usize], _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    /// Create a tensor filled with ones.
    ///
    /// # Arguments
    ///
    /// * `dims` — Shape of the tensor
    /// * `memory_space` — Logical memory space for the allocation
    /// * `order` — Memory layout for the new allocation
    pub fn ones(_dims: &[usize], _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    /// Create a tensor from a data slice.
    ///
    /// The slice length must equal the product of `dims`.
    /// Data is copied into owned storage with the specified memory order.
    /// Memory space is set to [`LogicalMemorySpace::MainMemory`].
    ///
    /// # Errors
    ///
    /// Returns an error if `data.len()` does not match the product of `dims`.
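    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    ///
    /// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    /// let m = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::ColumnMajor).unwrap();
    /// assert_eq!(m.dims(), &[2, 3]);
    ///
    /// // Length mismatch: 6 elements cannot fill a [2, 2] tensor
    /// assert!(Tensor::<f64>::from_slice(&data, &[2, 2], MemoryOrder::ColumnMajor).is_err());
    /// ```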
    pub fn from_slice(_data: &[T], _dims: &[usize], _order: MemoryOrder) -> Result<Self> {
        todo!()
    }

    /// Create a tensor from an owned `Vec<T>` with explicit layout.
    ///
    /// Takes ownership of the data. The caller specifies the dims, strides,
    /// and offset that describe how the data is laid out.
    ///
    /// # Errors
    ///
    /// Returns an error if the layout is inconsistent with the data length.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::Tensor;
    ///
    /// // 2×3 column-major: strides [1, 2], offset 0
    /// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    /// let t = Tensor::<f64>::from_vec(data, &[2, 3], &[1, 2], 0).unwrap();
    /// ```
    pub fn from_vec(
        _data: Vec<T>,
        _dims: &[usize],
        _strides: &[isize],
        _offset: isize,
    ) -> Result<Self> {
        todo!()
    }

    /// Create an identity matrix.
    ///
    /// Returns a 2D tensor of shape `[n, n]` with ones on the diagonal
    /// and zeros elsewhere.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let id = Tensor::<f64>::eye(3,
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// assert_eq!(id.dims(), &[3, 3]);
    /// ```
    pub fn eye(_n: usize, _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    // ========================================================================
    // Metadata
    // ========================================================================

    /// Returns the shape (size of each dimension).
    pub fn dims(&self) -> &[usize] {
        &self.dims
    }

    /// Returns the strides (in units of `T`).
    pub fn strides(&self) -> &[isize] {
        &self.strides
    }

    /// Returns the element offset into the data buffer.
    pub fn offset(&self) -> isize {
        self.offset
    }

    /// Returns a reference to the underlying data buffer.
    pub fn buffer(&self) -> &DataBuffer<T> {
        &self.buffer
    }

    /// Returns a mutable reference to the underlying data buffer.
    pub fn buffer_mut(&mut self) -> &mut DataBuffer<T> {
        &mut self.buffer
    }

    /// Returns the number of dimensions (rank).
    pub fn ndim(&self) -> usize {
        self.dims.len()
    }

    /// Returns the total number of elements.
    pub fn len(&self) -> usize {
        todo!()
    }

    /// Returns `true` if the tensor has zero elements.
    pub fn is_empty(&self) -> bool {
        todo!()
    }

    /// Returns the logical memory space where this tensor's data resides.
    pub fn logical_memory_space(&self) -> LogicalMemorySpace {
        self.logical_memory_space
    }

    /// Returns the preferred compute device override, if set.
    pub fn preferred_compute_device(&self) -> Option<ComputeDevice> {
        self.preferred_compute_device
    }

    /// Set the preferred compute device override.
    ///
    /// When set, this device will be used for operations on this tensor
    /// instead of the default device selected by
    /// [`preferred_compute_devices`](tenferro_device::preferred_compute_devices).
    /// Pass `None` to clear the override and revert to automatic selection.
    pub fn set_preferred_compute_device(&mut self, device: Option<ComputeDevice>) {
        self.preferred_compute_device = device;
    }

    /// Return the effective compute devices for a given operation kind.
    ///
    /// If a preferred compute device is set, returns a single-element vector
    /// containing that device. Otherwise, delegates to
    /// [`preferred_compute_devices`](tenferro_device::preferred_compute_devices).
    ///
    /// # Errors
    ///
    /// Returns an error if no compatible compute device is found.
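    ///
    /// # Examples
    ///
    /// A sketch of the override behavior (`device` and `op_kind` stand in for
    /// whatever [`ComputeDevice`] and [`OpKind`] values apply in context):
    ///
    /// ```ignore
    /// // No override: selection is automatic
    /// let auto = t.effective_compute_devices(op_kind)?;
    ///
    /// // With an override, exactly that device is returned
    /// t.set_preferred_compute_device(Some(device));
    /// assert_eq!(t.effective_compute_devices(op_kind)?, vec![device]);
    /// ```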
    pub fn effective_compute_devices(
        &self,
        _op_kind: OpKind,
    ) -> tenferro_device::Result<Vec<ComputeDevice>> {
        todo!()
    }

    // ========================================================================
    // View operations (zero-copy, public API waits if pending)
    // ========================================================================

    /// Returns a [`TensorView`] for data inspection.
    ///
    /// Waits for any pending accelerator computation before returning.
    /// The returned view has `event = None` (data is ready to read).
    pub fn tensor_view(&self) -> TensorView<'_, T> {
        self.wait();
        TensorView {
            data: &self.buffer,
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: None,
        }
    }

    /// Returns a non-blocking [`TensorView`] that propagates the
    /// pending event (if any) from the source tensor.
    ///
    /// This is an internal API used by `einsum` and other accelerator
    /// operations to chain computations without CPU synchronization.
    pub(crate) fn as_operand_view(&self) -> TensorView<'_, T> {
        TensorView {
            data: &self.buffer,
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: self.event.as_ref(),
        }
    }

    /// Permute (reorder) the dimensions of the tensor.
    ///
    /// This is a zero-copy operation that only modifies dims and strides.
    /// Waits for any pending accelerator computation before returning.
    ///
    /// # Arguments
    ///
    /// * `perm` — Permutation of dimension indices (e.g., `&[1, 0]` to transpose)
    ///
    /// # Errors
    ///
    /// Returns an error if `perm` is not a valid permutation of `0..ndim()`.
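    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 4],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let at = a.permute(&[1, 0]).unwrap();
    /// assert_eq!(at.dims(), &[4, 3]);
    /// ```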
    pub fn permute(&self, _perm: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Broadcast the tensor to a larger shape.
    ///
    /// Dimensions of size 1 are expanded to the target size by setting their
    /// stride to 0. This is a zero-copy metadata operation.
    ///
    /// # Errors
    ///
    /// Returns an error if `target_dims` is incompatible with the current shape.
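    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let col = Tensor::<f64>::ones(&[3, 1],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let expanded = col.broadcast(&[3, 4]).unwrap();
    /// assert_eq!(expanded.dims(), &[3, 4]);
    /// // No data copied; the stride along axis 1 becomes 0
    /// ```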
    pub fn broadcast(&self, _target_dims: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Extract a diagonal view by merging pairs of axes.
    ///
    /// For each `(axis_i, axis_j)` pair, the two dimensions are replaced
    /// by a single diagonal dimension. This is a zero-copy stride trick.
    ///
    /// # Errors
    ///
    /// Returns an error if any axis is out of range or the paired
    /// dimensions have different sizes.
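    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let sq = Tensor::<f64>::zeros(&[4, 4],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Merge axes 0 and 1 into a single diagonal axis
    /// let diag = sq.diagonal(&[(0, 1)]).unwrap();
    /// assert_eq!(diag.dims(), &[4]);
    /// ```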
    pub fn diagonal(&self, _axes: &[(usize, usize)]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Reshape the tensor to a new shape.
    ///
    /// The total number of elements must remain the same.
    /// Requires contiguous data; returns an error if the tensor is not contiguous.
    ///
    /// # Errors
    ///
    /// Returns an error if the tensor is not contiguous or the new shape
    /// has a different total element count.
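    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[2, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // 2 × 3 = 6 elements, so [6] is a valid target shape
    /// let flat = a.reshape(&[6]).unwrap();
    /// assert_eq!(flat.dims(), &[6]);
    /// ```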
    pub fn reshape(&self, _new_dims: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Select a single index along a dimension, removing that dimension.
    ///
    /// Returns a tensor with `ndim() - 1` dimensions. This is a zero-copy
    /// operation that adjusts the offset and removes the selected dimension.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `index >= dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// // Batched matrices [m, n, batch] = [3, 4, 10]
    /// let a = Tensor::<f64>::zeros(&[3, 4, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Select batch index 5 → [3, 4]
    /// let mat = a.select(2, 5).unwrap();
    /// assert_eq!(mat.dims(), &[3, 4]);
    /// ```
    pub fn select(&self, _dim: usize, _index: usize) -> Result<Tensor<T>> {
        todo!()
    }

    /// Narrow (slice) a dimension to a sub-range.
    ///
    /// Returns a tensor with the same number of dimensions, but
    /// `dims()[dim]` reduced to `length`. Zero-copy: only offset and
    /// dim size change.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `start + length > dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Take columns 2..5 → [3, 3]
    /// let sub = a.narrow(1, 2, 3).unwrap();
    /// assert_eq!(sub.dims(), &[3, 3]);
    /// ```
    pub fn narrow(&self, _dim: usize, _start: usize, _length: usize) -> Result<Tensor<T>> {
        todo!()
    }

    // ========================================================================
    // Data operations
    // ========================================================================

    /// Return a contiguous copy of this tensor in the given memory order.
    ///
    /// If the tensor is already contiguous in the requested order,
    /// this may avoid copying (implementation-defined).
    pub fn contiguous(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Consume this tensor and return a contiguous version.
    ///
    /// If the tensor is already contiguous in the requested order, returns
    /// `self` without copying or allocating. Otherwise, copies data into a
    /// new contiguous buffer.
    ///
    /// Prefer this over [`contiguous`](Tensor::contiguous) when you no
    /// longer need the original tensor, as it avoids unnecessary allocation
    /// and reference-count overhead.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(
    ///     &[3, 4],
    ///     LogicalMemorySpace::MainMemory,
    ///     MemoryOrder::ColumnMajor,
    /// );
    ///
    /// // Narrowing the first dimension creates a non-contiguous view:
    /// // only 2 of every 3 elements in each column are kept, leaving gaps.
    /// // (A plain 2D transpose would still be contiguous in the other order.)
    /// let sub = a.narrow(0, 0, 2).unwrap();
    /// assert!(!sub.is_contiguous());
    ///
    /// // into_contiguous copies only when necessary
    /// let sub_contig = sub.into_contiguous(MemoryOrder::ColumnMajor);
    /// assert!(sub_contig.is_contiguous());
    ///
    /// // Already contiguous: zero-cost passthrough
    /// let b = Tensor::<f64>::zeros(
    ///     &[3, 4],
    ///     LogicalMemorySpace::MainMemory,
    ///     MemoryOrder::RowMajor,
    /// );
    /// let b2 = b.into_contiguous(MemoryOrder::RowMajor); // no copy
    /// ```
    pub fn into_contiguous(self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Returns `true` if the tensor data is contiguous in memory.
    ///
    /// A tensor is contiguous if its elements occupy a dense block of
    /// memory with no gaps, in either column-major or row-major order.
    pub fn is_contiguous(&self) -> bool {
        todo!()
    }

    /// Return a tensor with complex-conjugated elements.
    ///
    /// For real types (`f32`, `f64`), returns a copy unchanged.
    /// For complex types (`Complex32`, `Complex64`), negates the imaginary part.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use num_complex::Complex64;
    ///
    /// let data = vec![Complex64::new(1.0, 2.0), Complex64::new(3.0, -4.0)];
    /// let a = Tensor::from_slice(&data, &[2], MemoryOrder::ColumnMajor).unwrap();
    /// let a_conj = a.conj();
    /// // a_conj contains [1.0 - 2.0i, 3.0 + 4.0i]
    /// ```
    pub fn conj(&self) -> Tensor<T>
    where
        T: Conjugate,
    {
        // This reads the buffer from CPU, so wait for pending computation.
        self.wait();
        // Conjugation is element-wise and position-independent,
        // so we conjugate the raw buffer directly and preserve layout.
        let conj_data: Vec<T> = self
            .buffer
            .as_slice()
            .iter()
            .copied()
            .map(T::conj)
            .collect();
        Tensor {
            buffer: DataBuffer::from_vec(conj_data),
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: None,
        }
    }

    /// Consume this tensor and return one with complex-conjugated elements.
    ///
    /// Like [`conj`](Tensor::conj) but consumes `self`, potentially
    /// reusing the buffer if no other references exist.
    pub fn into_conj(self) -> Tensor<T>
    where
        T: Conjugate,
    {
        todo!()
    }

    /// Extract the lower triangular part of a matrix.
    ///
    /// Returns a new tensor with elements above the `diagonal`-th diagonal
    /// set to zero. For batched tensors `(m, n, *)`, applies independently
    /// to each batch element.
    ///
    /// - `diagonal = 0`: main diagonal (default)
    /// - `diagonal > 0`: above main diagonal
    /// - `diagonal < 0`: below main diagonal
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::ones(&[3, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let lower = a.tril(0);
    /// // [[1, 0, 0],
    /// //  [1, 1, 0],
    /// //  [1, 1, 1]]
    /// ```
    pub fn tril(&self, _diagonal: isize) -> Tensor<T> {
        todo!()
    }

    /// Extract the upper triangular part of a matrix.
    ///
    /// Returns a new tensor with elements below the `diagonal`-th diagonal
    /// set to zero. For batched tensors `(m, n, *)`, applies independently
    /// to each batch element.
    ///
    /// - `diagonal = 0`: main diagonal (default)
    /// - `diagonal > 0`: above main diagonal
    /// - `diagonal < 0`: below main diagonal
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::ones(&[3, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let upper = a.triu(0);
    /// // [[1, 1, 1],
    /// //  [0, 1, 1],
    /// //  [0, 0, 1]]
    /// ```
    pub fn triu(&self, _diagonal: isize) -> Tensor<T> {
        todo!()
    }

    /// Asynchronously transfer this tensor to a different memory space.
    ///
    /// Returns a new tensor in the target memory space. If the source
    /// and destination spaces are the same, returns a zero-copy no-op.
    /// Otherwise, data is copied (potentially asynchronously for GPU).
    ///
    /// # Errors
    ///
    /// Returns an error if the transfer is not supported.
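    ///
    /// # Examples
    ///
    /// A sketch of the same-space no-op path (accelerator memory spaces are
    /// not implemented yet, so only [`LogicalMemorySpace::MainMemory`] is shown):
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 4],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Same source and target space: zero-copy no-op
    /// let b = a.to_memory_space_async(LogicalMemorySpace::MainMemory).unwrap();
    /// ```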
    pub fn to_memory_space_async(&self, _target: LogicalMemorySpace) -> Result<Tensor<T>> {
        todo!()
    }

    // ========================================================================
    // GPU async support
    // ========================================================================

    /// Wait for any pending GPU computation to complete.
    ///
    /// No-op for CPU tensors or when GPU computation has already completed.
    /// Methods that access tensor data from CPU call this internally, so
    /// explicit calls are only needed when the caller wants to ensure
    /// completion at a specific point.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // GPU einsum returns immediately with pending event
    /// let c = einsum("ij,jk->ik", &[&a_gpu, &b_gpu]).unwrap();
    /// assert!(!c.is_ready());
    ///
    /// // Explicit wait
    /// c.wait();
    /// assert!(c.is_ready());
    ///
    /// // Chaining: implicit sync via stream dependencies, no CPU wait
    /// let d = einsum("ij,jk->ik", &[&c, &e_gpu]).unwrap();
    /// //  → detects c.event → chains on GPU → returns immediately
    /// ```
    pub fn wait(&self) {
        // Currently a no-op: only CPU tensors exist (event is always None).
        // Will synchronize on CompletionEvent when GPU backends are implemented.
    }

    /// Check if tensor data is ready without blocking.
    ///
    /// Returns `true` for CPU tensors (always ready) and for GPU tensors
    /// whose computation has completed. Returns `false` if a GPU operation
    /// is still in progress.
    pub fn is_ready(&self) -> bool {
        self.event.is_none()
    }
}

// ============================================================================
// Differentiable impl — connects Tensor<T> to the generic AD framework
// ============================================================================

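/// A sketch of how an AD driver might use these hooks (the surrounding
/// chainrules_core machinery is not shown and may differ):
///
/// ```ignore
/// use chainrules_core::Differentiable;
///
/// // Start from a zero cotangent shaped like `x`, then fold in contributions.
/// let mut grad = x.zero_tangent();
/// grad = <Tensor<f64> as Differentiable>::accumulate_tangent(grad, &dx1);
/// grad = <Tensor<f64> as Differentiable>::accumulate_tangent(grad, &dx2);
/// ```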
impl<T: Scalar> chainrules_core::Differentiable for Tensor<T> {
    type Tangent = Tensor<T>;

    fn zero_tangent(&self) -> Tensor<T> {
        todo!()
    }

    fn accumulate_tangent(_a: Tensor<T>, _b: &Tensor<T>) -> Tensor<T> {
        todo!()
    }
}

// ============================================================================
// PhantomData — not needed here
// ============================================================================

// DataBuffer<T> uses T directly in Vec<T> and *const T, so no PhantomData is
// needed to anchor the type parameter. This comment records that design decision.