tenferro_tensor/lib.rs
//! Dense tensor type with CPU/GPU support.
//!
//! This crate provides [`Tensor<T>`], a multi-dimensional array type composed of
//! shape, strides, and a device-aware [`DataBuffer`]. It supports:
//!
//! - **Zero-copy view operations**: [`Tensor::permute`], [`Tensor::broadcast`],
//!   [`Tensor::diagonal`], [`Tensor::select`], [`Tensor::narrow`] modify only
//!   metadata (dims/strides)
//! - **Data operations**: [`Tensor::contiguous`] / [`Tensor::into_contiguous`] copy
//!   data into a contiguous layout (the consuming variant avoids allocation when
//!   the tensor is already contiguous); [`Tensor::tril`] / [`Tensor::triu`] extract
//!   triangular parts
//! - **Factory functions**: [`Tensor::zeros`], [`Tensor::ones`], [`Tensor::eye`]
//! - **DLPack interop**: [`DataBuffer`] supports both Rust-owned (`Vec<T>`) and
//!   externally-owned memory (e.g., imported via DLPack) with automatic cleanup.
//!
//! # Memory layout
//!
//! [`Tensor`] stores explicit strides and is not tied to any particular memory
//! order. [`MemoryOrder`] is only used as a parameter when allocating new memory
//! (e.g., [`Tensor::zeros`], [`Tensor::contiguous`]).
//!
//! # No strided-rs dependency
//!
//! This crate does **not** depend on `strided-rs`. The strided-rs types
//! (`StridedView`, `StridedViewMut`) are backend implementation details
//! used only in `tenferro-prims`. To pass tensor data to prims backends,
//! use [`DataBuffer::as_slice`] combined with [`Tensor::dims`],
//! [`Tensor::strides`], and [`Tensor::offset`].
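//!
//! A minimal sketch of that hand-off (the `to_backend` adapter is hypothetical,
//! shown only to illustrate the raw parts a backend needs):
//!
//! ```ignore
//! use tenferro_tensor::Tensor;
//! use tenferro_algebra::Scalar;
//!
//! // Hypothetical adapter: not part of this crate's API.
//! fn to_backend<T: Scalar>(t: &Tensor<T>) {
//!     let data = t.buffer().as_slice(); // raw storage
//!     let dims = t.dims();              // logical shape
//!     let strides = t.strides();        // element strides per dimension
//!     let offset = t.offset();          // start offset into `data`
//!     // ... construct the backend-specific strided view from these parts
//! }
//! ```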
//!
//! # Examples
//!
//! ## Creating tensors
//!
//! ```ignore
//! use tenferro_tensor::{Tensor, MemoryOrder};
//! use tenferro_device::LogicalMemorySpace;
//!
//! // Zeros / ones
//! let a = Tensor::<f64>::zeros(&[3, 4], LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let b = Tensor::<f64>::ones(&[3, 4], LogicalMemorySpace::MainMemory, MemoryOrder::RowMajor);
//!
//! // From existing data (column-major: Julia convention)
//! let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
//! let m = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::ColumnMajor).unwrap();
//! // Logical layout:
//! // [[1, 3, 5],
//! //  [2, 4, 6]]
//! ```
//!
//! ## Transpose and reshape
//!
//! ```ignore
//! // Transpose a matrix (zero-copy, only strides change)
//! let mt = m.permute(&[1, 0]).unwrap();
//! assert_eq!(mt.dims(), &[3, 2]);
//!
//! // Reshape (requires contiguous data)
//! let flat = m.reshape(&[6]).unwrap();
//! assert_eq!(flat.dims(), &[6]);
//! ```
//!
//! ## Broadcasting
//!
//! ```ignore
//! // Column vector [3,1] broadcast to [3,4] for element-wise ops
//! let col = Tensor::<f64>::ones(&[3, 1], LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let expanded = col.broadcast(&[3, 4]).unwrap();
//! assert_eq!(expanded.dims(), &[3, 4]);
//! // No data is copied; stride along axis 1 is set to 0
//! ```
//!
//! ## TensorView — borrowed, zero-copy views
//!
//! [`TensorView`] is the borrowed counterpart to [`Tensor`], following the
//! `String` / `&str` pattern. View operations modify only metadata
//! (dims, strides, offset) and never copy data.
//!
//! ```ignore
//! // tensor_view() borrows the tensor — no data copy
//! let tv = m.tensor_view();
//! assert_eq!(tv.dims(), m.dims());
//!
//! // permute: reorder dimensions (zero-copy, strides reordered)
//! let tv_t = tv.permute(&[1, 0]).unwrap();
//! assert_eq!(tv_t.dims(), &[3, 2]);
//!
//! // broadcast: expand size-1 dims (zero-copy, stride set to 0)
//! let col = Tensor::<f64>::from_slice(&[1.0, 2.0, 3.0], &[3, 1],
//!     MemoryOrder::ColumnMajor).unwrap();
//! let col_tv = col.tensor_view();
//! let expanded = col_tv.broadcast(&[3, 4]).unwrap();
//! assert_eq!(expanded.dims(), &[3, 4]);
//!
//! // diagonal: extract diagonal view (zero-copy, strides merged)
//! let sq = Tensor::<f64>::zeros(&[4, 4],
//!     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
//! let sq_tv = sq.tensor_view();
//! let diag = sq_tv.diagonal(&[(0, 1)]).unwrap();
//! assert_eq!(diag.dims(), &[4]);
//!
//! // to_tensor() / contiguous(): materialize a view into owned Tensor
//! let owned = tv_t.to_tensor(MemoryOrder::ColumnMajor);
//! ```

use tenferro_algebra::{Conjugate, Scalar};
use tenferro_device::{ComputeDevice, LogicalMemorySpace, OpKind, Result};

/// Memory ordering for new allocations.
///
/// Specifies how elements are laid out in memory when creating new tensors
/// or copying data into a contiguous buffer. This is **not** stored on the
/// tensor itself — the tensor's [`strides`](Tensor::strides) fully describe
/// the memory layout.
///
/// - [`ColumnMajor`](MemoryOrder::ColumnMajor): First dimension is contiguous
///   (Fortran/Julia convention)
/// - [`RowMajor`](MemoryOrder::RowMajor): Last dimension is contiguous
///   (C/NumPy convention)
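///
/// For example, a freshly allocated `[2, 3]` tensor would have strides
/// `[1, 2]` in column-major order and `[3, 1]` in row-major order
/// (a sketch of the intended layout):
///
/// ```ignore
/// use tenferro_tensor::{Tensor, MemoryOrder};
/// use tenferro_device::LogicalMemorySpace;
///
/// let c = Tensor::<f64>::zeros(&[2, 3],
///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
/// assert_eq!(c.strides(), &[1, 2]);
///
/// let r = Tensor::<f64>::zeros(&[2, 3],
///     LogicalMemorySpace::MainMemory, MemoryOrder::RowMajor);
/// assert_eq!(r.strides(), &[3, 1]);
/// ```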
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MemoryOrder {
    /// Column-major (Fortran/Julia order). First dimension has stride 1.
    ColumnMajor,
    /// Row-major (C/NumPy order). Last dimension has stride 1.
    RowMajor,
}

// ============================================================================
// DataBuffer — unified owned/external storage
// ============================================================================

/// Data storage for tensor elements.
///
/// Abstracts over ownership: data may be Rust-owned ([`Vec<T>`]) or
/// externally-owned (e.g., imported via DLPack with a release callback).
/// Shape and stride metadata are NOT stored here — they live on
/// [`Tensor<T>`].
///
/// # Clone behavior
///
/// Cloning an externally-owned buffer performs a **deep copy** into a new
/// Rust-owned `Vec<T>`. The release callback cannot be cloned; the clone
/// is always Rust-owned.
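///
/// A sketch of the resulting ownership:
///
/// ```ignore
/// use tenferro_tensor::DataBuffer;
///
/// let data = vec![1.0f64, 2.0, 3.0];
/// let ptr = data.as_ptr();
/// let len = data.len();
/// // External buffer whose release callback drops the original Vec.
/// let ext = unsafe { DataBuffer::from_external(ptr, len, move || drop(data)) };
/// assert!(!ext.is_owned());
///
/// let copy = ext.clone(); // deep copy into a new Vec<T>
/// assert!(copy.is_owned());
/// ```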
pub struct DataBuffer<T> {
    inner: BufferInner<T>,
}

/// Private ownership representation.
enum BufferInner<T> {
    /// Rust-owned contiguous data.
    Owned(Vec<T>),
    /// Externally-owned data with release callback.
    External {
        ptr: *const T,
        len: usize,
        /// Called on drop to notify the external owner.
        release: Option<Box<dyn FnOnce() + Send>>,
    },
}

// Safety: External buffer pointers are treated as Send/Sync since
// the external framework guarantees the data is valid for the lifetime
// of the DataBuffer. The release callback is Send.
unsafe impl<T: Send> Send for DataBuffer<T> {}
unsafe impl<T: Sync> Sync for DataBuffer<T> {}

impl<T: Copy> Clone for DataBuffer<T> {
    fn clone(&self) -> Self {
        match &self.inner {
            BufferInner::Owned(v) => DataBuffer {
                inner: BufferInner::Owned(v.clone()),
            },
            // Deep copy: can't clone the release callback.
            BufferInner::External { ptr, len, .. } => {
                let slice = unsafe { std::slice::from_raw_parts(*ptr, *len) };
                DataBuffer {
                    inner: BufferInner::Owned(slice.to_vec()),
                }
            }
        }
    }
}

impl<T> Drop for DataBuffer<T> {
    fn drop(&mut self) {
        if let BufferInner::External { release, .. } = &mut self.inner {
            if let Some(f) = release.take() {
                f();
            }
        }
    }
}

impl<T> DataBuffer<T> {
    /// Create a buffer from an owned `Vec<T>`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::DataBuffer;
    ///
    /// let buf = DataBuffer::from_vec(vec![1.0, 2.0, 3.0]);
    /// assert_eq!(buf.len(), 3);
    /// assert!(buf.is_owned());
    /// ```
    pub fn from_vec(v: Vec<T>) -> Self {
        DataBuffer {
            inner: BufferInner::Owned(v),
        }
    }

    /// Create a buffer from externally-owned data with a release callback.
    ///
    /// # Safety
    ///
    /// - `ptr` must point to a valid, properly aligned allocation of at
    ///   least `len` elements of type `T`.
    /// - The allocation must remain valid until the release callback is invoked
    ///   (which happens when this `DataBuffer` is dropped).
    /// - The release callback must correctly notify the external owner.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::DataBuffer;
    ///
    /// let data = vec![1.0, 2.0, 3.0];
    /// let ptr = data.as_ptr();
    /// let len = data.len();
    /// let buf = unsafe {
    ///     DataBuffer::from_external(ptr, len, move || drop(data))
    /// };
    /// assert!(!buf.is_owned());
    /// ```
    pub unsafe fn from_external(
        ptr: *const T,
        len: usize,
        release: impl FnOnce() + Send + 'static,
    ) -> Self {
        DataBuffer {
            inner: BufferInner::External {
                ptr,
                len,
                release: Some(Box::new(release)),
            },
        }
    }

    /// Returns the raw data as a slice.
    pub fn as_slice(&self) -> &[T] {
        match &self.inner {
            BufferInner::Owned(v) => v.as_slice(),
            BufferInner::External { ptr, len, .. } => unsafe {
                std::slice::from_raw_parts(*ptr, *len)
            },
        }
    }

    /// Returns the raw data as a mutable slice, if Rust-owned.
    ///
    /// Returns `None` for externally-owned buffers (they are read-only
    /// through tenferro).
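    ///
    /// # Examples
    ///
    /// A sketch of the owned-vs-external distinction:
    ///
    /// ```ignore
    /// use tenferro_tensor::DataBuffer;
    ///
    /// let mut buf = DataBuffer::from_vec(vec![1.0, 2.0]);
    /// // Owned buffer: mutation is allowed.
    /// buf.as_mut_slice().unwrap()[0] = 3.0;
    /// assert_eq!(buf.as_slice(), &[3.0, 2.0]);
    /// ```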
    pub fn as_mut_slice(&mut self) -> Option<&mut [T]> {
        match &mut self.inner {
            BufferInner::Owned(v) => Some(v.as_mut_slice()),
            BufferInner::External { .. } => None,
        }
    }

    /// Returns the number of elements in the buffer.
    pub fn len(&self) -> usize {
        match &self.inner {
            BufferInner::Owned(v) => v.len(),
            BufferInner::External { len, .. } => *len,
        }
    }

    /// Returns `true` if the buffer has no elements.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` if the buffer is Rust-owned (backed by `Vec<T>`).
    pub fn is_owned(&self) -> bool {
        matches!(self.inner, BufferInner::Owned(_))
    }

    /// Returns a raw pointer to the data.
    pub fn as_ptr(&self) -> *const T {
        match &self.inner {
            BufferInner::Owned(v) => v.as_ptr(),
            BufferInner::External { ptr, .. } => *ptr,
        }
    }
}

// ============================================================================
// Tensor<T>
// ============================================================================

/// Multi-dimensional dense tensor.
///
/// `Tensor<T>` is the primary data type in tenferro. It owns its data via
/// [`DataBuffer`] and carries shape, strides, and memory space information.
///
/// ## Zero-copy views
///
/// Operations like [`permute`](Tensor::permute), [`broadcast`](Tensor::broadcast),
/// and [`diagonal`](Tensor::diagonal) return new `Tensor` values that share the
/// same underlying data buffer, modifying only the dims/strides/offset metadata.
///
/// ## Accessing raw data
///
/// Use [`DataBuffer::as_slice`] via [`Tensor::buffer`] combined with
/// [`dims`](Tensor::dims), [`strides`](Tensor::strides), and
/// [`offset`](Tensor::offset) to construct backend-specific views
/// (e.g., `StridedView` in `tenferro-prims`).
///
/// ## GPU async support
///
/// The `event` field tracks pending GPU computation via
/// [`CompletionEvent`]. When a GPU operation produces a tensor, `event`
/// is set to `Some(...)`. Passing this tensor to another GPU operation
/// chains via stream dependencies without CPU synchronization. Methods
/// that access data from CPU call [`wait`](Tensor::wait) internally.
/// For CPU tensors, `event` is always `None` with zero overhead.
///
/// See `tenferro-einsum` crate docs for async chaining examples.
pub struct Tensor<T: Scalar> {
    buffer: DataBuffer<T>,
    dims: Vec<usize>,
    strides: Vec<isize>,
    offset: isize,
    /// The logical memory space where this tensor's data resides.
    logical_memory_space: LogicalMemorySpace,
    /// Optional preferred compute device override.
    preferred_compute_device: Option<ComputeDevice>,
    /// Pending GPU computation event.
    event: Option<CompletionEvent>,
}

/// Borrowed tensor view, lifetime-tied to the source [`Tensor`].
///
/// `TensorView` is the borrowed counterpart to [`Tensor`], following the
/// `String`/`&str` pattern. It references the source tensor's data buffer
/// without copying.
///
/// ## Public vs. internal views
///
/// Public API methods ([`Tensor::tensor_view`], etc.) call
/// [`Tensor::wait`] before constructing a view, so the returned
/// `TensorView` always has `event = None` — data is ready to read.
///
/// The crate-internal `as_operand_view()` skips the wait and
/// propagates the pending event, allowing accelerator operations to chain
/// without CPU synchronization.
pub struct TensorView<'a, T: Scalar> {
    data: &'a DataBuffer<T>,
    dims: Vec<usize>,
    strides: Vec<isize>,
    offset: isize,
    /// The logical memory space where the source tensor's data resides.
    logical_memory_space: LogicalMemorySpace,
    /// Optional preferred compute device override from the source tensor.
    preferred_compute_device: Option<ComputeDevice>,
    /// Pending event from the source tensor. Always `None` in public API.
    event: Option<&'a CompletionEvent>,
}

impl<'a, T: Scalar> TensorView<'a, T> {
    /// Returns the shape (size of each dimension).
    pub fn dims(&self) -> &[usize] {
        &self.dims
    }

    /// Returns the strides (in units of `T`).
    pub fn strides(&self) -> &[isize] {
        &self.strides
    }

    /// Returns the number of dimensions (rank).
    pub fn ndim(&self) -> usize {
        self.dims.len()
    }

    /// Returns the logical memory space where the source tensor's data resides.
    pub fn logical_memory_space(&self) -> LogicalMemorySpace {
        self.logical_memory_space
    }

    /// Returns the preferred compute device override, if set.
    pub fn preferred_compute_device(&self) -> Option<ComputeDevice> {
        self.preferred_compute_device
    }

    /// Returns a reference to the underlying data buffer.
    pub fn buffer(&self) -> &DataBuffer<T> {
        self.data
    }

    /// Returns the element offset into the data buffer.
    pub fn offset(&self) -> isize {
        self.offset
    }

    // ========================================================================
    // View operations (zero-copy)
    // ========================================================================

    /// Permute (reorder) the dimensions of this view.
    ///
    /// Returns a new `TensorView` with reordered dims and strides (zero-copy).
    ///
    /// # Errors
    ///
    /// Returns an error if `perm` is not a valid permutation of `0..ndim()`.
    pub fn permute(&self, _perm: &[usize]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Broadcast this view to a larger shape.
    ///
    /// Dimensions of size 1 are expanded to the target size (zero-copy
    /// via stride 0).
    ///
    /// # Errors
    ///
    /// Returns an error if `target_dims` is incompatible with the current shape.
    pub fn broadcast(&self, _target_dims: &[usize]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Extract a diagonal view by merging pairs of axes.
    ///
    /// # Errors
    ///
    /// Returns an error if any axis is out of range or paired dimensions
    /// have different sizes.
    pub fn diagonal(&self, _axes: &[(usize, usize)]) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Select a single index along a dimension, removing that dimension.
    ///
    /// Returns a view with `ndim() - 1` dimensions. Zero-copy: adjusts
    /// offset and removes the selected dimension from dims/strides.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `index >= dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 4, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let tv = a.tensor_view();
    /// // Select batch index 5 → view of shape [3, 4]
    /// let mat = tv.select(2, 5).unwrap();
    /// assert_eq!(mat.dims(), &[3, 4]);
    /// ```
    pub fn select(&self, _dim: usize, _index: usize) -> Result<TensorView<'a, T>> {
        todo!()
    }

    /// Narrow (slice) a dimension to a sub-range.
    ///
    /// Returns a view with the same number of dimensions, but
    /// `dims()[dim]` reduced to `length`. Zero-copy: only offset and
    /// dim size change.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `start + length > dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let tv = a.tensor_view();
    /// // Take columns 2..5 → view of shape [3, 3]
    /// let sub = tv.narrow(1, 2, 3).unwrap();
    /// assert_eq!(sub.dims(), &[3, 3]);
    /// ```
    pub fn narrow(&self, _dim: usize, _start: usize, _length: usize) -> Result<TensorView<'a, T>> {
        todo!()
    }

    // ========================================================================
    // Materialize (copy data into a new owned Tensor)
    // ========================================================================

    /// Copy this view into an owned [`Tensor`].
    pub fn to_tensor(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Return a contiguous copy of this view's data.
    pub fn contiguous(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Return a tensor with complex-conjugated elements from this view.
    ///
    /// For real types, returns a copy unchanged.
    pub fn conj(&self) -> Tensor<T>
    where
        T: Conjugate,
    {
        todo!()
    }
}

/// Placeholder for an accelerator synchronization event.
///
/// Tracks completion of asynchronous operations on accelerator devices
/// (GPU, FPGA, etc.), enabling operation chaining without CPU
/// synchronization. Will be replaced with an actual implementation
/// (e.g., CUDA/HIP event handle) when accelerator backends are added.
#[derive(Clone)]
pub struct CompletionEvent {
    _private: (),
}

impl<T: Scalar> Clone for Tensor<T> {
    fn clone(&self) -> Self {
        // Reading the buffer requires any pending accelerator computation
        // to have completed, so the cloned data is a finished snapshot.
        self.wait();
        Self {
            buffer: self.buffer.clone(),
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            // Cloned tensor starts with no pending event — the snapshot
            // above was taken after any pending computation completed.
            event: None,
        }
    }
}

impl<T: Scalar> Tensor<T> {
    // ========================================================================
    // Constructors
    // ========================================================================

    /// Create a tensor filled with zeros.
    ///
    /// # Arguments
    ///
    /// * `dims` — Shape of the tensor (e.g., `&[3, 4]` for a 3×4 matrix)
    /// * `memory_space` — Logical memory space for the allocation
    /// * `order` — Memory layout for the new allocation
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(
    ///     &[3, 4],
    ///     LogicalMemorySpace::MainMemory,
    ///     MemoryOrder::ColumnMajor,
    /// );
    /// ```
    pub fn zeros(_dims: &[usize], _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    /// Create a tensor filled with ones.
    ///
    /// # Arguments
    ///
    /// * `dims` — Shape of the tensor
    /// * `memory_space` — Logical memory space for the allocation
    /// * `order` — Memory layout for the new allocation
    pub fn ones(_dims: &[usize], _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    /// Create a tensor from a data slice.
    ///
    /// The slice length must equal the product of `dims`.
    /// Data is copied into owned storage with the specified memory order.
    /// Memory space is set to [`LogicalMemorySpace::MainMemory`].
    ///
    /// # Errors
    ///
    /// Returns an error if `data.len()` does not match the product of `dims`.
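    ///
    /// # Examples
    ///
    /// A sketch mirroring the crate-level example:
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    ///
    /// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    /// let m = Tensor::<f64>::from_slice(&data, &[2, 3], MemoryOrder::ColumnMajor).unwrap();
    /// assert_eq!(m.dims(), &[2, 3]);
    /// // Column-major: first dimension contiguous → logical layout
    /// // [[1, 3, 5],
    /// //  [2, 4, 6]]
    /// ```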
    pub fn from_slice(_data: &[T], _dims: &[usize], _order: MemoryOrder) -> Result<Self> {
        todo!()
    }

    /// Create a tensor from an owned `Vec<T>` with explicit layout.
    ///
    /// Takes ownership of the data. The caller specifies the dims, strides,
    /// and offset that describe how the data is laid out.
    ///
    /// # Errors
    ///
    /// Returns an error if the layout is inconsistent with the data length.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::Tensor;
    ///
    /// // 2×3 column-major: strides [1, 2], offset 0
    /// let data = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0];
    /// let t = Tensor::<f64>::from_vec(data, &[2, 3], &[1, 2], 0).unwrap();
    /// ```
    pub fn from_vec(
        _data: Vec<T>,
        _dims: &[usize],
        _strides: &[isize],
        _offset: isize,
    ) -> Result<Self> {
        todo!()
    }

    /// Create an identity matrix.
    ///
    /// Returns a 2D tensor of shape `[n, n]` with ones on the diagonal
    /// and zeros elsewhere.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let id = Tensor::<f64>::eye(3,
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// assert_eq!(id.dims(), &[3, 3]);
    /// ```
    pub fn eye(_n: usize, _memory_space: LogicalMemorySpace, _order: MemoryOrder) -> Self {
        todo!()
    }

    // ========================================================================
    // Metadata
    // ========================================================================

    /// Returns the shape (size of each dimension).
    pub fn dims(&self) -> &[usize] {
        &self.dims
    }

    /// Returns the strides (in units of `T`).
    pub fn strides(&self) -> &[isize] {
        &self.strides
    }

    /// Returns the element offset into the data buffer.
    pub fn offset(&self) -> isize {
        self.offset
    }

    /// Returns a reference to the underlying data buffer.
    pub fn buffer(&self) -> &DataBuffer<T> {
        &self.buffer
    }

    /// Returns a mutable reference to the underlying data buffer.
    pub fn buffer_mut(&mut self) -> &mut DataBuffer<T> {
        &mut self.buffer
    }

    /// Returns the number of dimensions (rank).
    pub fn ndim(&self) -> usize {
        self.dims.len()
    }

    /// Returns the total number of elements.
    pub fn len(&self) -> usize {
        todo!()
    }

    /// Returns `true` if the tensor has zero elements.
    pub fn is_empty(&self) -> bool {
        todo!()
    }

    /// Returns the logical memory space where this tensor's data resides.
    pub fn logical_memory_space(&self) -> LogicalMemorySpace {
        self.logical_memory_space
    }

    /// Returns the preferred compute device override, if set.
    pub fn preferred_compute_device(&self) -> Option<ComputeDevice> {
        self.preferred_compute_device
    }

    /// Set the preferred compute device override.
    ///
    /// When set, this device will be used for operations on this tensor
    /// instead of the default device selected by
    /// [`preferred_compute_devices`](tenferro_device::preferred_compute_devices).
    /// Pass `None` to clear the override and revert to automatic selection.
    pub fn set_preferred_compute_device(&mut self, device: Option<ComputeDevice>) {
        self.preferred_compute_device = device;
    }

    /// Return the effective compute devices for a given operation kind.
    ///
    /// If a preferred compute device is set, returns a single-element vector
    /// containing that device. Otherwise, delegates to
    /// [`preferred_compute_devices`](tenferro_device::preferred_compute_devices).
    ///
    /// # Errors
    ///
    /// Returns an error if no compatible compute device is found.
    pub fn effective_compute_devices(
        &self,
        _op_kind: OpKind,
    ) -> tenferro_device::Result<Vec<ComputeDevice>> {
        todo!()
    }

    // ========================================================================
    // View operations (zero-copy, public API waits if pending)
    // ========================================================================

    /// Returns a [`TensorView`] for data inspection.
    ///
    /// Waits for any pending accelerator computation before returning.
    /// The returned view has `event = None` (data is ready to read).
    pub fn tensor_view(&self) -> TensorView<'_, T> {
        self.wait();
        TensorView {
            data: &self.buffer,
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: None,
        }
    }

    /// Returns a non-blocking [`TensorView`] that propagates the
    /// pending event (if any) from the source tensor.
    ///
    /// This is an internal API used by `einsum` and other accelerator
    /// operations to chain computations without CPU synchronization.
    pub(crate) fn as_operand_view(&self) -> TensorView<'_, T> {
        TensorView {
            data: &self.buffer,
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: self.event.as_ref(),
        }
    }

    /// Permute (reorder) the dimensions of the tensor.
    ///
    /// This is a zero-copy operation that only modifies dims and strides.
    /// Waits for any pending accelerator computation before returning.
    ///
    /// # Arguments
    ///
    /// * `perm` — Permutation of dimension indices (e.g., `&[1, 0]` to transpose)
    ///
    /// # Errors
    ///
    /// Returns an error if `perm` is not a valid permutation of `0..ndim()`.
    pub fn permute(&self, _perm: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Broadcast the tensor to a larger shape.
    ///
    /// Dimensions of size 1 are expanded to the target size (zero-copy via
    /// stride 0). This is a zero-copy metadata operation.
    ///
    /// # Errors
    ///
    /// Returns an error if `target_dims` is incompatible with the current shape.
    pub fn broadcast(&self, _target_dims: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Extract a diagonal view by merging pairs of axes.
    ///
    /// For each `(axis_i, axis_j)` pair, the two dimensions are replaced
    /// by a single diagonal dimension. This is a zero-copy stride trick.
    ///
    /// # Errors
    ///
    /// Returns an error if any axis is out of range or the paired
    /// dimensions have different sizes.
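    ///
    /// # Examples
    ///
    /// A sketch mirroring the crate-level [`TensorView::diagonal`] example:
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let sq = Tensor::<f64>::zeros(&[4, 4],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Merge axes 0 and 1 → 1-D diagonal of length 4
    /// let diag = sq.diagonal(&[(0, 1)]).unwrap();
    /// assert_eq!(diag.dims(), &[4]);
    /// ```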
    pub fn diagonal(&self, _axes: &[(usize, usize)]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Reshape the tensor to a new shape.
    ///
    /// The total number of elements must remain the same.
    /// Requires contiguous data; returns an error if the tensor is not contiguous.
    ///
    /// # Errors
    ///
    /// Returns an error if the tensor is not contiguous or the new shape
    /// has a different total element count.
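    ///
    /// # Examples
    ///
    /// A sketch, assuming a freshly allocated (hence contiguous) tensor:
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[2, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let flat = a.reshape(&[6]).unwrap();
    /// assert_eq!(flat.dims(), &[6]);
    /// ```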
    pub fn reshape(&self, _new_dims: &[usize]) -> Result<Tensor<T>> {
        todo!()
    }

    /// Select a single index along a dimension, removing that dimension.
    ///
    /// Returns a tensor with `ndim() - 1` dimensions. This is a zero-copy
    /// operation that adjusts the offset and removes the selected dimension.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `index >= dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// // Batched matrices [m, n, batch] = [3, 4, 10]
    /// let a = Tensor::<f64>::zeros(&[3, 4, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Select batch index 5 → [3, 4]
    /// let mat = a.select(2, 5).unwrap();
    /// assert_eq!(mat.dims(), &[3, 4]);
    /// ```
    pub fn select(&self, _dim: usize, _index: usize) -> Result<Tensor<T>> {
        todo!()
    }

    /// Narrow (slice) a dimension to a sub-range.
    ///
    /// Returns a tensor with the same number of dimensions, but
    /// `dims()[dim]` reduced to `length`. Zero-copy: only offset and
    /// dim size change.
    ///
    /// # Errors
    ///
    /// Returns an error if `dim >= ndim()` or `start + length > dims()[dim]`.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[3, 10],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Take columns 2..5 → [3, 3]
    /// let sub = a.narrow(1, 2, 3).unwrap();
    /// assert_eq!(sub.dims(), &[3, 3]);
    /// ```
    pub fn narrow(&self, _dim: usize, _start: usize, _length: usize) -> Result<Tensor<T>> {
        todo!()
    }

    // ========================================================================
    // Data operations
    // ========================================================================

    /// Return a contiguous copy of this tensor in the given memory order.
    ///
    /// If the tensor is already contiguous in the requested order,
    /// this may avoid copying (implementation-defined).
    pub fn contiguous(&self, _order: MemoryOrder) -> Tensor<T> {
        todo!()
    }

    /// Consume this tensor and return a contiguous version.
    ///
    /// If the tensor is already contiguous in the requested order, returns
    /// `self` without copying or allocating. Otherwise, copies data into a
    /// new contiguous buffer.
    ///
    /// Prefer this over [`contiguous`](Tensor::contiguous) when you no
    /// longer need the original tensor, as it avoids an unnecessary
    /// allocation and copy in the already-contiguous case.
893 ///
894 /// # Examples
895 ///
896 /// ```ignore
897 /// use tenferro_tensor::{Tensor, MemoryOrder};
898 /// use tenferro_device::LogicalMemorySpace;
899 ///
900 /// let a = Tensor::<f64>::zeros(
901 /// &[3, 4],
902 /// LogicalMemorySpace::MainMemory,
903 /// MemoryOrder::ColumnMajor,
904 /// );
905 ///
906 /// // Transpose creates a non-contiguous view
907 /// let at = a.permute(&[1, 0]).unwrap();
908 /// assert!(!at.is_contiguous());
909 ///
910 /// // into_contiguous copies only when necessary
911 /// let at_contig = at.into_contiguous(MemoryOrder::ColumnMajor);
912 /// assert!(at_contig.is_contiguous());
913 ///
914 /// // Already contiguous: zero-cost passthrough
915 /// let b = Tensor::<f64>::zeros(
916 /// &[3, 4],
917 /// LogicalMemorySpace::MainMemory,
918 /// MemoryOrder::RowMajor,
919 /// );
920 /// let b2 = b.into_contiguous(MemoryOrder::RowMajor); // no copy
921 /// ```
922 pub fn into_contiguous(self, _order: MemoryOrder) -> Tensor<T> {
923 todo!()
924 }
925
926 /// Returns `true` if the tensor data is contiguous in memory.
927 ///
928 /// A tensor is contiguous if its elements occupy a dense block of
929 /// memory with no gaps, in either column-major or row-major order.
930 pub fn is_contiguous(&self) -> bool {
931 todo!()
932 }
933
934 /// Return a tensor with complex-conjugated elements.
935 ///
936 /// For real types (`f32`, `f64`), returns a copy unchanged.
937 /// For complex types (`Complex32`, `Complex64`), negates the imaginary part.
938 ///
939 /// # Examples
940 ///
941 /// ```ignore
942 /// use tenferro_tensor::{Tensor, MemoryOrder};
943 /// use num_complex::Complex64;
944 ///
945 /// let data = vec![Complex64::new(1.0, 2.0), Complex64::new(3.0, -4.0)];
946 /// let a = Tensor::from_slice(&data, &[2], MemoryOrder::ColumnMajor).unwrap();
947 /// let a_conj = a.conj();
948 /// // a_conj contains [1.0 - 2.0i, 3.0 + 4.0i]
949 /// ```
950 pub fn conj(&self) -> Tensor<T>
951 where
952 T: Conjugate,
    {
        // Reading buffer data from the CPU requires any pending
        // accelerator computation to have completed.
        self.wait();
        // Conjugation is element-wise and position-independent,
        // so we conjugate the raw buffer directly and preserve layout.
        let conj_data: Vec<T> = self
            .buffer
            .as_slice()
            .iter()
            .copied()
            .map(T::conj)
            .collect();
        Tensor {
            buffer: DataBuffer::from_vec(conj_data),
            dims: self.dims.clone(),
            strides: self.strides.clone(),
            offset: self.offset,
            logical_memory_space: self.logical_memory_space,
            preferred_compute_device: self.preferred_compute_device,
            event: None,
        }
    }

    /// Consume this tensor and return one with complex-conjugated elements.
    ///
    /// Like [`conj`](Tensor::conj) but consumes `self`, potentially
    /// reusing the buffer if no other references exist.
    pub fn into_conj(self) -> Tensor<T>
    where
        T: Conjugate,
    {
        todo!()
    }

    /// Extract the lower triangular part of a matrix.
    ///
    /// Returns a new tensor with elements above the `diagonal`-th diagonal
    /// set to zero. For batched tensors `(m, n, *)`, applies independently
    /// to each batch element.
    ///
    /// - `diagonal = 0`: main diagonal (default)
    /// - `diagonal > 0`: above main diagonal
    /// - `diagonal < 0`: below main diagonal
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::ones(&[3, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let lower = a.tril(0);
    /// // [[1, 0, 0],
    /// //  [1, 1, 0],
    /// //  [1, 1, 1]]
    /// ```
    pub fn tril(&self, _diagonal: isize) -> Tensor<T> {
        todo!()
    }

    /// Extract the upper triangular part of a matrix.
    ///
    /// Returns a new tensor with elements below the `diagonal`-th diagonal
    /// set to zero. For batched tensors `(m, n, *)`, applies independently
    /// to each batch element.
    ///
    /// - `diagonal = 0`: main diagonal (default)
    /// - `diagonal > 0`: above main diagonal
    /// - `diagonal < 0`: below main diagonal
    ///
    /// # Examples
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::ones(&[3, 3],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// let upper = a.triu(0);
    /// // [[1, 1, 1],
    /// //  [0, 1, 1],
    /// //  [0, 0, 1]]
    /// ```
    pub fn triu(&self, _diagonal: isize) -> Tensor<T> {
        todo!()
    }

    /// Asynchronously transfer this tensor to a different memory space.
    ///
    /// Returns a new tensor in the target memory space. If the source
    /// and destination spaces are the same, returns a zero-copy no-op.
    /// Otherwise, data is copied (potentially asynchronously for GPU).
    ///
    /// # Errors
    ///
    /// Returns an error if the transfer is not supported.
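    ///
    /// # Examples
    ///
    /// A sketch of the same-space no-op case:
    ///
    /// ```ignore
    /// use tenferro_tensor::{Tensor, MemoryOrder};
    /// use tenferro_device::LogicalMemorySpace;
    ///
    /// let a = Tensor::<f64>::zeros(&[2, 2],
    ///     LogicalMemorySpace::MainMemory, MemoryOrder::ColumnMajor);
    /// // Source and target spaces match → zero-copy no-op
    /// let b = a.to_memory_space_async(LogicalMemorySpace::MainMemory).unwrap();
    /// assert_eq!(b.dims(), &[2, 2]);
    /// ```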
    pub fn to_memory_space_async(&self, _target: LogicalMemorySpace) -> Result<Tensor<T>> {
        todo!()
    }

    // ========================================================================
    // GPU async support
    // ========================================================================

    /// Wait for any pending GPU computation to complete.
    ///
    /// No-op for CPU tensors or when GPU computation has already completed.
    /// Methods that access tensor data from CPU call this internally, so
    /// explicit calls are only needed when the caller wants to ensure
    /// completion at a specific point.
    ///
    /// # Examples
    ///
    /// ```ignore
    /// // GPU einsum returns immediately with pending event
    /// let c = einsum("ij,jk->ik", &[&a_gpu, &b_gpu]).unwrap();
    /// assert!(!c.is_ready());
    ///
    /// // Explicit wait
    /// c.wait();
    /// assert!(c.is_ready());
    ///
    /// // Chaining: implicit sync via stream dependencies, no CPU wait
    /// let d = einsum("ij,jk->ik", &[&c, &e_gpu]).unwrap();
    /// // → detects c.event → chains on GPU → returns immediately
    /// ```
    pub fn wait(&self) {
        // Currently a no-op: only CPU tensors exist (event is always None).
        // Will synchronize on CompletionEvent when GPU backends are implemented.
    }

    /// Check if tensor data is ready without blocking.
    ///
    /// Returns `true` for CPU tensors (always ready) and for GPU tensors
    /// whose computation has completed. Returns `false` if a GPU operation
    /// is still in progress.
    pub fn is_ready(&self) -> bool {
        self.event.is_none()
    }
}

// ============================================================================
// Differentiable impl — connects Tensor<T> to the generic AD framework
// ============================================================================

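// A sketch of the intended contract, assuming the usual chainrules-style
// semantics (not confirmed by `chainrules_core` docs here): `zero_tangent`
// returns a zeros tensor with the same shape and memory space as `self`,
// and `accumulate_tangent(a, b)` returns the element-wise sum of `a` and `b`.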
impl<T: Scalar> chainrules_core::Differentiable for Tensor<T> {
    type Tangent = Tensor<T>;

    fn zero_tangent(&self) -> Tensor<T> {
        todo!()
    }

    fn accumulate_tangent(_a: Tensor<T>, _b: &Tensor<T>) -> Tensor<T> {
        todo!()
    }
}

// ============================================================================
// PhantomData — not needed for DataBuffer's type parameter
// ============================================================================

// DataBuffer<T> uses T directly in Vec<T> and *const T, so no PhantomData
// marker is required to anchor the type parameter (an otherwise-unused
// parameter would be a compile error, E0392, not just a warning).
// This module-level comment documents the design decision.
1114// This module-level comment documents the design decision.