tenferro_ad/
eager_ops.rs

1use std::sync::Arc;
2
3use computegraph::GraphOperation;
4use num_complex::{Complex32, Complex64};
5use tenferro_ops::broadcast::{
6    broadcast_error_to_validation, broadcast_in_dim_extent_error, broadcast_input_plan,
7    broadcast_shape, broadcast_shapes,
8};
9use tenferro_ops::dim_expr::DimExpr;
10use tenferro_ops::std_tensor_op::StdTensorOp;
11use tenferro_tensor::{
12    DType, DotGeneralConfig, GatherConfig, PadConfig, ScatterConfig, SliceConfig, Tensor,
13    TensorValue,
14};
15
16use crate::eager::{
17    eager_grad_recording_enabled, eager_op_profile_start, exec_single_output,
18    exec_single_output_read, maybe_print_eager_op_profile, profile_eager_op_section,
19    record_eager_op_profile, record_eager_outputs, record_eager_value_outputs, EagerTensor,
20};
21use crate::eager_exec::exec_dot_general_with_conj_on_tensor_reads;
22use crate::error::{Error, Result};
23use crate::metadata::push_metadata_scope;
24
25pub(crate) fn broadcast_binary(
26    op: &'static str,
27    lhs: &EagerTensor,
28    rhs: &EagerTensor,
29) -> Result<(EagerTensor, EagerTensor)> {
30    ensure_same_context(lhs, rhs)?;
31    let shape =
32        broadcast_shape(lhs.shape(), rhs.shape()).map_err(|err| broadcast_error(op, err))?;
33    Ok((
34        broadcast_to(op, lhs, &shape)?,
35        broadcast_to(op, rhs, &shape)?,
36    ))
37}
38
39pub(crate) fn broadcast_ternary(
40    op: &'static str,
41    first: &EagerTensor,
42    second: &EagerTensor,
43    third: &EagerTensor,
44) -> Result<(EagerTensor, EagerTensor, EagerTensor)> {
45    ensure_same_context(first, second)?;
46    ensure_same_context(first, third)?;
47    let shape = broadcast_shapes([first.shape(), second.shape(), third.shape()])
48        .map_err(|err| broadcast_error(op, err))?;
49    Ok((
50        broadcast_to(op, first, &shape)?,
51        broadcast_to(op, second, &shape)?,
52        broadcast_to(op, third, &shape)?,
53    ))
54}
55
56fn broadcast_to(
57    op: &'static str,
58    input: &EagerTensor,
59    target_shape: &[usize],
60) -> Result<EagerTensor> {
61    let input_shape = input.shape();
62    if input_shape == target_shape {
63        return Ok(input.clone());
64    }
65
66    let plan =
67        broadcast_input_plan(input_shape, target_shape).map_err(|err| broadcast_error(op, err))?;
68    let source = if plan.source_shape == input_shape {
69        input.clone()
70    } else {
71        input.reshape(&plan.source_shape)?
72    };
73    source.broadcast_in_dim(target_shape, &plan.dims)
74}
75
76fn broadcast_error(op: &'static str, err: tenferro_ops::broadcast::BroadcastError) -> Error {
77    tenferro_tensor::Error::validation(op, broadcast_error_to_validation(err)).into()
78}
79
80fn ensure_same_context(lhs: &EagerTensor, rhs: &EagerTensor) -> Result<()> {
81    if !lhs.same_context(rhs) {
82        return Err(Error::ContextMismatch {
83            lhs: lhs.ctx_id(),
84            rhs: rhs.ctx_id(),
85        });
86    }
87    Ok(())
88}
89
90impl std::ops::Add for &EagerTensor {
91    type Output = Result<EagerTensor>;
92
93    fn add(self, rhs: &EagerTensor) -> Result<EagerTensor> {
94        EagerTensor::add(self, rhs)
95    }
96}
97
98impl std::ops::Sub for &EagerTensor {
99    type Output = Result<EagerTensor>;
100
101    fn sub(self, rhs: &EagerTensor) -> Result<EagerTensor> {
102        EagerTensor::sub(self, rhs)
103    }
104}
105
106impl std::ops::Mul for &EagerTensor {
107    type Output = Result<EagerTensor>;
108
109    fn mul(self, rhs: &EagerTensor) -> Result<EagerTensor> {
110        EagerTensor::mul(self, rhs)
111    }
112}
113
114impl std::ops::Div for &EagerTensor {
115    type Output = Result<EagerTensor>;
116
117    fn div(self, rhs: &EagerTensor) -> Result<EagerTensor> {
118        EagerTensor::div(self, rhs)
119    }
120}
121
122impl std::ops::Rem for &EagerTensor {
123    type Output = Result<EagerTensor>;
124
125    fn rem(self, rhs: &EagerTensor) -> Result<EagerTensor> {
126        EagerTensor::rem(self, rhs)
127    }
128}
129
130impl std::ops::Neg for &EagerTensor {
131    type Output = Result<EagerTensor>;
132
133    fn neg(self) -> Result<EagerTensor> {
134        EagerTensor::neg(self)
135    }
136}
137
138impl EagerTensor {
139    /// Elementwise addition.
140    ///
141    /// # Examples
142    ///
143    /// ```
144    /// use tenferro_cpu::CpuBackend;
145    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
146    ///
147    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
148    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx.clone()).unwrap();
149    /// let y = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![3.0_f64, 4.0]).unwrap(), ctx.clone()).unwrap();
150    /// let z = x.add(&y).unwrap();
151    ///
152    /// assert_eq!(z.materialized().unwrap().as_slice::<f64>().unwrap(), &[4.0, 6.0]);
153    /// # Ok::<(), tenferro_ad::Error>(())
154    /// ```
155    ///
156    /// # Errors
157    ///
158    /// Returns [`Error::ContextMismatch`] for tensors from different eager
159    /// runtimes, [`tenferro_tensor::Error::Validation`] with
160    /// `ShapeMismatch`/`DTypeMismatch` for incompatible operands, or a typed
161    /// backend/runtime-state error during execution.
162    pub fn add(&self, other: &Self) -> Result<Self> {
163        let (lhs, rhs) = broadcast_binary("add", self, other)?;
164        lhs.binary_op(&rhs, StdTensorOp::Add)
165    }
166
167    /// Elementwise subtraction.
168    ///
169    /// # Errors
170    ///
171    /// Returns [`Error::ContextMismatch`] for tensors from different eager
172    /// runtimes, [`tenferro_tensor::Error::Validation`] with
173    /// `ShapeMismatch`/`DTypeMismatch` for incompatible operands, or a typed
174    /// backend/runtime-state error during execution.
175    pub fn sub(&self, other: &Self) -> Result<Self> {
176        let (lhs, rhs) = broadcast_binary("sub", self, other)?;
177        lhs.binary_op(&rhs, StdTensorOp::Sub)
178    }
179
180    /// Elementwise multiplication.
181    ///
182    /// # Examples
183    ///
184    /// ```
185    /// use tenferro_cpu::CpuBackend;
186    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
187    ///
188    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
189    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx.clone()).unwrap();
190    /// let y = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![3.0_f64, 4.0]).unwrap(), ctx.clone()).unwrap();
191    /// let z = x.mul(&y).unwrap();
192    ///
193    /// assert_eq!(z.materialized().unwrap().as_slice::<f64>().unwrap(), &[3.0, 8.0]);
194    /// # Ok::<(), tenferro_ad::Error>(())
195    /// ```
196    ///
197    /// # Errors
198    ///
199    /// Returns [`Error::ContextMismatch`] for tensors from different eager
200    /// runtimes, [`tenferro_tensor::Error::Validation`] with
201    /// `ShapeMismatch`/`DTypeMismatch` for incompatible operands, or a typed
202    /// backend/runtime-state error during execution.
203    pub fn mul(&self, other: &Self) -> Result<Self> {
204        let (lhs, rhs) = broadcast_binary("mul", self, other)?;
205        lhs.binary_op(&rhs, StdTensorOp::Mul)
206    }
207
208    /// Negate the tensor.
209    ///
210    /// # Examples
211    ///
212    /// ```
213    /// use tenferro_cpu::CpuBackend;
214    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
215    ///
216    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
217    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, -2.0]).unwrap(), ctx.clone()).unwrap();
218    /// let y = x.neg().unwrap();
219    ///
220    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[-1.0, 2.0]);
221    /// # Ok::<(), tenferro_ad::Error>(())
222    /// ```
223    ///
224    /// # Errors
225    ///
226    /// Returns [`tenferro_tensor::Error::Unsupported`] when the backend does
227    /// not implement negation for the dtype, or a typed backend/runtime-state
228    /// error during execution.
229    pub fn neg(&self) -> Result<Self> {
230        self.unary_op(StdTensorOp::Neg)
231    }
232
233    /// Elementwise exponential.
234    ///
235    /// # Examples
236    ///
237    /// ```
238    /// use tenferro_cpu::CpuBackend;
239    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
240    ///
241    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
242    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![0.0_f64]).unwrap(), ctx.clone()).unwrap();
243    /// let y = x.exp().unwrap();
244    ///
245    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0]);
246    /// # Ok::<(), tenferro_ad::Error>(())
247    /// ```
248    ///
249    /// # Errors
250    ///
251    /// Returns [`tenferro_tensor::Error::Unsupported`] when the backend does
252    /// not implement exponentiation for the dtype, or a typed backend/
253    /// runtime-state error during execution.
254    pub fn exp(&self) -> Result<Self> {
255        self.unary_op(StdTensorOp::Exp)
256    }
257
258    /// Reduce sum over the requested axes.
259    ///
260    /// # Examples
261    ///
262    /// ```
263    /// use tenferro_cpu::CpuBackend;
264    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
265    ///
266    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
267    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
268    /// let y = x.reduce_sum(None).unwrap();
269    ///
270    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[10.0]);
271    /// # Ok::<(), tenferro_ad::Error>(())
272    /// ```
273    ///
274    /// # Errors
275    ///
276    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds` or
277    /// `DuplicateAxis` for an invalid reduction axis, or a typed
278    /// unsupported/backend/runtime-state error for the selected dtype.
279    pub fn reduce_sum(&self, axes: Option<&[usize]>) -> Result<Self> {
280        let axes = axes.map_or_else(|| (0..self.shape().len()).collect(), <[usize]>::to_vec);
281        validate_eager_axes("EagerTensor::reduce_sum", self.shape().len(), &axes)?;
282        self.unary_op(StdTensorOp::ReduceSum { axes })
283    }
284
285    /// Sum elementwise squares over the requested axes.
286    ///
287    /// Each value is squared in its input dtype before reduction. The initial
288    /// supported dtypes are `f32` and `f64`; other dtypes return a typed
289    /// unsupported error. Passing an empty axis slice returns the elementwise
290    /// square without reducing rank.
291    ///
292    /// This operation is useful when the squared sum is needed directly. Use
293    /// the linalg norm APIs when a square root or complex magnitude semantics
294    /// are required.
295    ///
296    /// # Errors
297    ///
298    /// Returns a typed validation error for invalid axes, a typed unsupported
299    /// error for other dtypes, or a typed backend or runtime-state error during
300    /// execution.
301    pub fn reduce_sum_squares(&self, axes: &[usize]) -> Result<Self> {
302        validate_eager_axes("EagerTensor::reduce_sum_squares", self.shape().len(), axes)?;
303        self.unary_op(StdTensorOp::ReduceSumSquares {
304            axes: axes.to_vec(),
305        })
306    }
307
308    /// Execute a dot-general contraction eagerly.
309    ///
310    /// # Examples
311    ///
312    /// ```
313    /// use tenferro_cpu::CpuBackend;
314    /// use tenferro_ad::{DotGeneralConfig, EagerRuntime, EagerTensor, Tensor};
315    ///
316    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
317    /// let a = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 3], vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(), ctx.clone()).unwrap();
318    /// let b = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![3, 2], vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0]).unwrap(), ctx.clone()).unwrap();
319    /// let c = a.dot_general(&b, DotGeneralConfig {
320    ///     lhs_contracting_dims: vec![1],
321    ///     rhs_contracting_dims: vec![0],
322    ///     lhs_batch_dims: vec![],
323    ///     rhs_batch_dims: vec![],
324    /// }).unwrap();
325    ///
326    /// assert_eq!(c.shape(), &[2, 2]);
327    /// # Ok::<(), tenferro_ad::Error>(())
328    /// ```
329    ///
330    /// # Errors
331    ///
332    /// Returns [`tenferro_tensor::Error::Validation`] with `RankMismatch`,
333    /// `AxisOutOfBounds`, `DuplicateAxis`, `ShapeMismatch`, or `DTypeMismatch`
334    /// when `config` or the operands are invalid; backend and runtime-state
335    /// failures retain their typed sources.
336    pub fn dot_general(&self, other: &Self, config: DotGeneralConfig) -> Result<Self> {
337        validate_eager_dot_general_config(
338            "EagerTensor::dot_general",
339            &config,
340            self.shape().len(),
341            other.shape().len(),
342        )?;
343        self.binary_op(other, StdTensorOp::DotGeneral { config })
344    }
345
346    /// Execute a dot-general contraction, optionally conjugating either operand.
347    ///
348    /// Untracked tensors route the conjugation flags directly to the backend so
349    /// the conjugated operand does not need to be materialized. Tracked tensors
350    /// fall back to explicit `Conj` plus `DotGeneral` so reverse-mode AD keeps
351    /// the same graph semantics as the standard eager ops.
352    ///
353    /// # Errors
354    ///
355    /// Returns [`Error::ContextMismatch`] for operands from different eager
356    /// runtimes, [`tenferro_tensor::Error::Validation`] for rank/axis/shape or
357    /// dtype mismatches in `config`, or a typed backend/runtime-state error.
358    pub fn dot_general_with_conj(
359        &self,
360        other: &Self,
361        config: DotGeneralConfig,
362        lhs_conj: bool,
363        rhs_conj: bool,
364    ) -> Result<Self> {
365        if !self.same_context(other) {
366            return Err(Error::ContextMismatch {
367                lhs: self.ctx_id(),
368                rhs: other.ctx_id(),
369            });
370        }
371        validate_eager_dot_general_config(
372            "EagerTensor::dot_general_with_conj",
373            &config,
374            self.shape().len(),
375            other.shape().len(),
376        )?;
377
378        if !self.requires_grad && !other.requires_grad {
379            let ctx = Arc::clone(&self.ctx);
380            let mut backend = ctx.lock_backend()?;
381            let output = exec_dot_general_with_conj_on_tensor_reads(
382                self.tensor_read(),
383                other.tensor_read(),
384                &config,
385                lhs_conj,
386                rhs_conj,
387                &mut *backend,
388            )?;
389            drop(backend);
390            return Self::new_untracked_result(ctx, output);
391        }
392
393        match (lhs_conj, rhs_conj) {
394            (false, false) => self.dot_general(other, config),
395            (true, false) => self.conj()?.dot_general(other, config),
396            (false, true) => {
397                let rhs = other.conj()?;
398                self.dot_general(&rhs, config)
399            }
400            (true, true) => {
401                let lhs = self.conj()?;
402                let rhs = other.conj()?;
403                lhs.dot_general(&rhs, config)
404            }
405        }
406    }
407
408    /// Scale by a real scalar: `y = factor * x`.
409    ///
    /// For integer tensors the factor is rounded to the nearest integer before
    /// multiplication; for boolean tensors a finite zero factor maps to `false`
    /// and any other finite factor to `true`; complex tensors receive a
    /// zero-imaginary scalar.
413    ///
414    /// # Errors
415    ///
416    /// Returns [`Error::TensorRuntime`] with
417    /// [`tenferro_tensor::ValidationError::InvalidArgument`] when an integer or
418    /// boolean factor is non-finite or outside the input dtype's range. Backend
419    /// and runtime execution failures retain their typed source variants.
420    pub fn scale_real(&self, factor: f64) -> Result<Self> {
421        let scalar = match self.dtype() {
422            DType::F64 => Tensor::from_vec_col_major(vec![], vec![factor])?,
423            DType::F32 => Tensor::from_vec_col_major(vec![], vec![factor as f32])?,
424            DType::I32 => Tensor::from_vec_col_major(vec![], vec![round_real_to_i32(factor)?])?,
425            DType::I64 => Tensor::from_vec_col_major(vec![], vec![round_real_to_i64(factor)?])?,
426            DType::Bool => Tensor::from_vec_col_major(vec![], vec![bool_from_real(factor)?])?,
427            DType::C64 => Tensor::from_vec_col_major(vec![], vec![Complex64::new(factor, 0.0)])?,
428            DType::C32 => {
429                Tensor::from_vec_col_major(vec![], vec![Complex32::new(factor as f32, 0.0)])?
430            }
431        };
432        let scalar = EagerTensor::from_tensor_in(scalar, Arc::clone(&self.ctx))?;
433        self.mul(&scalar)
434    }
435
436    /// Scale a complex tensor by a complex scalar: `y = factor * x`.
437    ///
438    /// # Errors
439    ///
440    /// Returns [`Error::TensorRuntime`] with
441    /// [`tenferro_tensor::ValidationError::InvalidArgument`] for a non-complex
442    /// input dtype. Backend and runtime execution failures retain their typed
443    /// source variants.
444    pub fn scale_complex(&self, factor: Complex64) -> Result<Self> {
445        let scalar = match self.dtype() {
446            DType::C64 => Tensor::from_vec_col_major(vec![], vec![factor])?,
447            DType::C32 => Tensor::from_vec_col_major(
448                vec![],
449                vec![Complex32::new(factor.re as f32, factor.im as f32)],
450            )?,
451            dtype => {
452                return Err(Error::TensorRuntime(
453                    tenferro_tensor::Error::invalid_argument(
454                        "scale_complex",
455                        "dtype",
456                        format!("requires complex tensor dtype, got {dtype:?}"),
457                    ),
458                ));
459            }
460        };
461        let scalar = EagerTensor::from_tensor_in(scalar, Arc::clone(&self.ctx))?;
462        self.mul(&scalar)
463    }
464
465    /// Matrix multiplication for rank-2 tensors.
466    ///
467    /// This is a convenience wrapper over [`Self::dot_general`] that
468    /// contracts the left matrix's column axis with the right matrix's row
469    /// axis.
470    ///
471    /// # Examples
472    ///
473    /// ```
474    /// use tenferro_cpu::CpuBackend;
475    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
476    ///
477    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
478    /// let a = EagerTensor::from_tensor_in(
479    ///     Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(),
480    ///     ctx.clone(),
481    /// ).unwrap();
482    /// let b = EagerTensor::from_tensor_in(
483    ///     Tensor::from_vec_col_major(vec![2, 1], vec![5.0_f64, 6.0]).unwrap(),
484    ///     ctx,
485    /// ).unwrap();
486    /// let c = a.matmul(&b).unwrap();
487    ///
488    /// assert_eq!(c.shape(), &[2, 1]);
489    /// assert_eq!(c.materialized().unwrap().as_slice::<f64>().unwrap(), &[23.0, 34.0]);
490    /// # Ok::<(), tenferro_ad::Error>(())
491    /// ```
492    ///
493    /// # Errors
494    ///
495    /// Returns [`tenferro_tensor::ValidationError::RankMismatch`] when either operand is
496    /// not rank 2, `ShapeMismatch` when the inner dimensions differ, or a typed
497    /// dtype/backend/runtime-state error during the contraction.
498    pub fn matmul(&self, other: &Self) -> Result<Self> {
499        let lhs_shape = self.shape();
500        let rhs_shape = other.shape();
501        if lhs_shape.len() != 2 {
502            return Err(tenferro_tensor::Error::rank_mismatch("matmul", 2, lhs_shape.len()).into());
503        }
504        if rhs_shape.len() != 2 {
505            return Err(tenferro_tensor::Error::rank_mismatch("matmul", 2, rhs_shape.len()).into());
506        }
507        if lhs_shape[1] != rhs_shape[0] {
508            return Err(
509                tenferro_tensor::Error::shape_mismatch("matmul", lhs_shape, rhs_shape).into(),
510            );
511        }
512        self.dot_general(
513            other,
514            DotGeneralConfig {
515                lhs_contracting_dims: vec![1],
516                rhs_contracting_dims: vec![0],
517                lhs_batch_dims: vec![],
518                rhs_batch_dims: vec![],
519            },
520        )
521    }
522
523    /// Permute tensor axes.
524    ///
525    /// # Examples
526    ///
527    /// ```
528    /// use tenferro_cpu::CpuBackend;
529    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
530    ///
531    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
532    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(
533    ///     vec![2, 3],
534    ///     vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0],
535    /// ).unwrap(), ctx.clone()).unwrap();
536    /// let y = x.transpose(&[1, 0]).unwrap();
537    ///
538    /// assert_eq!(y.shape(), &[3, 2]);
539    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 3.0, 5.0, 2.0, 4.0, 6.0]);
540    /// # Ok::<(), tenferro_ad::Error>(())
541    /// ```
542    ///
543    /// # Errors
544    ///
545    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds`
546    /// or `DuplicateAxis` when `perm` is not a permutation, or a typed
547    /// backend/runtime-state error while creating the view.
548    pub fn transpose(&self, perm: &[usize]) -> Result<Self> {
549        let op = StdTensorOp::Transpose {
550            perm: perm.to_vec(),
551        };
552        let value = self
553            .value
554            .transpose_view(perm)
555            .map_err(Error::TensorRuntime)?;
556        Self::nary_value_op(&[self], op, value)
557    }
558
559    /// Reshape without changing element order.
560    ///
561    /// # Examples
562    ///
563    /// ```
564    /// use tenferro_cpu::CpuBackend;
565    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
566    ///
567    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
568    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(
569    ///     vec![2, 3],
570    ///     vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0],
571    /// ).unwrap(), ctx.clone()).unwrap();
572    /// let y = x.reshape(&[6]).unwrap();
573    ///
574    /// assert_eq!(y.shape(), &[6]);
575    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 2.0, 3.0, 4.0, 5.0, 6.0]);
576    /// # Ok::<(), tenferro_ad::Error>(())
577    /// ```
578    ///
579    /// # Errors
580    ///
581    /// Returns [`tenferro_tensor::ValidationError::ShapeMismatch`] when the element count
582    /// changes, `InvalidArgument` when the target shape product overflows, or a
583    /// typed backend/runtime-state error.
584    pub fn reshape(&self, shape: &[usize]) -> Result<Self> {
585        let op = StdTensorOp::Reshape {
586            to_shape: DimExpr::from_concrete(shape),
587        };
588        if let Ok(value) = self.value.reshape_view(shape) {
589            return Self::nary_value_op(&[self], op, value);
590        }
591        self.unary_op(op)
592    }
593
594    /// Slice with explicit start, limit, and stride per axis.
595    ///
596    /// # Examples
597    ///
598    /// ```
599    /// use tenferro_cpu::CpuBackend;
600    /// use tenferro_ad::{EagerRuntime, EagerTensor, SliceConfig, Tensor};
601    ///
602    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
603    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![4], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
604    /// let y = x
605    ///     .slice(SliceConfig {
606    ///         starts: vec![1],
607    ///         limits: vec![3],
608    ///         strides: vec![1],
609    ///     })
610    ///     .unwrap();
611    ///
612    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[2.0, 3.0]);
613    /// # Ok::<(), tenferro_ad::Error>(())
614    /// ```
615    ///
616    /// # Errors
617    ///
618    /// Returns [`tenferro_tensor::Error::Validation`] with
619    /// `AxisOutOfBounds`/`InvalidArgument` when starts, limits, or strides are
620    /// invalid, or a typed backend/runtime-state error while creating the view.
621    pub fn slice(&self, config: SliceConfig) -> Result<Self> {
622        let value = self
623            .value
624            .slice_view(&config)
625            .map_err(Error::TensorRuntime)?;
626        Self::nary_value_op(&[self], StdTensorOp::Slice(config), value)
627    }
628
629    /// Broadcast into a larger shape with explicit dimension placement.
630    ///
631    /// # Examples
632    ///
633    /// ```
634    /// use tenferro_cpu::CpuBackend;
635    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
636    ///
637    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
638    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx.clone()).unwrap();
639    /// let y = x.broadcast_in_dim(&[3, 2], &[0]).unwrap();
640    ///
641    /// assert_eq!(y.shape(), &[3, 2]);
642    /// # Ok::<(), tenferro_ad::Error>(())
643    /// ```
644    ///
645    /// # Errors
646    ///
647    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds`,
648    /// `DuplicateAxis`, or `ShapeMismatch` when `shape`/`dims` cannot broadcast
649    /// the input, or a typed backend/runtime-state error.
650    pub fn broadcast_in_dim(&self, shape: &[usize], dims: &[usize]) -> Result<Self> {
651        if let Some(error) = broadcast_in_dim_extent_error(self.shape(), shape, dims) {
652            return Err(broadcast_error("EagerTensor::broadcast_in_dim", error));
653        }
654        let op = StdTensorOp::BroadcastInDim {
655            shape: DimExpr::from_concrete(shape),
656            dims: dims.to_vec(),
657        };
658        let value = self
659            .value
660            .broadcast_in_dim_view(shape, dims)
661            .map_err(Error::TensorRuntime)?;
662        Self::nary_value_op(&[self], op, value)
663    }
664
665    /// Convert the tensor to a different dtype using checked conversion.
666    ///
667    /// Use [`cast`](Self::cast) when a lossy dtype projection is intended.
668    ///
669    /// # Examples
670    ///
671    /// ```
672    /// use tenferro_cpu::CpuBackend;
673    /// use tenferro_ad::{DType, EagerRuntime, EagerTensor, Tensor};
674    ///
675    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
676    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, -2.0]).unwrap(), ctx.clone()).unwrap();
677    /// let y = x.convert(DType::C64).unwrap();
678    ///
679    /// assert_eq!(y.dtype(), DType::C64);
680    /// assert_eq!(y.shape(), &[2]);
681    /// # Ok::<(), tenferro_ad::Error>(())
682    /// ```
683    ///
684    /// # Errors
685    ///
686    /// Returns [`tenferro_tensor::Error::UnsupportedDTypeConversion`] when the
687    /// requested pair is outside tenferro's checked dtype-promotion lattice.
688    /// Use [`cast`](Self::cast) for explicit lossy projection; backend
689    /// execution can additionally return a typed runtime-state error.
690    pub fn convert(&self, to: DType) -> Result<Self> {
691        tenferro_tensor::validate::validate_convert_dtype("EagerTensor::convert", self.dtype(), to)
692            .map_err(Error::TensorRuntime)?;
693        self.cast(to)
694    }
695
696    /// Cast the tensor to a different dtype using explicit dtype projection.
697    ///
698    /// `cast` may truncate, narrow precision, project complex values to their
699    /// real component, or use boolean truthiness where the backend supports the
700    /// requested projection.
701    ///
702    /// # Examples
703    ///
704    /// ```
705    /// use tenferro_cpu::CpuBackend;
706    /// use tenferro_ad::{DType, EagerRuntime, EagerTensor, Tensor};
707    ///
708    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
709    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.2_f64, -2.8]).unwrap(), ctx.clone()).unwrap();
710    /// let y = x.cast(DType::I32).unwrap();
711    ///
712    /// assert_eq!(y.materialized().unwrap().as_slice::<i32>().unwrap(), &[1, -2]);
713    /// # Ok::<(), tenferro_ad::Error>(())
714    /// ```
715    /// # Errors
716    ///
717    /// Returns a typed [`tenferro_tensor::Error::Unsupported`] when the eager
718    /// backend cannot project the requested dtype, or a backend/runtime-state
719    /// error during execution.
720    pub fn cast(&self, to: DType) -> Result<Self> {
721        self.unary_op(StdTensorOp::Convert {
722            from: self.dtype(),
723            to,
724        })
725    }
726
727    /// Pad with zeros using StableHLO-style edge and interior padding.
728    ///
729    /// # Examples
730    ///
731    /// ```
732    /// use tenferro_cpu::CpuBackend;
733    /// use tenferro_ad::{EagerRuntime, EagerTensor, PadConfig, Tensor};
734    ///
735    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
736    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx.clone()).unwrap();
737    /// let y = x
738    ///     .pad(PadConfig {
739    ///         edge_padding_low: vec![1],
740    ///         edge_padding_high: vec![1],
741    ///         interior_padding: vec![1],
742    ///     })
743    ///     .unwrap();
744    ///
745    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[0.0, 1.0, 0.0, 2.0, 0.0]);
746    /// # Ok::<(), tenferro_ad::Error>(())
747    /// ```
748    /// # Errors
749    ///
750    /// Returns [`tenferro_runtime::Error::TensorRuntime`] containing
751    /// [`tenferro_tensor::ValidationError::InvalidArgument`] when a
752    /// padding vector has a length different from the input rank, interior
753    /// padding is negative, or edge/interior padding produces a negative
754    /// dimension or checked output-size arithmetic overflows.
755    /// Backend execution and unavailable runtime state are propagated as their
756    /// typed [`tenferro_runtime::Error::TensorRuntime`] or
757    /// [`tenferro_runtime::Error::RuntimeState`] variants.
758    pub fn pad(&self, config: PadConfig) -> Result<Self> {
759        self.unary_op(StdTensorOp::Pad(config))
760    }
761
762    /// Reverse the order of elements along the requested axes.
763    ///
764    /// # Examples
765    ///
766    /// ```
767    /// use tenferro_cpu::CpuBackend;
768    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
769    ///
770    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
771    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![4], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
772    /// let y = x.reverse(&[0]).unwrap();
773    ///
774    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[4.0, 3.0, 2.0, 1.0]);
775    /// # Ok::<(), tenferro_ad::Error>(())
776    /// ```
777    /// # Errors
778    ///
779    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds` or
780    /// `DuplicateAxis` for an invalid axis list, or a typed backend/
781    /// runtime-state error during execution.
782    pub fn reverse(&self, axes: &[usize]) -> Result<Self> {
783        validate_eager_axes("EagerTensor::reverse", self.shape().len(), axes)?;
784        self.unary_op(StdTensorOp::Reverse {
785            axes: axes.to_vec(),
786        })
787    }
788
789    /// Gather slices from `self` using integer start indices.
790    ///
791    /// # Examples
792    ///
793    /// ```
794    /// use tenferro_cpu::CpuBackend;
795    /// use tenferro_ad::{EagerRuntime, EagerTensor, GatherConfig, Tensor};
796    ///
797    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
798    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(
799    ///     vec![5],
800    ///     vec![10.0_f64, 20.0, 30.0, 40.0, 50.0],
801    /// ).unwrap(), ctx.clone()).unwrap();
802    /// let indices = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![3], vec![4_i64, 1, 0]).unwrap(), ctx.clone()).unwrap();
803    /// let y = x
804    ///     .gather(
805    ///         &indices,
806    ///         GatherConfig {
807    ///             offset_dims: vec![],
808    ///             collapsed_slice_dims: vec![0],
809    ///             start_index_map: vec![0],
810    ///             index_vector_dim: 1,
811    ///             slice_sizes: vec![1],
812    ///         },
813    ///     )
814    ///     .unwrap();
815    ///
816    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[50.0, 20.0, 10.0]);
817    /// # Ok::<(), tenferro_ad::Error>(())
818    /// ```
819    /// # Errors
820    ///
821    /// Returns [`tenferro_tensor::Error::Validation`] when the gather
822    /// configuration has an invalid rank, axis, shape, or index dtype, or a
823    /// typed backend/runtime-state error.
824    pub fn gather(&self, indices: &Self, config: GatherConfig) -> Result<Self> {
825        self.binary_op(indices, StdTensorOp::Gather(config))
826    }
827
828    /// Scatter updates into `self` using StableHLO scatter semantics.
829    ///
830    /// # Examples
831    ///
832    /// ```
833    /// use tenferro_cpu::CpuBackend;
834    /// use tenferro_ad::{EagerRuntime, EagerTensor, ScatterConfig, Tensor};
835    ///
836    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
837    /// let operand = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![4], vec![0.0_f64, 0.0, 0.0, 0.0]).unwrap(), ctx.clone()).unwrap();
838    /// let indices = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 1], vec![1_i64, 3]).unwrap(), ctx.clone()).unwrap();
839    /// let updates = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![5.0_f64, 7.0]).unwrap(), ctx.clone()).unwrap();
840    /// let result = operand
841    ///     .scatter(
842    ///         &indices,
843    ///         &updates,
844    ///         ScatterConfig {
845    ///             update_window_dims: vec![],
846    ///             inserted_window_dims: vec![0],
847    ///             scatter_dims_to_operand_dims: vec![0],
848    ///             index_vector_dim: 1,
849    ///         },
850    ///     )
851    ///     .unwrap();
852    ///
853    /// assert_eq!(result.materialized().unwrap().as_slice::<f64>().unwrap(), &[0.0, 5.0, 0.0, 7.0]);
854    /// # Ok::<(), tenferro_ad::Error>(())
855    /// ```
856    /// # Errors
857    ///
858    /// Returns [`tenferro_tensor::Error::Validation`] when the scatter
859    /// configuration, index/update shapes, or index dtype is invalid, or a
860    /// typed backend/runtime-state error.
861    pub fn scatter(&self, indices: &Self, updates: &Self, config: ScatterConfig) -> Result<Self> {
862        self.ternary_op(indices, updates, StdTensorOp::Scatter(config))
863    }
864
865    /// Slice using runtime start indices.
866    ///
867    /// # Examples
868    ///
869    /// ```
870    /// use tenferro_cpu::CpuBackend;
871    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
872    ///
873    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
874    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![5], vec![1.0_f64, 2.0, 3.0, 4.0, 5.0]).unwrap(), ctx.clone()).unwrap();
875    /// let starts = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![2_i64]).unwrap(), ctx.clone()).unwrap();
876    /// let y = x.dynamic_slice(&starts, &[2]).unwrap();
877    ///
878    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[3.0, 4.0]);
879    /// # Ok::<(), tenferro_ad::Error>(())
880    /// ```
881    /// # Errors
882    ///
883    /// Returns [`tenferro_tensor::Error::Validation`] when `starts` has the
884    /// wrong dtype/shape or `sizes` exceeds the operand rank, including an
885    /// `AxisOutOfBounds` or `ShapeMismatch`, or a typed backend/runtime-state
886    /// error.
887    pub fn dynamic_slice(&self, starts: &Self, sizes: &[usize]) -> Result<Self> {
888        self.binary_op(
889            starts,
890            StdTensorOp::DynamicSlice {
891                slice_sizes: sizes.to_vec(),
892            },
893        )
894    }
895
896    /// Concatenate tensors along one axis.
897    ///
898    /// # Examples
899    ///
900    /// ```
901    /// use tenferro_cpu::CpuBackend;
902    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
903    ///
904    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
905    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx.clone()).unwrap();
906    /// let y = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![3.0_f64, 4.0]).unwrap(), ctx.clone()).unwrap();
907    /// let z = EagerTensor::concatenate(&[&x, &y], 0).unwrap();
908    ///
909    /// assert_eq!(z.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 2.0, 3.0, 4.0]);
910    /// # Ok::<(), tenferro_ad::Error>(())
911    /// ```
912    /// # Errors
913    ///
914    /// Returns [`tenferro_tensor::ValidationError::InvalidArgument`] when `tensors` is
915    /// empty or `axis` is outside the rank, `ShapeMismatch`/`DTypeMismatch`
916    /// when inputs cannot be concatenated, or a typed backend/runtime-state
917    /// error.
918    pub fn concatenate(tensors: &[&Self], axis: usize) -> Result<Self> {
919        Self::nary_op(
920            tensors,
921            StdTensorOp::Concatenate {
922                axis,
923                input_count: tensors.len(),
924            },
925        )
926    }
927
928    /// Extract the diagonal along two axes.
929    ///
930    /// # Examples
931    ///
932    /// ```
933    /// use tenferro_cpu::CpuBackend;
934    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
935    ///
936    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
937    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(
938    ///     vec![3, 3],
939    ///     vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0],
940    /// ).unwrap(), ctx.clone()).unwrap();
941    /// let y = x.extract_diag(0, 1).unwrap();
942    ///
943    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 5.0, 9.0]);
944    /// # Ok::<(), tenferro_ad::Error>(())
945    /// ```
946    /// # Errors
947    ///
948    /// Returns [`tenferro_tensor::Error::Validation`] with `RankMismatch`,
949    /// `AxisOutOfBounds`, or `DuplicateAxis` when the selected axes cannot form
950    /// a diagonal, or a typed backend/runtime-state error.
951    pub fn extract_diag(&self, axis_a: usize, axis_b: usize) -> Result<Self> {
952        self.unary_op(StdTensorOp::ExtractDiag { axis_a, axis_b })
953    }
954
955    /// Embed a vector or lower-rank tensor along a diagonal.
956    ///
957    /// # Examples
958    ///
959    /// ```
960    /// use tenferro_cpu::CpuBackend;
961    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
962    ///
963    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
964    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx.clone()).unwrap();
965    /// let y = x.embed_diag(0, 1).unwrap();
966    ///
967    /// assert_eq!(y.shape(), &[3, 3]);
968    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 0.0, 0.0, 0.0, 2.0, 0.0, 0.0, 0.0, 3.0]);
969    /// # Ok::<(), tenferro_ad::Error>(())
970    /// ```
971    /// # Errors
972    ///
973    /// Returns [`tenferro_tensor::Error::Validation`] with `RankMismatch`,
974    /// `AxisOutOfBounds`, or `DuplicateAxis` when the diagonal axes are not
975    /// valid for embedding, or a typed backend/runtime-state error.
976    pub fn embed_diag(&self, axis_a: usize, axis_b: usize) -> Result<Self> {
977        self.unary_op(StdTensorOp::EmbedDiag { axis_a, axis_b })
978    }
979
980    /// Keep the lower triangle and zero the rest.
981    ///
982    /// # Examples
983    ///
984    /// ```
985    /// use tenferro_cpu::CpuBackend;
986    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
987    ///
988    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
989    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
990    /// let y = x.tril(0).unwrap();
991    ///
992    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 2.0, 0.0, 4.0]);
993    /// # Ok::<(), tenferro_ad::Error>(())
994    /// ```
995    /// # Errors
996    ///
997    /// Returns [`tenferro_tensor::ValidationError::RankMismatch`] when the operand is not
998    /// a matrix, or a typed unsupported/backend/runtime-state error.
999    pub fn tril(&self, k: i64) -> Result<Self> {
1000        self.unary_op(StdTensorOp::Tril { k })
1001    }
1002
1003    /// Keep the upper triangle and zero the rest.
1004    ///
1005    /// # Examples
1006    ///
1007    /// ```
1008    /// use tenferro_cpu::CpuBackend;
1009    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1010    ///
1011    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1012    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
1013    /// let y = x.triu(0).unwrap();
1014    ///
1015    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0, 0.0, 3.0, 4.0]);
1016    /// # Ok::<(), tenferro_ad::Error>(())
1017    /// ```
1018    /// # Errors
1019    ///
1020    /// Returns [`tenferro_tensor::ValidationError::RankMismatch`] when the operand is not
1021    /// a matrix, or a typed unsupported/backend/runtime-state error.
1022    pub fn triu(&self, k: i64) -> Result<Self> {
1023        self.unary_op(StdTensorOp::Triu { k })
1024    }
1025
1026    /// Reduce product over the requested axes.
1027    ///
1028    /// # Examples
1029    ///
1030    /// ```
1031    /// use tenferro_cpu::CpuBackend;
1032    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1033    ///
1034    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1035    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
1036    /// let y = x.reduce_prod(None).unwrap();
1037    ///
1038    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[24.0]);
1039    /// # Ok::<(), tenferro_ad::Error>(())
1040    /// ```
1041    /// # Errors
1042    ///
1043    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds` or
1044    /// `DuplicateAxis` for an invalid reduction axis, or a typed
1045    /// unsupported/backend/runtime-state error for the selected dtype.
1046    pub fn reduce_prod(&self, axes: Option<&[usize]>) -> Result<Self> {
1047        let axes = axes.map_or_else(|| (0..self.shape().len()).collect(), <[usize]>::to_vec);
1048        validate_eager_axes("EagerTensor::reduce_prod", self.shape().len(), &axes)?;
1049        self.unary_op(StdTensorOp::ReduceProd { axes })
1050    }
1051
1052    /// Reduce maximum over the requested axes.
1053    ///
1054    /// # Examples
1055    ///
1056    /// ```
1057    /// use tenferro_cpu::CpuBackend;
1058    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1059    ///
1060    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1061    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
1062    /// let y = x.reduce_max(None).unwrap();
1063    ///
1064    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[4.0]);
1065    /// # Ok::<(), tenferro_ad::Error>(())
1066    /// ```
1067    /// # Errors
1068    ///
1069    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds` or
1070    /// `DuplicateAxis` for an invalid reduction axis, or a typed
1071    /// unsupported/backend/runtime-state error for the selected dtype.
1072    pub fn reduce_max(&self, axes: Option<&[usize]>) -> Result<Self> {
1073        let axes = axes.map_or_else(|| (0..self.shape().len()).collect(), <[usize]>::to_vec);
1074        validate_eager_axes("EagerTensor::reduce_max", self.shape().len(), &axes)?;
1075        self.unary_op(StdTensorOp::ReduceMax { axes })
1076    }
1077
1078    /// Reduce minimum over the requested axes.
1079    ///
1080    /// # Examples
1081    ///
1082    /// ```
1083    /// use tenferro_cpu::CpuBackend;
1084    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1085    ///
1086    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1087    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap(), ctx.clone()).unwrap();
1088    /// let y = x.reduce_min(None).unwrap();
1089    ///
1090    /// assert_eq!(y.materialized().unwrap().as_slice::<f64>().unwrap(), &[1.0]);
1091    /// # Ok::<(), tenferro_ad::Error>(())
1092    /// ```
1093    /// # Errors
1094    ///
1095    /// Returns [`tenferro_tensor::Error::Validation`] with `AxisOutOfBounds` or
1096    /// `DuplicateAxis` for an invalid reduction axis, or a typed
1097    /// unsupported/backend/runtime-state error for the selected dtype.
1098    pub fn reduce_min(&self, axes: Option<&[usize]>) -> Result<Self> {
1099        let axes = axes.map_or_else(|| (0..self.shape().len()).collect(), <[usize]>::to_vec);
1100        validate_eager_axes("EagerTensor::reduce_min", self.shape().len(), &axes)?;
1101        self.unary_op(StdTensorOp::ReduceMin { axes })
1102    }
1103
1104    pub(crate) fn unary_op(&self, op: StdTensorOp) -> Result<Self> {
1105        Self::nary_op(&[self], op)
1106    }
1107
1108    pub(crate) fn binary_op(&self, other: &Self, op: StdTensorOp) -> Result<Self> {
1109        Self::nary_op(&[self, other], op)
1110    }
1111
1112    pub(crate) fn ternary_op(&self, b: &Self, c: &Self, op: StdTensorOp) -> Result<Self> {
1113        Self::nary_op(&[self, b, c], op)
1114    }
1115
1116    pub(crate) fn nary_value_op(
1117        tensors: &[&Self],
1118        op: StdTensorOp,
1119        value: TensorValue,
1120    ) -> Result<Self> {
1121        let Some(first) = tensors.first() else {
1122            return Err(empty_nary_input_error(&op));
1123        };
1124
1125        let ctx = Arc::clone(&first.ctx);
1126        for tensor in tensors.iter().skip(1) {
1127            if !first.same_context(tensor) {
1128                return Err(Error::ContextMismatch {
1129                    lhs: first.ctx_id(),
1130                    rhs: tensor.ctx_id(),
1131                });
1132            }
1133        }
1134
1135        if !eager_grad_recording_enabled() {
1136            return Ok(Self::new_untracked_value_result(ctx, value));
1137        }
1138
1139        let output_ref = &value;
1140        let mut recorded = record_eager_value_outputs(&op, &[output_ref], tensors)?;
1141        let trace = recorded.traces.pop().ok_or_else(|| {
1142            Error::Internal(format!("expected one eager trace for {:?}, got 0", op))
1143        })?;
1144        let semantic_trace = recorded.semantic_traces.pop().flatten();
1145        let mut metadata_scopes = vec![Arc::clone(&recorded.metadata_scope)];
1146        for tensor in tensors {
1147            for scope in &tensor.metadata_scopes {
1148                push_metadata_scope(&mut metadata_scopes, Arc::clone(scope));
1149            }
1150        }
1151
1152        Self::new_result_value(
1153            ctx,
1154            trace.key,
1155            value,
1156            trace.requires_grad,
1157            trace.trace,
1158            semantic_trace,
1159            metadata_scopes,
1160        )
1161    }
1162
    /// Execute `op` over `tensors` and wrap its single output as a new eager tensor.
    ///
    /// Three paths are taken depending on the recording state:
    ///
    /// 1. `eager_grad_recording_enabled()` is false: run `exec_single_output_read`
    ///    on the inputs' `tensor_read()` values and build the result with
    ///    `new_untracked_result`; nothing is recorded.
    /// 2. Recording is enabled but no input has `requires_grad`: run on tensor
    ///    reads, record the output with `record_eager_outputs`, and build the
    ///    result with `new_unregistered_result_arc_with_semantic_trace`.
    /// 3. Otherwise: obtain every input via `materialized_arc()`, run
    ///    `exec_single_output`, record the output, and build the result with
    ///    `new_result_arc_with_semantic_trace`.
    ///
    /// Each stage is wrapped in `profile_eager_op_section`. The `nary_op.total`
    /// entry is recorded only on the three exits that reach a result
    /// constructor; early error returns skip it.
    ///
    /// # Errors
    ///
    /// - the error built by `empty_nary_input_error` when `tensors` is empty;
    /// - the error built by `wrong_nary_input_count_error` when the number of
    ///   tensors differs from `op.input_count()`;
    /// - [`Error::ContextMismatch`] when a later tensor fails `same_context`
    ///   against the first one;
    /// - any error from materialization, execution, recording, or the result
    ///   constructor;
    /// - [`Error::Internal`] when recording produced no trace.
    pub(crate) fn nary_op(tensors: &[&Self], op: StdTensorOp) -> Result<Self> {
        // Taken before any validation so the total covers the whole call.
        // NOTE(review): presumably `None` when profiling is off — confirm in `eager`.
        let total_started = eager_op_profile_start();
        let Some(first) = tensors.first() else {
            return Err(empty_nary_input_error(&op));
        };
        let expected = op.input_count();
        if tensors.len() != expected {
            return Err(wrong_nary_input_count_error(&op, expected, tensors.len()));
        }

        let ctx = Arc::clone(&first.ctx);
        // Every input after the first must share the first input's context.
        profile_eager_op_section("nary_op.context_check", || -> Result<()> {
            for tensor in tensors.iter().skip(1) {
                if !first.same_context(tensor) {
                    return Err(Error::ContextMismatch {
                        lhs: first.ctx_id(),
                        rhs: tensor.ctx_id(),
                    });
                }
            }
            Ok(())
        })?;

        // False whenever recording is disabled, regardless of the inputs' flags.
        let any_requires_grad = profile_eager_op_section("nary_op.requires_grad_scan", || {
            eager_grad_recording_enabled() && tensors.iter().any(|tensor| tensor.requires_grad)
        });
        // Path 1: recording disabled — execute on tensor reads and return an
        // untracked result without recording anything.
        if !eager_grad_recording_enabled() {
            let input_reads = profile_eager_op_section("nary_op.collect_input_reads", || {
                tensors
                    .iter()
                    .map(|tensor| tensor.tensor_read())
                    .collect::<Vec<_>>()
            });
            let output = profile_eager_op_section("nary_op.exec_single_output_read", || {
                exec_single_output_read(&op, &input_reads, &ctx)
            })?;
            let result = profile_eager_op_section("nary_op.new_untracked_result", || {
                Self::new_untracked_result(ctx, output)
            });
            if let Some(total_started) = total_started {
                record_eager_op_profile("nary_op.total", total_started.elapsed());
                maybe_print_eager_op_profile();
            }
            return result;
        }

        // Path 2: recording enabled but no input requires grad — still execute
        // on tensor reads, but record the output so the result carries its
        // trace, semantic trace, and metadata scopes.
        if !any_requires_grad {
            let input_reads = profile_eager_op_section("nary_op.collect_input_reads", || {
                tensors
                    .iter()
                    .map(|tensor| tensor.tensor_read())
                    .collect::<Vec<_>>()
            });
            let output = profile_eager_op_section("nary_op.exec_single_output_read", || {
                exec_single_output_read(&op, &input_reads, &ctx)
            })?;
            let output = Arc::new(output);
            let outputs = vec![Arc::clone(&output)];
            let mut recorded =
                profile_eager_op_section("nary_op.record_untracked_outputs", || {
                    record_eager_outputs(&op, &outputs, tensors)
                })?;
            // Exactly one output was recorded, so one trace is expected.
            let trace = recorded.traces.pop().ok_or_else(|| {
                Error::Internal(format!("expected one eager trace for {:?}, got 0", op))
            })?;
            let semantic_trace = recorded.semantic_traces.pop().flatten();
            // Scope of this recording first, then every input's scopes in input order.
            let mut metadata_scopes = vec![Arc::clone(&recorded.metadata_scope)];
            for tensor in tensors {
                for scope in &tensor.metadata_scopes {
                    push_metadata_scope(&mut metadata_scopes, Arc::clone(scope));
                }
            }
            let result = profile_eager_op_section("nary_op.new_untracked_semantic_result", || {
                Self::new_unregistered_result_arc_with_semantic_trace(
                    ctx,
                    trace.key,
                    output,
                    trace.requires_grad,
                    trace.trace,
                    semantic_trace,
                    metadata_scopes,
                )
            });
            if let Some(total_started) = total_started {
                record_eager_op_profile("nary_op.total", total_started.elapsed());
                maybe_print_eager_op_profile();
            }
            return result;
        }

        // Path 3: at least one input requires grad — obtain the inputs through
        // `materialized_arc()` and execute on plain `&Tensor` references.
        let input_arcs = profile_eager_op_section("nary_op.materialize_inputs", || {
            tensors
                .iter()
                .map(|tensor| tensor.materialized_arc())
                .collect::<Result<Vec<_>>>()
        })?;
        let inputs: Vec<&Tensor> = profile_eager_op_section("nary_op.collect_inputs", || {
            input_arcs.iter().map(|tensor| tensor.as_ref()).collect()
        });
        let output = profile_eager_op_section("nary_op.exec_single_output", || {
            exec_single_output(&op, &inputs, &ctx)
        })?;

        let output = Arc::new(output);
        let outputs = vec![Arc::clone(&output)];
        let mut recorded = profile_eager_op_section("nary_op.record_outputs", || {
            record_eager_outputs(&op, &outputs, tensors)
        })?;
        // Exactly one output was recorded, so one trace is expected.
        let trace = recorded.traces.pop().ok_or_else(|| {
            Error::Internal(format!("expected one eager trace for {:?}, got 0", op))
        })?;
        let semantic_trace = recorded.semantic_traces.pop().flatten();
        // Scope of this recording first, then every input's scopes in input order.
        let mut metadata_scopes = vec![Arc::clone(&recorded.metadata_scope)];
        for tensor in tensors {
            for scope in &tensor.metadata_scopes {
                push_metadata_scope(&mut metadata_scopes, Arc::clone(scope));
            }
        }

        let result = profile_eager_op_section("nary_op.new_tracked_result", || {
            Self::new_result_arc_with_semantic_trace(
                ctx,
                trace.key,
                output,
                trace.requires_grad,
                trace.trace,
                semantic_trace,
                metadata_scopes,
            )
        });
        if let Some(total_started) = total_started {
            record_eager_op_profile("nary_op.total", total_started.elapsed());
            maybe_print_eager_op_profile();
        }
        result
    }
1299}
1300
1301fn validate_eager_axes(op: &'static str, rank: usize, axes: &[usize]) -> Result<()> {
1302    tenferro_tensor::validate::validate_unique_axes(op, "axis", rank, axes)
1303        .map_err(Error::TensorRuntime)
1304}
1305
1306fn validate_eager_dot_general_config(
1307    _op: &'static str,
1308    config: &DotGeneralConfig,
1309    lhs_rank: usize,
1310    rhs_rank: usize,
1311) -> Result<()> {
1312    config
1313        .validate_dims_with_ranks(lhs_rank, rhs_rank)
1314        .map_err(Error::TensorRuntime)
1315}
1316
1317fn empty_nary_input_error(op: &StdTensorOp) -> Error {
1318    Error::TensorRuntime(tenferro_tensor::Error::invalid_argument(
1319        eager_validation_op_name(op),
1320        "inputs",
1321        "operation requires at least one input tensor",
1322    ))
1323}
1324
1325fn wrong_nary_input_count_error(op: &StdTensorOp, expected: usize, actual: usize) -> Error {
1326    Error::TensorRuntime(tenferro_tensor::Error::invalid_argument(
1327        eager_validation_op_name(op),
1328        "inputs",
1329        format!("operation expects {expected} inputs, got {actual}"),
1330    ))
1331}
1332
1333fn eager_validation_op_name(op: &StdTensorOp) -> &'static str {
1334    match op {
1335        StdTensorOp::Concatenate { .. } => "concatenate",
1336        _ => "eager_nary_op",
1337    }
1338}
1339
1340fn finite_real_factor(value: f64) -> Result<f64> {
1341    if value.is_finite() {
1342        Ok(value)
1343    } else {
1344        Err(Error::TensorRuntime(
1345            tenferro_tensor::Error::invalid_argument(
1346                "scale_real",
1347                "factor",
1348                format!("real scalar must be finite, got {value}"),
1349            ),
1350        ))
1351    }
1352}
1353
1354fn round_real_to_i64(value: f64) -> Result<i64> {
1355    let rounded = finite_real_factor(value)?.round();
1356    if rounded < i64::MIN as f64 || rounded >= -(i64::MIN as f64) {
1357        return Err(Error::TensorRuntime(
1358            tenferro_tensor::Error::invalid_argument(
1359                "scale_real",
1360                "factor",
1361                format!("rounded real scalar {rounded} is out of i64 range"),
1362            ),
1363        ));
1364    }
1365    Ok(rounded as i64)
1366}
1367
1368fn round_real_to_i32(value: f64) -> Result<i32> {
1369    let rounded = round_real_to_i64(value)?;
1370    i32::try_from(rounded).map_err(|_| {
1371        Error::TensorRuntime(tenferro_tensor::Error::invalid_argument(
1372            "scale_real",
1373            "factor",
1374            format!("rounded real scalar {rounded} is out of i32 range"),
1375        ))
1376    })
1377}
1378
1379fn bool_from_real(value: f64) -> Result<bool> {
1380    Ok(finite_real_factor(value)? != 0.0)
1381}
tenferro_ad/eager_ops.rs

tenferro_ad/
eager_ops.rs