tenferro_ad/
eager.rs

1use std::cell::{Cell, RefCell};
2use std::cmp::Reverse;
3use std::collections::HashMap;
4use std::env;
5use std::fmt;
6use std::mem::{size_of, size_of_val};
7#[cfg(test)]
8use std::sync::atomic::{AtomicUsize, Ordering};
9use std::sync::{Arc, Mutex, MutexGuard, OnceLock, Weak};
10use std::time::{Duration, Instant};
11
12use lru::LruCache;
13
14use crate::extension_cache::{ExtensionCacheLimits, ExtensionCacheSelector, ExtensionCacheStore};
15#[cfg(test)]
16use computegraph::graph::Graph;
17use computegraph::ValueKey;
18#[cfg(test)]
19use computegraph::ValueRef;
20use tenferro_cpu::{CpuBackend, CpuBackendError, CpuPlacement};
21#[cfg(feature = "cuda")]
22use tenferro_gpu::CudaBackend;
23#[cfg(feature = "webgpu")]
24use tenferro_gpu::WebGpuBackend;
25#[cfg(test)]
26use tenferro_ops::input_key::TensorInputKey;
27use tenferro_ops::{std_tensor_op::StdTensorOp, SymDim, TensorMeta};
28use tenferro_runtime::ad_support::{compile_ad_source, ones_tensor};
29use tenferro_runtime::program::{ProgramValueMetadata, SemanticFingerprint, SemanticProgram};
30use tenferro_runtime::{
31    CompiledGraph, CoreCapabilityBundle, EngineId, ErrorPhase, ExecutionContextIdentity,
32    ExtensionModule, GraphCompiler, HardwareClassId, PreparedCompiledGraph, RegistrationIdentity,
33    Runtime, RuntimeConfigError, RuntimeConfigSnapshot, RuntimeEpoch, TracedTensor,
34};
35#[cfg(test)]
36use tenferro_tensor::TypedTensor;
37use tenferro_tensor::{BackendSession, BackendSessionHost};
38use tenferro_tensor::{
39    CacheStats, DType, IntoShapeVec, Tensor, TensorBackend, TensorElementwise, TensorRead,
40    TensorScalar, TensorValue,
41};
42
43use crate::eager_backend::{
44    cpu_runtime_engine_id, cpu_runtime_hardware_class, eager_runtime_for_backend, EagerBackend,
45};
46#[cfg(test)]
47use crate::eager_exec::exec_standard_op_on_tensor_reads_in_session;
48use crate::eager_exec::{exec_op_on_tensor_reads_with_runtime, exec_op_on_tensors_with_runtime};
49use crate::error::{ContextId, Error, Result};
50#[cfg(test)]
51use crate::metadata::push_metadata_scope;
52use crate::metadata::{
53    metadata_scopes_for_scope, register_scoped_metadata_batch, register_scoped_value_metadata,
54    tensor_meta_from_tensor, GlobalMetadataScope,
55};
56use crate::semantic_extension::SemanticExtensionRuleSet;
57use crate::traced::{derivative_trace_from_frozen_program, next_input_key};
58use crate::transform_cache::{AdTransformCache, AdTransformCacheLimits};
59
60use crate::AdContext;
61
/// Shared, mutex-guarded slot holding an optional tensor. The eager runtime
/// keeps weak handles to slots of this type in its `grad_slots` registry.
pub(crate) type GradSlot = Arc<Mutex<Option<Arc<Tensor>>>>;
/// Non-owning (`Weak`) handle to the same slot type as [`GradSlot`].
pub(crate) type WeakGradSlot = Weak<Mutex<Option<Arc<Tensor>>>>;
64
/// Unit marker type for eager traces.
///
/// NOTE(review): the consumers of this type are not visible in this part of
/// the file — confirm its intended role before relying on this description.
#[derive(Clone, Debug)]
pub(crate) struct EagerTrace;
67
/// Test-only counter bumped each time
/// `CpuPlacementBoundEager::refresh_runtime_selection` observes a changed
/// runtime epoch and re-selects the CPU runtime.
#[cfg(test)]
pub(crate) static CPU_RUNTIME_SELECTION_REFRESHES: AtomicUsize = AtomicUsize::new(0);
70
/// Result of `select_cpu_runtime`: the runtime snapshot plus the identity and
/// capabilities of the CPU engine found (and validated) in that snapshot.
struct CpuRuntimeSelection {
    // Runtime configuration snapshot the engine was looked up in.
    snapshot: Arc<RuntimeConfigSnapshot>,
    // Epoch reported by `snapshot`.
    epoch: RuntimeEpoch,
    // Id used to look the CPU engine up in the snapshot.
    engine_id: EngineId,
    // Registration identity reported by the selected engine.
    registration_identity: RegistrationIdentity,
    // Clone of the selected engine's capability bundle.
    capabilities: CoreCapabilityBundle,
}
78
/// Aggregated profile data for one named section of eager-op execution.
#[derive(Debug, Default, Clone)]
struct EagerOpProfileEntry {
    // Number of recorded samples for the section.
    calls: usize,
    // Sum of the elapsed time of all recorded samples.
    total_time: Duration,
}
84
thread_local! {
    // Per-thread profile table: section name -> aggregated call count and time.
    static EAGER_OP_PROFILE_STATE: RefCell<HashMap<&'static str, EagerOpProfileEntry>> =
        RefCell::new(HashMap::new());
    // Per-thread no-grad nesting depth; gradient recording is enabled while it is 0.
    static EAGER_NO_GRAD_DEPTH: Cell<usize> = const { Cell::new(0) };
    // Test-only: when `Some`, overrides the result of `eager_op_profile_enabled`.
    #[cfg(test)]
    static EAGER_OP_PROFILE_ENABLED_OVERRIDE: RefCell<Option<bool>> = const { RefCell::new(None) };
    // Test-only: when `Some`, overrides the result of `eager_op_profile_print_every`
    // (the inner `Option` is the value returned).
    #[cfg(test)]
    static EAGER_OP_PROFILE_PRINT_EVERY_OVERRIDE: RefCell<Option<Option<usize>>> = const { RefCell::new(None) };
    // Test-only: when `Some`, overrides the result of `eager_semantic_vjp_enabled`.
    #[cfg(test)]
    static EAGER_SEMANTIC_VJP_ENABLED_OVERRIDE: RefCell<Option<bool>> = const { RefCell::new(None) };
}
96
/// Test-only global counter.
///
/// NOTE(review): presumably counts semantic VJP executions; the increment
/// sites are not visible in this part of the file — confirm.
#[cfg(test)]
pub(crate) static EAGER_SEMANTIC_VJP_EXECUTIONS: AtomicUsize = AtomicUsize::new(0);
99
100pub(crate) fn eager_grad_recording_enabled() -> bool {
101    EAGER_NO_GRAD_DEPTH.with(|depth| depth.get() == 0)
102}
103
/// Report whether the semantic eager VJP/JVP path is enabled.
///
/// In test builds a thread-local override, when set, takes precedence.
/// Otherwise the answer is computed once per process from the environment and
/// cached.
fn eager_semantic_vjp_enabled() -> bool {
    #[cfg(test)]
    {
        let forced = EAGER_SEMANTIC_VJP_ENABLED_OVERRIDE.with(|state| *state.borrow());
        if let Some(value) = forced {
            return value;
        }
    }

    // Semantic eager VJP/JVP is on by default (Unification 7); only the exact
    // value TENFERRO_EAGER_SEMANTIC_VJP=0 turns it off. An unset or unreadable
    // variable leaves it enabled.
    static ENABLED: OnceLock<bool> = OnceLock::new();
    *ENABLED.get_or_init(|| match env::var("TENFERRO_EAGER_SEMANTIC_VJP") {
        Ok(raw) => raw != "0",
        Err(_) => true,
    })
}
115
/// Scope guard that temporarily disables eager operation recording.
///
/// Values computed while this guard is alive are concrete eager tensors, but
/// they do not participate in reverse-mode gradient tracking.
///
/// The guard must be bound to a named variable (for example `_guard`) so that
/// it lives until the end of the intended scope; a guard that is dropped
/// immediately ends the no-grad scope immediately.
///
/// # Examples
///
/// ```
/// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
/// use tenferro_cpu::CpuBackend;
///
/// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
/// let x = EagerTensor::requires_grad_in(
///     Tensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap(),
///     ctx.clone(),
/// )?;
/// let y = {
///     let _guard = ctx.no_grad();
///     x.mul(&x)?
/// };
/// assert!(!y.tracks_grad());
/// # Ok::<(), tenferro_ad::Error>(())
/// ```
#[derive(Debug)]
#[must_use = "the no-grad scope ends as soon as the guard is dropped; bind it to a variable"]
pub struct EagerNoGradGuard {
    // `true` while this guard still owes one decrement of the thread-local
    // no-grad depth; cleared once the guard has been dropped.
    active: bool,
}
143
144impl Drop for EagerNoGradGuard {
145    fn drop(&mut self) {
146        if !self.active {
147            return;
148        }
149        EAGER_NO_GRAD_DEPTH.with(|depth| {
150            depth.set(depth.get().saturating_sub(1));
151        });
152        self.active = false;
153    }
154}
155
/// Report whether aggregated eager-op profiling is enabled.
///
/// In test builds a thread-local override, when set, takes precedence.
/// Otherwise profiling is enabled when `TENFERRO_PROFILE_EAGER_OP_AGG` can be
/// read from the environment; the answer is computed once per process.
pub(crate) fn eager_op_profile_enabled() -> bool {
    #[cfg(test)]
    {
        let forced = EAGER_OP_PROFILE_ENABLED_OVERRIDE.with(|state| *state.borrow());
        if let Some(value) = forced {
            return value;
        }
    }

    static ENABLED: OnceLock<bool> = OnceLock::new();
    let from_env = || env::var("TENFERRO_PROFILE_EAGER_OP_AGG").is_ok();
    *ENABLED.get_or_init(from_env)
}
165
166pub(crate) fn eager_op_profile_start() -> Option<Instant> {
167    eager_op_profile_enabled().then(Instant::now)
168}
169
170pub(crate) fn record_eager_op_profile(section: &'static str, elapsed: Duration) {
171    if !eager_op_profile_enabled() {
172        return;
173    }
174    EAGER_OP_PROFILE_STATE.with(|state| {
175        let mut state = state.borrow_mut();
176        let entry = state.entry(section).or_default();
177        entry.calls += 1;
178        entry.total_time += elapsed;
179    });
180}
181
182pub(crate) fn profile_eager_op_section<T>(section: &'static str, f: impl FnOnce() -> T) -> T {
183    if !eager_op_profile_enabled() {
184        return f();
185    }
186    let started = Instant::now();
187    let result = f();
188    record_eager_op_profile(section, started.elapsed());
189    result
190}
191
192pub(crate) fn maybe_print_eager_op_profile() {
193    if !eager_op_profile_enabled() {
194        return;
195    }
196    let Some(print_every) = eager_op_profile_print_every() else {
197        return;
198    };
199    if print_every == 0 {
200        return;
201    }
202
203    let should_print = EAGER_OP_PROFILE_STATE.with(|state| {
204        state
205            .borrow()
206            .get("nary_op.total")
207            .is_some_and(|entry| entry.calls % print_every == 0)
208    });
209    if should_print {
210        print_and_reset_eager_op_profile();
211    }
212}
213
/// Read the profile print interval.
///
/// In test builds a thread-local override, when set, takes precedence.
/// Otherwise the value of `TENFERRO_PROFILE_EAGER_OP_PRINT_EVERY` is parsed as
/// a `usize`; an unset, unreadable, or unparsable variable yields `None`.
fn eager_op_profile_print_every() -> Option<usize> {
    #[cfg(test)]
    {
        let forced = EAGER_OP_PROFILE_PRINT_EVERY_OVERRIDE.with(|state| *state.borrow());
        if let Some(value) = forced {
            return value;
        }
    }

    let raw = env::var("TENFERRO_PROFILE_EAGER_OP_PRINT_EVERY").ok()?;
    raw.parse::<usize>().ok()
}
225
226pub(crate) fn print_and_reset_eager_op_profile() {
227    EAGER_OP_PROFILE_STATE.with(|state| {
228        let mut entries: Vec<_> = state
229            .borrow()
230            .iter()
231            .map(|(section, entry)| (*section, entry.clone()))
232            .collect();
233        state.borrow_mut().clear();
234        entries.sort_by_key(|(_, entry)| Reverse(entry.total_time));
235
236        eprintln!("=== tenferro eager op profile ===");
237        for (section, entry) in entries {
238            let Some(per_call_us) = eager_op_profile_per_call_us(&entry) else {
239                continue;
240            };
241            eprintln!(
242                "{section}: calls={} total={:.6}ms per_call={:.3}us",
243                entry.calls,
244                entry.total_time.as_secs_f64() * 1.0e3,
245                per_call_us,
246            );
247        }
248    });
249}
250
251fn eager_op_profile_per_call_us(entry: &EagerOpProfileEntry) -> Option<f64> {
252    (entry.calls != 0).then(|| entry.total_time.as_secs_f64() * 1.0e6 / entry.calls as f64)
253}
254
/// Wrap a [`RuntimeConfigError`] as an execution-phase runtime-state error
/// attributed to the operation named `op`.
fn runtime_config_error(op: &'static str, source: RuntimeConfigError) -> Error {
    Error::runtime_state_source(op, ErrorPhase::Execution, source)
}
258
/// Wrap any thread-safe, `'static` error as an execution-phase runtime-state
/// error attributed to the operation named `op`, keeping `source` attached.
fn runtime_state_source<E>(op: &'static str, source: E) -> Error
where
    E: std::error::Error + Send + Sync + 'static,
{
    Error::runtime_state_source(op, ErrorPhase::Execution, source)
}
265
/// Build the execution-phase "unsupported" error used by the CPU runtime
/// bridge, with `message` as its detail text.
///
/// The operation label is always
/// `CpuPlacementBoundEager::refresh_runtime_selection`, regardless of which
/// caller produced the error.
fn cpu_runtime_bridge_unsupported(message: impl Into<String>) -> Error {
    Error::unsupported(
        "CpuPlacementBoundEager::refresh_runtime_selection",
        ErrorPhase::Execution,
        message,
    )
}
273
274fn select_cpu_runtime(runtime: &Runtime) -> Result<CpuRuntimeSelection> {
275    let snapshot = runtime
276        .snapshot()
277        .map_err(|source| runtime_state_source("EagerRuntime::runtime_snapshot", source))?;
278    let engine_id = cpu_runtime_engine_id()
279        .map_err(|source| runtime_config_error("EagerRuntime::cpu_runtime_engine_id", source))?;
280    let expected_hardware = cpu_runtime_hardware_class().map_err(|source| {
281        runtime_config_error("EagerRuntime::cpu_runtime_hardware_class", source)
282    })?;
283    let engine = snapshot
284        .engine(&engine_id)
285        .ok_or_else(|| cpu_runtime_bridge_unsupported("missing CPU runtime engine"))?;
286    validate_cpu_runtime_engine(
287        engine.context_identity(),
288        engine.hardware_class(),
289        engine.capabilities(),
290        &expected_hardware,
291    )?;
292    let epoch = snapshot.epoch();
293    let registration_identity = engine.registration_identity();
294    let capabilities = engine.capabilities().clone();
295    Ok(CpuRuntimeSelection {
296        snapshot,
297        epoch,
298        engine_id,
299        registration_identity,
300        capabilities,
301    })
302}
303
304fn validate_cpu_runtime_engine(
305    context_identity: ExecutionContextIdentity,
306    hardware_class: &HardwareClassId,
307    capabilities: &CoreCapabilityBundle,
308    expected_hardware: &HardwareClassId,
309) -> Result<()> {
310    if context_identity != ExecutionContextIdentity::of::<CpuBackend>() {
311        return Err(cpu_runtime_bridge_unsupported(
312            "CPU runtime context mismatch",
313        ));
314    }
315    if hardware_class != expected_hardware {
316        return Err(cpu_runtime_bridge_unsupported(
317            "CPU runtime hardware mismatch",
318        ));
319    }
320    if capabilities.elementwise().is_none() {
321        return Err(cpu_runtime_bridge_unsupported(
322            "missing CPU runtime capability: elementwise",
323        ));
324    }
325    if capabilities.reduction().is_none() {
326        return Err(cpu_runtime_bridge_unsupported(
327            "missing CPU runtime capability: reduction",
328        ));
329    }
330    if capabilities.indexing().is_none() {
331        return Err(cpu_runtime_bridge_unsupported(
332            "missing CPU runtime capability: indexing",
333        ));
334    }
335    if capabilities.dot_general().is_none() {
336        return Err(cpu_runtime_bridge_unsupported(
337            "missing CPU runtime capability: dot_general",
338        ));
339    }
340    if capabilities.layout().is_none() {
341        return Err(cpu_runtime_bridge_unsupported(
342            "missing CPU runtime capability: layout",
343        ));
344    }
345    Ok(())
346}
347
/// Stats for caches owned by an [`EagerRuntime`].
///
/// One [`CacheStats`] value is reported per cache family.
/// `retained_bytes` fields are logical payload estimates, not process RSS.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub struct EagerRuntimeCacheStats {
    /// Generic extension runtime caches.
    pub extensions: CacheStats,
    /// Eager AD transform memoization cache.
    pub ad_transforms: CacheStats,
    /// Prepared eager derivative program cache.
    pub prepared_derivatives: CacheStats,
}
360
/// Test-only container for the output tensors of one eager graph execution.
///
/// NOTE(review): the producer of this value is not visible in this part of
/// the file — confirm how `outputs` is ordered.
#[cfg(test)]
pub(crate) struct EagerGraphExecution {
    pub(crate) outputs: Vec<Arc<Tensor>>,
}
365
/// Placement-selected CPU view of one [`EagerRuntime`].
///
/// The view snapshots the runtime's CPU coordinator/provider bundle and the
/// immutable runtime registration metadata when [`EagerRuntime::on_cpu`] is
/// called. It holds no resource permit while idle and enters one backend
/// session only while [`Self::with_eager_session`] runs. The session exposes
/// core [`BackendSession`] operations on concrete [`Tensor`] values. This
/// bridge deliberately does not expose the eager runtime's linalg, FFT, einsum,
/// or extension-runtime registries.
///
/// The value is intentionally not `Clone`: mutable use makes concurrent
/// session ownership explicit without adding another backend mutex.
///
/// # Examples
///
/// ```rust
/// use tenferro_ad::EagerRuntime;
/// use tenferro_cpu::CpuPlacement;
///
/// let runtime = EagerRuntime::new()?;
/// let cpu = runtime.on_cpu(CpuPlacement::Auto)?;
/// assert_eq!(cpu.runtime_id(), runtime.id());
/// # Ok::<(), tenferro_ad::Error>(())
/// ```
pub struct CpuPlacementBoundEager {
    // Originating eager runtime; consulted for its id and current runtime epoch.
    runtime: Arc<EagerRuntime>,
    // CPU backend resolved for the placement requested in `EagerRuntime::on_cpu`.
    backend: CpuBackend,
    // The five fields below mirror `CpuRuntimeSelection`. They are captured when
    // the view is created and replaced together when the runtime epoch changes.
    snapshot: Arc<RuntimeConfigSnapshot>,
    epoch: RuntimeEpoch,
    engine_id: EngineId,
    registration_identity: RegistrationIdentity,
    capabilities: CoreCapabilityBundle,
}
399
400impl fmt::Debug for CpuPlacementBoundEager {
401    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
402        f.debug_struct("CpuPlacementBoundEager")
403            .field("runtime_id", &self.runtime.id())
404            .field("placement", &self.backend.placement())
405            .field("runtime_epoch", &self.epoch)
406            .field("engine_id", &self.engine_id)
407            .field("registration_identity", &self.registration_identity)
408            .finish_non_exhaustive()
409    }
410}
411
412impl CpuPlacementBoundEager {
413    fn refresh_runtime_selection(&mut self) -> Result<()> {
414        let current_epoch = self.runtime.runtime.epoch().map_err(|source| {
415            runtime_state_source("CpuPlacementBoundEager::refresh_runtime_selection", source)
416        })?;
417        if current_epoch == self.epoch {
418            return Ok(());
419        }
420
421        #[cfg(test)]
422        CPU_RUNTIME_SELECTION_REFRESHES.fetch_add(1, Ordering::SeqCst);
423
424        let selection = select_cpu_runtime(&self.runtime.runtime)?;
425        self.snapshot = selection.snapshot;
426        self.epoch = selection.epoch;
427        self.engine_id = selection.engine_id;
428        self.registration_identity = selection.registration_identity;
429        self.capabilities = selection.capabilities;
430        Ok(())
431    }
432
433    /// Return the identity of the original eager runtime.
434    ///
435    /// # Examples
436    ///
437    /// ```rust
438    /// use tenferro_ad::EagerRuntime;
439    /// use tenferro_cpu::CpuPlacement;
440    ///
441    /// let runtime = EagerRuntime::new()?;
442    /// let cpu = runtime.on_cpu(CpuPlacement::Auto)?;
443    /// assert_eq!(cpu.runtime_id(), runtime.id());
444    /// # Ok::<(), tenferro_ad::Error>(())
445    /// ```
446    pub fn runtime_id(&self) -> ContextId {
447        self.runtime.id()
448    }
449
450    /// Return the placement requested when this view was created.
451    ///
452    /// # Examples
453    ///
454    /// ```rust
455    /// use tenferro_ad::EagerRuntime;
456    /// use tenferro_cpu::CpuPlacement;
457    ///
458    /// let runtime = EagerRuntime::new()?;
459    /// let cpu = runtime.on_cpu(CpuPlacement::Auto)?;
460    /// assert_eq!(cpu.placement(), CpuPlacement::Auto);
461    /// # Ok::<(), tenferro_ad::Error>(())
462    /// ```
463    pub fn placement(&self) -> CpuPlacement {
464        self.backend.placement()
465    }
466
467    /// Enter one CPU backend session and run core operations through it.
468    ///
469    /// One call creates one backend session. Tenferro-managed CPU executors
470    /// enter once around the closure and core operations reuse that compatible
471    /// execution scope. The closure may borrow stack data and need not be
472    /// `'static`.
473    ///
474    /// This phase-2 bridge accepts only core [`BackendSession`] operations. It
475    /// does not lock or dispatch the eager runtime's linalg, FFT, einsum, or
476    /// extension registries.
477    ///
478    /// # Examples
479    ///
480    /// ```rust
481    /// use tenferro_ad::{EagerRuntime, Error};
482    /// use tenferro_cpu::CpuPlacement;
483    /// use tenferro_tensor::{Tensor, TensorElementwise};
484    ///
485    /// let runtime = EagerRuntime::new()?;
486    /// let mut cpu = runtime.on_cpu(CpuPlacement::Auto)?;
487    /// let lhs = Tensor::from_vec_col_major(vec![1], vec![1.0_f64])?;
488    /// let rhs = Tensor::from_vec_col_major(vec![1], vec![2.0_f64])?;
489    /// let output = cpu.with_eager_session(|session| {
490    ///     TensorElementwise::add(session, &lhs, &rhs).map_err(Error::from)
491    /// })?;
492    /// assert_eq!(output.as_slice::<f64>().unwrap(), &[3.0]);
493    /// # Ok::<(), Error>(())
494    /// ```
495    ///
496    /// # Errors
497    ///
498    /// Returns the callback's [`Error`] unchanged. Core backend operations may
499    /// report validation, unsupported capability, backend, or runtime-state
500    /// failures through that error.
501    ///
502    /// # Panics
503    ///
504    /// The existing CPU backend re-entry guard panics if the callback enters a
505    /// public `CpuBackend` or calls an ordinary `EagerTensor` operation on this
506    /// same runtime. Use only the borrowed `session` for work inside the scope.
507    pub fn with_eager_session<R: Send>(
508        &mut self,
509        f: impl FnOnce(&mut dyn BackendSession) -> Result<R> + Send,
510    ) -> Result<R> {
511        self.refresh_runtime_selection()?;
512        self.backend.with_backend_session(f)
513    }
514}
515
/// Shared eager execution context for tensors on a backend.
///
/// Reusing one context lets eager tensors share backend state, extension
/// runtime caches, and gradient storage across a computation.
///
/// # Examples
///
/// ```
/// use tenferro_cpu::CpuBackend;
/// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
///
/// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
/// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(), ctx.clone()).unwrap();
/// let y = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap(), ctx).unwrap();
/// let z = x.add(&y).unwrap();
///
/// assert_eq!(z.materialized().unwrap().as_slice::<f64>().unwrap(), &[3.0]);
/// # Ok::<(), tenferro_ad::Error>(())
/// ```
pub struct EagerRuntime {
    // Fresh context id assigned at construction.
    id: ContextId,
    // Provider runtime built for the backend by `eager_runtime_for_backend`.
    runtime: Runtime,
    // The backend and its exact runtime engine registration are selected
    // together during construction and remain paired for this runtime's
    // lifetime. The mutex only serializes mutable backend operations.
    backend: Mutex<EagerBackend>,
    // Payload-free mutex. NOTE(review): presumably serializes extension
    // installation; the call sites are not visible here — confirm.
    extension_install_lock: Mutex<()>,
    // Store backing the extension runtime caches.
    pub(crate) extension_caches: Mutex<ExtensionCacheStore>,
    // AD extension rules; default, or cloned from an `AdContext` at construction.
    semantic_extension_rules: SemanticExtensionRuleSet,
    // Weak handles to gradient slots, keyed by value key.
    grad_slots: Mutex<HashMap<ValueKey<StdTensorOp>, WeakGradSlot>>,
    // Weak handles to eager tensor records, keyed by value key.
    value_records: Mutex<HashMap<ValueKey<StdTensorOp>, Weak<EagerTensorRecord>>>,
    // Weak handles to eager tensor records, keyed by a `usize`.
    // NOTE(review): presumably a pointer address — confirm against the writers.
    value_ptr_records: Mutex<HashMap<usize, Weak<EagerTensorRecord>>>,
    // AD transform memoization cache; freshly created, or shared with an `AdContext`.
    ad_transform_cache: Arc<AdTransformCache>,
    /// S2: prepared derivative programs keyed by semantic structure, wrt input,
    /// and concrete bound input metadata. Avoids re-running freeze+AD
    /// transform+compile_frozen on warm structure hits.
    prepared_derivative_cache: Mutex<PreparedDerivativeCache>,
}
554
555impl fmt::Debug for EagerRuntime {
556    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
557        let mut debug = f.debug_struct("EagerRuntime");
558        debug.field("id", &self.id);
559        debug.field("runtime_id", &self.runtime.id());
560        debug.field("runtime_epoch", &self.runtime.epoch().ok());
561        match self.backend.try_lock() {
562            Ok(backend) => {
563                debug.field("backend", &*backend);
564            }
565            Err(_) => {
566                debug.field("backend", &"<locked>");
567            }
568        }
569        match self.extension_caches.try_lock() {
570            Ok(caches) => {
571                debug.field(
572                    "extension_cache_stats",
573                    &caches.stats(ExtensionCacheSelector::All),
574                );
575            }
576            Err(_) => {
577                debug.field("extension_cache_stats", &"<locked>");
578            }
579        }
580        match self.extension_install_lock.try_lock() {
581            Ok(_) => {
582                debug.field("extension_install_lock", &"<unlocked>");
583            }
584            Err(_) => {
585                debug.field("extension_install_lock", &"<locked>");
586            }
587        }
588        debug.field("semantic_extension_rules", &self.semantic_extension_rules);
589        match self.grad_slots.try_lock() {
590            Ok(slots) => {
591                debug.field("grad_slots_len", &slots.len());
592            }
593            Err(_) => {
594                debug.field("grad_slots_len", &"<locked>");
595            }
596        }
597        match self.value_records.try_lock() {
598            Ok(records) => {
599                debug.field("value_records_len", &records.len());
600            }
601            Err(_) => {
602                debug.field("value_records_len", &"<locked>");
603            }
604        }
605        match self.value_ptr_records.try_lock() {
606            Ok(records) => {
607                debug.field("value_ptr_records_len", &records.len());
608            }
609            Err(_) => {
610                debug.field("value_ptr_records_len", &"<locked>");
611            }
612        }
613        match self.ad_transform_cache.stats() {
614            Ok(stats) => {
615                debug.field("ad_transform_cache_stats", &stats);
616            }
617            Err(err) => {
618                debug.field("ad_transform_cache_stats", &format_args!("{err}"));
619            }
620        }
621        match self.prepared_derivative_cache.try_lock() {
622            Ok(cache) => {
623                debug.field("prepared_derivative_cache_stats", &cache.stats());
624            }
625            Err(_) => {
626                debug.field("prepared_derivative_cache_stats", &"<locked>");
627            }
628        }
629        debug.finish_non_exhaustive()
630    }
631}
632
633impl EagerRuntime {
    /// Lock the eager backend mutex.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for `eager_backend` if
    /// the mutex is poisoned.
    pub(crate) fn lock_backend(&self) -> Result<MutexGuard<'_, EagerBackend>> {
        self.backend.lock().map_err(|_| {
            Error::runtime_state("eager_backend", ErrorPhase::Execution, "lock poisoned")
        })
    }
639
    /// Lock the extension cache store.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `eager_extension_caches` if the mutex is poisoned.
    fn lock_extension_caches(&self) -> Result<MutexGuard<'_, ExtensionCacheStore>> {
        self.extension_caches.lock().map_err(|_| {
            Error::runtime_state(
                "eager_extension_caches",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
649
    /// Lock the payload-free extension install mutex.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `eager_extension_install` if the mutex is poisoned.
    fn lock_extension_install(&self) -> Result<MutexGuard<'_, ()>> {
        self.extension_install_lock.lock().map_err(|_| {
            Error::runtime_state(
                "eager_extension_install",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
659
    /// Lock the prepared derivative program cache.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `prepared_derivative_cache` if the mutex is poisoned.
    fn lock_prepared_derivative_cache(&self) -> Result<MutexGuard<'_, PreparedDerivativeCache>> {
        self.prepared_derivative_cache.lock().map_err(|_| {
            Error::runtime_state(
                "prepared_derivative_cache",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
669
    /// Lock the registry of weak gradient slots keyed by value key.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `eager_gradient_slots` if the mutex is poisoned.
    fn lock_grad_slots(
        &self,
    ) -> Result<MutexGuard<'_, HashMap<ValueKey<StdTensorOp>, WeakGradSlot>>> {
        self.grad_slots.lock().map_err(|_| {
            Error::runtime_state(
                "eager_gradient_slots",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
681
    /// Lock the registry of weak eager tensor records keyed by value key.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `eager_value_registry` if the mutex is poisoned.
    fn lock_value_records(
        &self,
    ) -> Result<MutexGuard<'_, HashMap<ValueKey<StdTensorOp>, Weak<EagerTensorRecord>>>> {
        self.value_records.lock().map_err(|_| {
            Error::runtime_state(
                "eager_value_registry",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
693
    /// Lock the registry of weak eager tensor records keyed by `usize`.
    ///
    /// # Errors
    ///
    /// Returns an execution-phase runtime-state error for
    /// `eager_value_pointer_registry` if the mutex is poisoned.
    fn lock_value_ptr_records(
        &self,
    ) -> Result<MutexGuard<'_, HashMap<usize, Weak<EagerTensorRecord>>>> {
        self.value_ptr_records.lock().map_err(|_| {
            Error::runtime_state(
                "eager_value_pointer_registry",
                ErrorPhase::Execution,
                "lock poisoned",
            )
        })
    }
705
    /// Build a runtime around `backend` with the default semantic extension
    /// rules and a freshly created AD transform cache.
    ///
    /// # Errors
    ///
    /// Propagates any error from `from_backend_with_rules_and_cache`.
    fn from_backend(backend: EagerBackend) -> Result<Self> {
        Self::from_backend_with_rules_and_cache(
            backend,
            SemanticExtensionRuleSet::default(),
            Arc::new(AdTransformCache::new()),
        )
    }
713
714    fn from_backend_with_rules_and_cache(
715        backend: EagerBackend,
716        semantic_extension_rules: SemanticExtensionRuleSet,
717        ad_transform_cache: Arc<AdTransformCache>,
718    ) -> Result<Self> {
719        let runtime = eager_runtime_for_backend(&backend)
720            .map_err(|source| runtime_config_error("EagerRuntime::from_backend", source))?;
721        Ok(Self {
722            id: ContextId::fresh(),
723            runtime,
724            backend: Mutex::new(backend),
725            extension_install_lock: Mutex::new(()),
726            extension_caches: Mutex::new(ExtensionCacheStore::new()),
727            semantic_extension_rules,
728            grad_slots: Mutex::new(HashMap::new()),
729            value_records: Mutex::new(HashMap::new()),
730            value_ptr_records: Mutex::new(HashMap::new()),
731            ad_transform_cache,
732            prepared_derivative_cache: Mutex::new(PreparedDerivativeCache::default()),
733        })
734    }
735
    /// Create a shared CPU eager execution context.
    ///
    /// Equivalent to [`Self::with_cpu_backend`] called with `CpuBackend::new()`.
    ///
    /// # Examples
    ///
    /// ```
    /// use tenferro_ad::EagerRuntime;
    ///
    /// let ctx = EagerRuntime::new()?;
    /// assert_eq!(std::sync::Arc::strong_count(&ctx), 1);
    /// # Ok::<(), tenferro_ad::Error>(())
    /// ```
    ///
    /// # Errors
    ///
    /// Returns [`Error::RuntimeStateSource`] when provider runtime
    /// registration cannot be configured, preserving the underlying
    /// [`RuntimeConfigError`] as the typed error source.
    pub fn new() -> Result<Arc<Self>> {
        Self::with_cpu_backend(CpuBackend::new())
    }
756
757    /// Create a shared eager execution context from a configured CPU backend.
758    ///
759    /// # Examples
760    ///
761    /// ```
762    /// use tenferro_cpu::CpuBackend;
763    /// use tenferro_ad::{EagerRuntime};
764    ///
765    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::with_threads(1)?)?;
766    /// assert_eq!(std::sync::Arc::strong_count(&ctx), 1);
767    /// # Ok::<(), Box<dyn std::error::Error>>(())
768    /// ```
769    ///
770    /// # Errors
771    ///
772    /// Returns [`Error::RuntimeStateSource`] when provider runtime
773    /// registration cannot be configured, preserving the underlying
774    /// [`RuntimeConfigError`] as the typed error source.
775    pub fn with_cpu_backend(backend: CpuBackend) -> Result<Arc<Self>> {
776        Ok(Arc::new(Self::from_backend(EagerBackend::cpu(backend))?))
777    }
778
779    /// Snapshot a placement-selected CPU handle from this eager runtime.
780    ///
781    /// The eager backend lock is held only long enough to verify the backend
782    /// kind and clone its CPU coordinator/provider snapshot. Placement
783    /// resolution happens after that guard is dropped. The returned value does
784    /// not hold a resource permit or a second runtime/backend mutex while idle.
785    ///
786    /// # Examples
787    ///
788    /// ```rust
789    /// use tenferro_ad::EagerRuntime;
790    /// use tenferro_cpu::CpuPlacement;
791    ///
792    /// let runtime = EagerRuntime::new()?;
793    /// let cpu = runtime.on_cpu(CpuPlacement::Auto)?;
794    /// assert_eq!(cpu.runtime_id(), runtime.id());
795    /// # Ok::<(), tenferro_ad::Error>(())
796    /// ```
797    ///
798    /// # Errors
799    ///
800    /// Returns [`Error::RuntimeState`] if the eager backend lock is poisoned,
801    /// [`Error::Unsupported`] if the runtime is not CPU-backed, or a typed
802    /// tensor runtime error retaining [`tenferro_cpu::CpuPlacementError`] when
803    /// the requested placement cannot be resolved.
804    pub fn on_cpu(self: &Arc<Self>, placement: CpuPlacement) -> Result<CpuPlacementBoundEager> {
805        let backend = {
806            let backend = self.lock_backend()?;
807            backend.cpu_snapshot().ok_or_else(|| {
808                Error::unsupported(
809                    "EagerRuntime::on_cpu",
810                    ErrorPhase::Execution,
811                    "the eager runtime is not CPU-backed",
812                )
813            })?
814        };
815        let selection = select_cpu_runtime(&self.runtime)?;
816        let backend = backend.for_placement(placement).map_err(|source| {
817            let error: tenferro_tensor::Error = CpuBackendError::Placement {
818                op: "EagerRuntime::on_cpu",
819                source,
820            }
821            .into();
822            Error::from(error)
823        })?;
824        Ok(CpuPlacementBoundEager {
825            runtime: Arc::clone(self),
826            backend,
827            snapshot: selection.snapshot,
828            epoch: selection.epoch,
829            engine_id: selection.engine_id,
830            registration_identity: selection.registration_identity,
831            capabilities: selection.capabilities,
832        })
833    }
834
835    /// Create a shared CPU eager context with explicit AD extension rules.
836    ///
837    /// # Examples
838    ///
839    /// ```rust
840    /// use tenferro_cpu::CpuBackend;
841    /// use tenferro_ad::{AdContext, EagerRuntime};
842    ///
843    /// let ad = AdContext::builder().build().unwrap();
844    /// let ctx = EagerRuntime::with_cpu_backend_and_ad_context(CpuBackend::new(), &ad)?;
845    /// assert_eq!(std::sync::Arc::strong_count(&ctx), 1);
846    /// # Ok::<(), tenferro_ad::Error>(())
847    /// ```
848    ///
849    /// # Errors
850    ///
851    /// Returns [`Error::RuntimeStateSource`] when provider runtime
852    /// registration cannot be configured, preserving the underlying
853    /// [`RuntimeConfigError`] as the typed error source.
854    pub fn with_cpu_backend_and_ad_context(
855        backend: CpuBackend,
856        ad: &AdContext,
857    ) -> Result<Arc<Self>> {
858        Ok(Arc::new(Self::from_backend_with_rules_and_cache(
859            EagerBackend::cpu(backend),
860            ad.semantic_extension_rules().clone(),
861            ad.ad_transform_cache(),
862        )?))
863    }
864
865    /// Create a shared eager execution context from a configured CUDA backend.
866    ///
867    /// # Examples
868    ///
869    /// ```
870    /// use tenferro_gpu::CudaBackend;
871    /// use tenferro_ad::EagerRuntime;
872    ///
873    /// let _ctor: fn(CudaBackend) -> tenferro_ad::Result<std::sync::Arc<EagerRuntime>> =
874    ///     EagerRuntime::with_cuda_backend;
875    /// ```
876    #[cfg(feature = "cuda")]
877    ///
878    /// # Errors
879    ///
880    /// Returns [`Error::RuntimeStateSource`] when provider runtime
881    /// registration cannot be configured, preserving the underlying
882    /// [`RuntimeConfigError`] as the typed error source.
883    pub fn with_cuda_backend(backend: CudaBackend) -> Result<Arc<Self>> {
884        Ok(Arc::new(Self::from_backend(EagerBackend::cuda(backend))?))
885    }
886
887    /// Create a shared CUDA eager context with explicit AD extension rules.
888    ///
889    /// # Examples
890    ///
891    /// ```rust
892    /// use tenferro_ad::{AdContext, EagerRuntime};
893    /// use tenferro_gpu::CudaBackend;
894    ///
895    /// let _ctor: fn(CudaBackend, &AdContext) -> tenferro_ad::Result<std::sync::Arc<EagerRuntime>> =
896    ///     EagerRuntime::with_cuda_backend_and_ad_context;
897    /// ```
898    #[cfg(feature = "cuda")]
899    ///
900    /// # Errors
901    ///
902    /// Returns [`Error::RuntimeStateSource`] when provider runtime
903    /// registration cannot be configured, preserving the underlying
904    /// [`RuntimeConfigError`] as the typed error source.
905    pub fn with_cuda_backend_and_ad_context(
906        backend: CudaBackend,
907        ad: &AdContext,
908    ) -> Result<Arc<Self>> {
909        Ok(Arc::new(Self::from_backend_with_rules_and_cache(
910            EagerBackend::cuda(backend),
911            ad.semantic_extension_rules().clone(),
912            ad.ad_transform_cache(),
913        )?))
914    }
915
916    /// Create a shared eager execution context from a configured WebGPU backend.
917    ///
918    /// # Examples
919    ///
920    /// ```
921    /// use tenferro_ad::EagerRuntime;
922    /// use tenferro_gpu::WebGpuBackend;
923    ///
924    /// let _ctor: fn(WebGpuBackend) -> tenferro_ad::Result<std::sync::Arc<EagerRuntime>> =
925    ///     EagerRuntime::with_webgpu_backend;
926    /// ```
927    #[cfg(feature = "webgpu")]
928    ///
929    /// # Errors
930    ///
931    /// Returns [`Error::RuntimeStateSource`] when provider runtime
932    /// registration cannot be configured, preserving the underlying
933    /// [`RuntimeConfigError`] as the typed error source.
934    pub fn with_webgpu_backend(backend: WebGpuBackend) -> Result<Arc<Self>> {
935        Ok(Arc::new(Self::from_backend(EagerBackend::webgpu(backend))?))
936    }
937
938    /// Create a shared WebGPU eager context with explicit AD extension rules.
939    ///
940    /// # Examples
941    ///
942    /// ```rust
943    /// use tenferro_ad::{AdContext, EagerRuntime};
944    /// use tenferro_gpu::WebGpuBackend;
945    ///
946    /// let _ctor: fn(WebGpuBackend, &AdContext) -> tenferro_ad::Result<std::sync::Arc<EagerRuntime>> =
947    ///     EagerRuntime::with_webgpu_backend_and_ad_context;
948    /// ```
949    #[cfg(feature = "webgpu")]
950    ///
951    /// # Errors
952    ///
953    /// Returns [`Error::RuntimeStateSource`] when provider runtime
954    /// registration cannot be configured, preserving the underlying
955    /// [`RuntimeConfigError`] as the typed error source.
956    pub fn with_webgpu_backend_and_ad_context(
957        backend: WebGpuBackend,
958        ad: &AdContext,
959    ) -> Result<Arc<Self>> {
960        Ok(Arc::new(Self::from_backend_with_rules_and_cache(
961            EagerBackend::webgpu(backend),
962            ad.semantic_extension_rules().clone(),
963            ad.ad_transform_cache(),
964        )?))
965    }
966
    /// Return the opaque identifier stored on this context.
    ///
    /// The identifier is copied out of the runtime's `id` field. The example
    /// below shows that two separately constructed runtimes report different
    /// identifiers.
    ///
    /// # Examples
    ///
    /// ```
    /// use tenferro_cpu::CpuBackend;
    /// use tenferro_ad::{EagerRuntime};
    ///
    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
    /// assert_ne!(ctx.id(), EagerRuntime::with_cpu_backend(CpuBackend::new())?.id());
    /// # Ok::<(), tenferro_ad::Error>(())
    /// ```
    pub fn id(&self) -> ContextId {
        self.id
    }
982
983    /// Disable eager operation recording on the current thread until the guard is dropped.
984    ///
985    /// This is useful for optimizer updates, metric calculations, and other
986    /// eager computations that should not become part of the AD tape.
987    ///
988    /// # Examples
989    ///
990    /// ```
991    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
992    /// use tenferro_cpu::CpuBackend;
993    ///
994    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
995    /// let x = EagerTensor::requires_grad_in(
996    ///     Tensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap(),
997    ///     ctx.clone(),
998    /// )?;
999    /// let y = {
1000    ///     let _guard = ctx.no_grad();
1001    ///     x.mul(&x)?
1002    /// };
1003    /// assert!(!y.tracks_grad());
1004    /// # Ok::<(), tenferro_ad::Error>(())
1005    /// ```
1006    pub fn no_grad(&self) -> EagerNoGradGuard {
1007        EAGER_NO_GRAD_DEPTH.with(|depth| {
1008            depth.set(depth.get().saturating_add(1));
1009        });
1010        EagerNoGradGuard { active: true }
1011    }
1012
1013    /// Install or replace one extension module on this eager context's runtime.
1014    ///
1015    /// Eager extension wrappers call this as an idempotent "ensure installed"
1016    /// step. The eager context serializes this path so parallel first-use of the
1017    /// same extension family cannot publish over another thread's base snapshot.
1018    ///
1019    /// # Errors
1020    ///
1021    /// Returns [`tenferro_runtime::Error::RuntimeState`] when runtime
1022    /// reconfiguration fails or the extension module transaction is invalid.
1023    pub fn install_extension_module(
1024        &self,
1025        module: Arc<dyn ExtensionModule>,
1026    ) -> Result<RuntimeEpoch> {
1027        let _install_guard = self.lock_extension_install()?;
1028        self.runtime
1029            .reconfigure(|edit| {
1030                edit.replace_extension_module(module)?;
1031                Ok(())
1032            })
1033            .map_err(|source| {
1034                runtime_state_source("EagerRuntime::install_extension_module", source)
1035            })
1036    }
1037
1038    /// Clear generic extension runtime cache entries.
1039    ///
1040    /// # Examples
1041    ///
1042    /// ```
1043    /// use tenferro_cpu::CpuBackend;
1044    /// use tenferro_ad::{EagerRuntime};
1045    ///
1046    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1047    /// ctx.clear_extension_caches()?;
1048    /// assert_eq!(ctx.cache_stats()?.extensions.entries, 0);
1049    /// # Ok::<(), tenferro_ad::Error>(())
1050    /// ```
1051    ///
1052    /// # Errors
1053    ///
1054    /// Returns [`tenferro_runtime::Error::RuntimeState`] when the extension
1055    /// cache lock is poisoned.
1056    pub fn clear_extension_caches(&self) -> Result<()> {
1057        self.lock_extension_caches()?.clear();
1058        Ok(())
1059    }
1060
1061    /// Clear every cache owned by this eager context.
1062    ///
1063    /// # Examples
1064    ///
1065    /// ```
1066    /// use tenferro_cpu::CpuBackend;
1067    /// use tenferro_ad::{EagerRuntime};
1068    ///
1069    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1070    /// ctx.clear_caches()?;
1071    /// assert_eq!(ctx.cache_stats()?.extensions.entries, 0);
1072    /// assert_eq!(ctx.cache_stats()?.ad_transforms.entries, 0);
1073    /// assert_eq!(ctx.cache_stats()?.prepared_derivatives.entries, 0);
1074    /// # Ok::<(), tenferro_ad::Error>(())
1075    /// ```
1076    ///
1077    /// # Errors
1078    ///
1079    /// Returns [`tenferro_runtime::Error::RuntimeState`] when either the
1080    /// extension cache or AD-transform cache is poisoned.
1081    pub fn clear_caches(&self) -> Result<()> {
1082        self.clear_extension_caches()?;
1083        self.clear_ad_transform_caches()?;
1084        self.clear_prepared_derivative_cache()?;
1085        Ok(())
1086    }
1087
1088    /// Clear prepared derivative program cache entries.
1089    ///
1090    /// # Examples
1091    ///
1092    /// ```rust
1093    /// use tenferro_ad::EagerRuntime;
1094    /// use tenferro_cpu::CpuBackend;
1095    ///
1096    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1097    /// ctx.clear_prepared_derivative_cache()?;
1098    /// assert_eq!(ctx.cache_stats()?.prepared_derivatives.entries, 0);
1099    /// # Ok::<(), tenferro_ad::Error>(())
1100    /// ```
1101    ///
1102    /// # Errors
1103    ///
1104    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the prepared
1105    /// derivative cache lock is poisoned.
1106    pub fn clear_prepared_derivative_cache(&self) -> Result<()> {
1107        self.lock_prepared_derivative_cache()?.clear();
1108        Ok(())
1109    }
1110
1111    /// Return eager runtime cache-entry and retained-byte stats.
1112    ///
1113    /// # Examples
1114    ///
1115    /// ```
1116    /// use tenferro_cpu::CpuBackend;
1117    /// use tenferro_ad::{EagerRuntime};
1118    ///
1119    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1120    /// let stats = ctx.cache_stats()?;
1121    /// assert_eq!(stats.extensions.entries, 0);
1122    /// assert_eq!(stats.ad_transforms.entries, 0);
1123    /// assert_eq!(stats.prepared_derivatives.entries, 0);
1124    /// # Ok::<(), tenferro_ad::Error>(())
1125    /// ```
1126    ///
1127    /// # Errors
1128    ///
1129    /// Returns [`tenferro_runtime::Error::RuntimeState`] when a cache or
1130    /// AD-transform cache lock is poisoned.
1131    pub fn cache_stats(&self) -> Result<EagerRuntimeCacheStats> {
1132        Ok(EagerRuntimeCacheStats {
1133            extensions: self
1134                .lock_extension_caches()?
1135                .stats(ExtensionCacheSelector::All),
1136            ad_transforms: self.ad_transform_cache.stats()?,
1137            prepared_derivatives: self.lock_prepared_derivative_cache()?.stats(),
1138        })
1139    }
1140
1141    /// Return the AD transform cache retention limits.
1142    ///
1143    /// # Examples
1144    ///
1145    /// ```
1146    /// use tenferro_ad::EagerRuntime;
1147    /// use tenferro_cpu::CpuBackend;
1148    ///
1149    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1150    /// assert!(ctx.ad_transform_cache_limits()?.max_entries().get() > 0);
1151    /// # Ok::<(), tenferro_ad::Error>(())
1152    /// ```
1153    ///
1154    /// # Errors
1155    ///
1156    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the AD-transform
1157    /// cache lock is poisoned.
1158    pub fn ad_transform_cache_limits(&self) -> Result<AdTransformCacheLimits> {
1159        self.ad_transform_cache.limits()
1160    }
1161
1162    /// Replace AD transform cache retention limits.
1163    ///
1164    /// # Examples
1165    ///
1166    /// ```
1167    /// use std::num::NonZeroUsize;
1168    /// use tenferro_ad::{AdTransformCacheLimits, EagerRuntime};
1169    /// use tenferro_cpu::CpuBackend;
1170    ///
1171    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1172    /// let limits = AdTransformCacheLimits::new(NonZeroUsize::new(1).unwrap());
1173    /// ctx.set_ad_transform_cache_limits(limits)?;
1174    /// assert_eq!(ctx.ad_transform_cache_limits()?, limits);
1175    /// # Ok::<(), tenferro_ad::Error>(())
1176    /// ```
1177    ///
1178    /// # Errors
1179    ///
1180    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the AD-transform
1181    /// cache lock is poisoned while updating limits.
1182    pub fn set_ad_transform_cache_limits(&self, limits: AdTransformCacheLimits) -> Result<()> {
1183        self.ad_transform_cache.set_limits(limits)
1184    }
1185
1186    /// Clear AD transform cache entries visible through this eager runtime.
1187    ///
1188    /// # Examples
1189    ///
1190    /// ```
1191    /// use tenferro_ad::EagerRuntime;
1192    /// use tenferro_cpu::CpuBackend;
1193    ///
1194    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1195    /// ctx.clear_ad_transform_caches()?;
1196    /// assert_eq!(ctx.cache_stats()?.ad_transforms.entries, 0);
1197    /// # Ok::<(), tenferro_ad::Error>(())
1198    /// ```
1199    ///
1200    /// # Errors
1201    ///
1202    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the AD-transform
1203    /// cache lock is poisoned while clearing entries.
1204    pub fn clear_ad_transform_caches(&self) -> Result<()> {
1205        self.ad_transform_cache.clear()
1206    }
1207
1208    /// Return prepared derivative cache retention limits.
1209    ///
1210    /// # Examples
1211    ///
1212    /// ```rust
1213    /// use tenferro_ad::EagerRuntime;
1214    /// use tenferro_cpu::CpuBackend;
1215    ///
1216    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1217    /// assert!(ctx.prepared_derivative_cache_limits()?.max_entries().get() > 0);
1218    /// # Ok::<(), tenferro_ad::Error>(())
1219    /// ```
1220    ///
1221    /// # Errors
1222    ///
1223    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the prepared
1224    /// derivative cache lock is poisoned.
1225    pub fn prepared_derivative_cache_limits(&self) -> Result<AdTransformCacheLimits> {
1226        Ok(self.lock_prepared_derivative_cache()?.limits())
1227    }
1228
1229    /// Replace prepared derivative cache retention limits.
1230    ///
1231    /// # Examples
1232    ///
1233    /// ```rust
1234    /// use std::num::NonZeroUsize;
1235    /// use tenferro_ad::{AdTransformCacheLimits, EagerRuntime};
1236    /// use tenferro_cpu::CpuBackend;
1237    ///
1238    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1239    /// let limits = AdTransformCacheLimits::new(NonZeroUsize::new(1).unwrap());
1240    /// ctx.set_prepared_derivative_cache_limits(limits)?;
1241    /// assert_eq!(ctx.prepared_derivative_cache_limits()?, limits);
1242    /// # Ok::<(), tenferro_ad::Error>(())
1243    /// ```
1244    ///
1245    /// # Errors
1246    ///
1247    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the prepared
1248    /// derivative cache lock is poisoned.
1249    pub fn set_prepared_derivative_cache_limits(
1250        &self,
1251        limits: AdTransformCacheLimits,
1252    ) -> Result<()> {
1253        self.lock_prepared_derivative_cache()?.set_limits(limits);
1254        Ok(())
1255    }
1256
1257    /// Return the extension cache retention limits.
1258    ///
1259    /// # Errors
1260    ///
1261    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the extension
1262    /// cache lock is poisoned.
1263    pub fn extension_cache_limits(&self) -> Result<ExtensionCacheLimits> {
1264        Ok(self.lock_extension_caches()?.limits())
1265    }
1266
1267    /// Replace extension cache retention limits.
1268    ///
1269    /// # Errors
1270    ///
1271    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the extension
1272    /// cache lock is poisoned.
1273    pub fn set_extension_cache_limits(&self, limits: ExtensionCacheLimits) -> Result<()> {
1274        self.lock_extension_caches()?.set_limits(limits);
1275        Ok(())
1276    }
1277
1278    /// Enter one backend execution session and run provider-neutral operations.
1279    ///
1280    /// The callback receives only a lifetime-bound, non-owning backend session.
1281    /// The backend and its engine registration are fixed when the eager runtime
1282    /// is constructed. Extension modules are installed separately and remain
1283    /// available to later extension operations.
1284    ///
1285    /// # Examples
1286    ///
1287    /// ```
1288    /// use tenferro_ad::EagerRuntime;
1289    /// use tenferro_cpu::CpuBackend;
1290    /// use tenferro_tensor::{Tensor, TensorElementwise};
1291    ///
1292    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1293    /// let lhs = Tensor::from_vec_col_major(vec![1], vec![1.0_f64])?;
1294    /// let rhs = Tensor::from_vec_col_major(vec![1], vec![2.0_f64])?;
1295    /// let output = ctx.with_execution_session(|session| {
1296    ///     TensorElementwise::add(session, &lhs, &rhs)
1297    /// })??;
1298    /// assert_eq!(output.as_slice::<f64>()?, &[3.0]);
1299    /// # Ok::<(), tenferro_ad::Error>(())
1300    /// ```
1301    ///
1302    /// # Errors
1303    ///
1304    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the eager backend
1305    /// lock is poisoned. Backend operations retain their typed tensor/backend
1306    /// errors inside the callback result.
1307    pub fn with_execution_session<R: Send>(
1308        &self,
1309        f: impl FnOnce(&mut dyn BackendSession) -> R + Send,
1310    ) -> Result<R> {
1311        let mut backend = self.lock_backend()?;
1312        Ok(backend.with_backend_session(f))
1313    }
1314
1315    // Lock ordering: the eager backend owner is locked first; the
1316    // extension-cache lock is acquired only after it and remains held through
1317    // the borrowed session callback.
1318    /// Run an extension-owned eager operation with a borrowed backend session
1319    /// and the eager runtime's extension cache store.
1320    ///
1321    /// The eager backend owner is locked before the extension-cache lock is
1322    /// acquired. The callback receives an
1323    /// [`tenferro_runtime::ExtensionExecutionContext`] so cache access and
1324    /// backend execution share one lifetime-bound context without exposing the
1325    /// owning eager backend. The backend and its engine registration remain
1326    /// fixed for the eager runtime's lifetime.
1327    ///
1328    /// # Examples
1329    ///
1330    /// ```
1331    /// use tenferro_ad::EagerRuntime;
1332    /// use tenferro_cpu::CpuBackend;
1333    /// use tenferro_tensor::{Tensor, TensorElementwise};
1334    ///
1335    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1336    /// let lhs = Tensor::from_vec_col_major(vec![1], vec![1.0_f64])?;
1337    /// let rhs = Tensor::from_vec_col_major(vec![1], vec![2.0_f64])?;
1338    /// let output = ctx.with_extension_execution_context(|extension_ctx| {
1339    ///     TensorElementwise::add(extension_ctx.backend_mut(), &lhs, &rhs)
1340    /// })??;
1341    /// assert_eq!(output.as_slice::<f64>()?, &[3.0]);
1342    /// # Ok::<(), tenferro_ad::Error>(())
1343    /// ```
1344    ///
1345    /// # Errors
1346    ///
1347    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the eager backend
1348    /// or extension-cache lock is poisoned. Errors returned by the callback
1349    /// remain in its result value.
1350    pub fn with_extension_execution_context<R: Send>(
1351        &self,
1352        f: impl FnOnce(
1353                &mut tenferro_runtime::ExtensionExecutionContext<'_, dyn BackendSession + '_>,
1354            ) -> R
1355            + Send,
1356    ) -> Result<R> {
1357        let mut backend = self.lock_backend()?;
1358        let mut extension_cache_guard = self.lock_extension_caches()?;
1359        let extension_caches: &mut ExtensionCacheStore = &mut extension_cache_guard;
1360        Ok(backend.with_backend_session(move |session| {
1361            let mut extension_ctx =
1362                tenferro_runtime::ExtensionExecutionContext::new(session, extension_caches);
1363            f(&mut extension_ctx)
1364        }))
1365    }
1366
1367    pub(crate) fn materialize_value(&self, value: &TensorValue) -> Result<Tensor> {
1368        if let Some(tensor) = value.as_tensor_arc() {
1369            return Ok(tensor.as_ref().clone());
1370        }
1371
1372        let mut backend = self.lock_backend()?;
1373        backend
1374            .with_backend_session(|exec| exec.to_contiguous_read(value.tensor_read()))
1375            .map_err(Error::from)
1376    }
1377
1378    /// Block the current thread until backend work submitted by this eager runtime completes.
1379    ///
1380    /// CPU runtimes return immediately. CUDA and WebGPU runtimes synchronize
1381    /// their current backend work queue.
1382    ///
1383    /// # Examples
1384    ///
1385    /// ```
1386    /// use tenferro_cpu::CpuBackend;
1387    /// use tenferro_ad::EagerRuntime;
1388    ///
1389    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1390    /// ctx.synchronize().unwrap();
1391    /// # Ok::<(), tenferro_ad::Error>(())
1392    /// ```
1393    ///
1394    /// # Errors
1395    ///
1396    /// Returns [`tenferro_runtime::Error::RuntimeState`] if the backend lock is
1397    /// poisoned, or a typed tensor backend error if synchronization fails.
1398    pub fn synchronize(&self) -> Result<()> {
1399        self.lock_backend()?.synchronize().map_err(Error::from)
1400    }
1401
1402    fn exec_outputs_with_runtime<R>(
1403        &self,
1404        lock_backend_section: &'static str,
1405        exec_section: &'static str,
1406        op: &StdTensorOp,
1407        execute: impl FnOnce(&mut EagerBackend, Option<&Runtime>) -> Result<R>,
1408    ) -> Result<R> {
1409        // Lock ordering: eager execution holds the backend lock while standard
1410        // ops run without runtime extension access; extension ops receive the
1411        // runtime so extension cache locks are acquired only from that path.
1412        let mut backend = profile_eager_op_section(lock_backend_section, || self.lock_backend())?;
1413        let runtime = matches!(op, StdTensorOp::Extension(_)).then_some(&self.runtime);
1414        profile_eager_op_section(exec_section, || execute(&mut backend, runtime))
1415    }
1416
1417    pub(crate) fn exec_outputs(&self, op: &StdTensorOp, inputs: &[&Tensor]) -> Result<Vec<Tensor>> {
1418        self.exec_outputs_with_runtime(
1419            "exec_outputs.lock_backend",
1420            "exec_outputs.exec_op",
1421            op,
1422            |backend, runtime| exec_op_on_tensors_with_runtime(op, inputs, backend, runtime),
1423        )
1424    }
1425
1426    pub(crate) fn exec_outputs_read(
1427        &self,
1428        op: &StdTensorOp,
1429        inputs: &[TensorRead<'_>],
1430    ) -> Result<Vec<Tensor>> {
1431        self.exec_outputs_with_runtime(
1432            "exec_outputs_read.lock_backend",
1433            "exec_outputs_read.exec_op",
1434            op,
1435            |backend, runtime| exec_op_on_tensor_reads_with_runtime(op, inputs, backend, runtime),
1436        )
1437    }
1438
    /// Test-only helper: execute every operation of `graph` eagerly inside a
    /// single backend session and collect the graph outputs.
    ///
    /// `initial_data` seeds the value table. Each operation's inputs are
    /// resolved by value key, either through the graph's local value table or
    /// directly from an external key, and each operation's outputs are
    /// inserted back into the table under their own keys. Operations are
    /// visited in the order `graph.operations()` yields them.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Internal`] when an input or graph output key has no
    /// value in the table, or when an operation yields a different number of
    /// outputs than the graph node declares. Backend-lock failures and
    /// per-operation execution errors are propagated.
    #[cfg(test)]
    pub(crate) fn exec_standard_graph_outputs(
        &self,
        graph: &Graph<StdTensorOp>,
        initial_data: &HashMap<ValueKey<StdTensorOp>, Arc<Tensor>>,
    ) -> Result<EagerGraphExecution> {
        let mut backend =
            profile_eager_op_section("exec_graph.lock_backend", || self.lock_backend())?;
        // Working value table: starts as a copy of the caller's data and grows
        // as operations produce outputs.
        let mut all_values = initial_data.clone();

        profile_eager_op_section("exec_graph.with_backend_session", || {
            backend.with_backend_session(|exec| -> Result<()> {
                for op_node in graph.operations() {
                    let outputs = {
                        // Resolve every input reference to its tensor; local
                        // references are translated to their value key first.
                        let input_values = op_node
                            .inputs
                            .iter()
                            .map(|input| {
                                let key = match input {
                                    ValueRef::Local(local_id) => &graph.values()[*local_id].key,
                                    ValueRef::External(key) => key,
                                };
                                all_values.get(key).cloned().ok_or_else(|| {
                                    Error::Internal(format!(
                                        "standard graph eager execution missing value for {key:?}"
                                    ))
                                })
                            })
                            .collect::<Result<Vec<_>>>()?;
                        // Borrowed read views over the resolved tensors.
                        let input_reads = input_values
                            .iter()
                            .map(|value| TensorRead::from_tensor(value.as_ref()))
                            .collect::<Vec<_>>();
                        exec_standard_op_on_tensor_reads_in_session(
                            &op_node.operation,
                            &input_reads,
                            exec,
                        )?
                    };

                    // Guard the zip below: a count mismatch would otherwise
                    // silently drop outputs.
                    if outputs.len() != op_node.outputs.len() {
                        return Err(Error::Internal(format!(
                            "standard graph eager execution expected {} outputs for {:?}, got {}",
                            op_node.outputs.len(),
                            op_node.operation,
                            outputs.len()
                        )));
                    }

                    // Publish the outputs so later operations can consume them.
                    for (output_id, output) in op_node.outputs.iter().zip(outputs) {
                        let key = graph.values()[*output_id].key.clone();
                        all_values.insert(key, Arc::new(output));
                    }
                }
                Ok(())
            })
        })?;

        // Gather the declared graph outputs from the value table.
        let outputs = graph
            .outputs()
            .iter()
            .map(|&output_id| {
                let key = &graph.values()[output_id].key;
                all_values.get(key).cloned().ok_or_else(|| {
                    Error::Internal(format!(
                        "standard graph eager execution missing graph output {key:?}"
                    ))
                })
            })
            .collect::<Result<Vec<_>>>()?;

        Ok(EagerGraphExecution { outputs })
    }
1512
1513    pub(crate) fn try_register_grad_slot(
1514        &self,
1515        key: &ValueKey<StdTensorOp>,
1516        slot: &GradSlot,
1517    ) -> Result<()> {
1518        self.lock_grad_slots()?
1519            .insert(key.clone(), Arc::downgrade(slot));
1520        Ok(())
1521    }
1522
1523    pub(crate) fn try_register_value_record(
1524        &self,
1525        key: &ValueKey<StdTensorOp>,
1526        record: &Arc<EagerTensorRecord>,
1527    ) -> Result<()> {
1528        self.lock_value_records()?
1529            .insert(key.clone(), Arc::downgrade(record));
1530        self.try_register_value_record_ptr(record)?;
1531        Ok(())
1532    }
1533
1534    pub(crate) fn try_register_value_record_ptr(
1535        &self,
1536        record: &Arc<EagerTensorRecord>,
1537    ) -> Result<()> {
1538        let tensor = match record.value.as_tensor_arc() {
1539            Some(tensor) => Some(Arc::clone(tensor)),
1540            None => record.materialized_cache.get().cloned(),
1541        };
1542        let Some(tensor) = tensor else {
1543            return Ok(());
1544        };
1545        self.lock_value_ptr_records()?
1546            .insert(tensor_ptr(&tensor), Arc::downgrade(record));
1547        Ok(())
1548    }
1549
1550    pub(crate) fn value_record(
1551        &self,
1552        key: &ValueKey<StdTensorOp>,
1553    ) -> Result<Option<Arc<EagerTensorRecord>>> {
1554        let mut records = self.lock_value_records()?;
1555        let Some(record) = records.get(key).cloned() else {
1556            return Ok(None);
1557        };
1558        match record.upgrade() {
1559            Some(record) => Ok(Some(record)),
1560            None => {
1561                records.remove(key);
1562                Ok(None)
1563            }
1564        }
1565    }
1566
1567    /// Clear all live gradient slots tracked by this context.
1568    ///
1569    /// This resets the stored gradients to `None` without unregistering the
1570    /// tensors, so future `backward()` calls can accumulate again.
1571    ///
1572    /// # Examples
1573    ///
1574    /// ```
1575    /// use tenferro_cpu::CpuBackend;
1576    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1577    ///
1578    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1579    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx.clone()).unwrap();
1580    /// let y = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![4.0_f64, 5.0, 6.0]).unwrap(), ctx.clone()).unwrap();
1581    /// let loss = x.mul(&y).unwrap().reduce_sum(Some(&[0])).unwrap();
1582    /// let _ = loss.backward().unwrap();
1583    ///
1584    /// ctx.clear_grads()?;
1585    ///
1586    /// assert!(x.grad()?.is_none());
1587    /// assert!(y.grad()?.is_none());
1588    /// # Ok::<(), tenferro_ad::Error>(())
1589    /// ```
1590    ///
1591    /// # Errors
1592    ///
1593    /// Returns [`tenferro_runtime::Error::RuntimeState`] if a gradient-slot
1594    /// lock is poisoned while clearing live gradients.
1595    pub fn clear_grads(&self) -> Result<()> {
1596        let live_slots = {
1597            let mut live_slots = Vec::new();
1598            self.lock_grad_slots()?.retain(|_, slot| {
1599                if let Some(slot) = slot.upgrade() {
1600                    live_slots.push(slot);
1601                    true
1602                } else {
1603                    false
1604                }
1605            });
1606            live_slots
1607        };
1608
1609        let mut poisoned_slot = false;
1610        for slot in live_slots {
1611            match slot.lock() {
1612                Ok(mut current) => {
1613                    *current = None;
1614                }
1615                Err(_) => {
1616                    poisoned_slot = true;
1617                }
1618            }
1619        }
1620        if poisoned_slot {
1621            return Err(Error::runtime_state(
1622                "eager_gradient_slot",
1623                ErrorPhase::Execution,
1624                "lock poisoned",
1625            ));
1626        }
1627        Ok(())
1628    }
1629
1630    /// Import a concrete tensor into this context as an untracked constant.
1631    ///
1632    /// The returned tensor does not participate in gradient tracking.
1633    /// Use this for fixed masks, quadrature weights, physical constants,
1634    /// and other data that should not receive gradients.
1635    ///
1636    /// # Examples
1637    ///
1638    /// ```
1639    /// use tenferro_cpu::CpuBackend;
1640    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1641    ///
1642    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1643    /// let c = ctx.constant_from(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap())?;
1644    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![3.0_f64, 4.0]).unwrap(), ctx)?;
1645    /// let z = x.add(&c).unwrap();
1646    ///
1647    /// assert_eq!(z.materialized()?.as_slice::<f64>().unwrap(), &[4.0, 6.0]);
1648    /// # Ok::<(), tenferro_ad::Error>(())
1649    /// ```
1650    ///
1651    /// # Errors
1652    ///
1653    /// Returns [`tenferro_runtime::Error::RuntimeState`] when metadata cannot
1654    /// be registered or the backend lock is poisoned.
1655    pub fn constant_from(self: &Arc<Self>, tensor: Tensor) -> Result<EagerTensor> {
1656        EagerTensor::new_leaf(Arc::clone(self), tensor, false)
1657    }
1658
1659    /// Import a concrete tensor into this context as a trainable variable.
1660    ///
1661    /// The returned tensor participates in gradient tracking; its gradient
1662    /// slot is registered in this context.
1663    ///
1664    /// # Examples
1665    ///
1666    /// ```
1667    /// use tenferro_cpu::CpuBackend;
1668    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1669    ///
1670    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1671    /// let p = ctx.variable_from(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap())?;
1672    /// let loss = p.exp().unwrap().reduce_sum(Some(&[0])).unwrap();
1673    /// let _ = loss.backward().unwrap();
1674    ///
1675    /// let grad = p.grad().unwrap().unwrap();
1676    /// assert_eq!(grad.shape(), &[2]);
1677    /// # Ok::<(), tenferro_ad::Error>(())
1678    /// ```
1679    ///
1680    /// # Errors
1681    ///
1682    /// Returns [`tenferro_runtime::Error::RuntimeState`] when gradient metadata
1683    /// or the eager backend state cannot be registered.
1684    pub fn variable_from(self: &Arc<Self>, tensor: Tensor) -> Result<EagerTensor> {
1685        EagerTensor::new_leaf(Arc::clone(self), tensor, true)
1686    }
1687
1688    /// Gradient of a scalar eager output with respect to an eager tensor.
1689    ///
1690    /// Functional eager gradients return ordinary eager tensors and do not
1691    /// write into `grad()` slots. The returned tensor keeps a trace when the
1692    /// derivative computation depends on tracked eager values.
1693    ///
1694    /// # Examples
1695    ///
1696    /// ```
1697    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1698    /// use tenferro_cpu::CpuBackend;
1699    ///
1700    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1701    /// let x = EagerTensor::requires_grad_in(
1702    ///     Tensor::from_vec_col_major(vec![], vec![3.0_f64]).unwrap(),
1703    ///     ctx.clone(),
1704    /// )?;
1705    /// let loss = x.mul(&x)?;
1706    /// let dx = ctx.grad(&loss, &x)?;
1707    /// assert_eq!(dx.materialized()?.as_slice::<f64>().unwrap(), &[6.0]);
1708    /// # Ok::<(), tenferro_ad::Error>(())
1709    /// ```
1710    ///
1711    /// # Errors
1712    ///
1713    /// Returns [`tenferro_runtime::Error::NonScalarGrad`] for a non-scalar
1714    /// output, [`Error::ContextMismatch`] for tensors from another runtime,
1715    /// [`Error::UnsupportedAdRule`] when an AD rule is unavailable, or a typed
1716    /// validation/backend error from eager execution.
1717    pub fn grad(self: &Arc<Self>, output: &EagerTensor, wrt: &EagerTensor) -> Result<EagerTensor> {
1718        self.grad_optional(output, wrt)?
1719            .ok_or_else(|| Error::Internal(format!("grad output is inactive for {:?}", wrt.key)))
1720    }
1721
1722    /// Gradient that returns `None` when `wrt` is inactive.
1723    ///
1724    /// # Examples
1725    ///
1726    /// ```
1727    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1728    /// use tenferro_cpu::CpuBackend;
1729    ///
1730    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1731    /// let x = EagerTensor::requires_grad_in(
1732    ///     Tensor::from_vec_col_major(vec![], vec![3.0_f64]).unwrap(),
1733    ///     ctx.clone(),
1734    /// )?;
1735    /// let y = EagerTensor::requires_grad_in(
1736    ///     Tensor::from_vec_col_major(vec![], vec![4.0_f64]).unwrap(),
1737    ///     ctx.clone(),
1738    /// )?;
1739    /// let loss = y.mul(&y)?;
1740    /// assert!(ctx.grad_optional(&loss, &x)?.is_none());
1741    /// # Ok::<(), tenferro_ad::Error>(())
1742    /// ```
1743    ///
1744    /// # Errors
1745    ///
1746    /// Returns [`tenferro_runtime::Error::NonScalarGrad`] for a non-scalar
1747    /// output, [`Error::ContextMismatch`] for a foreign runtime, or a typed
1748    /// validation/backend/runtime-state error from eager execution.
1749    pub fn grad_optional(
1750        self: &Arc<Self>,
1751        output: &EagerTensor,
1752        wrt: &EagerTensor,
1753    ) -> Result<Option<EagerTensor>> {
1754        if !output.shape().is_empty() {
1755            return Err(Error::NonScalarGrad {
1756                shape: output.shape().to_vec(),
1757            });
1758        }
1759
1760        let value = output.materialized_arc()?;
1761        let seed = {
1762            let mut backend = self.lock_backend()?;
1763            one_like_tensor(value.as_ref(), &mut *backend)?
1764        };
1765        let seed = EagerTensor::new_result_arc(
1766            Arc::clone(self),
1767            eager_val_key(),
1768            Arc::new(seed),
1769            false,
1770            None,
1771            Vec::new(),
1772        )?;
1773        self.vjp_optional(output, wrt, &seed)
1774    }
1775
1776    /// Reverse-mode vector-Jacobian product for eager tensors.
1777    ///
1778    /// # Examples
1779    ///
1780    /// ```
1781    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1782    /// use tenferro_cpu::CpuBackend;
1783    ///
1784    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1785    /// let x = EagerTensor::requires_grad_in(
1786    ///     Tensor::from_vec_col_major(vec![2], vec![2.0_f64, 3.0]).unwrap(),
1787    ///     ctx.clone(),
1788    /// )?;
1789    /// let y = x.mul(&x)?;
1790    /// let seed = EagerTensor::from_tensor_in(
1791    ///     Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 1.0]).unwrap(),
1792    ///     ctx.clone(),
1793    /// )?;
1794    /// let dx = ctx.vjp(&y, &x, &seed)?;
1795    /// assert_eq!(dx.materialized()?.as_slice::<f64>().unwrap(), &[4.0, 6.0]);
1796    /// # Ok::<(), tenferro_ad::Error>(())
1797    /// ```
1798    ///
1799    /// # Errors
1800    ///
1801    /// Returns [`Error::ContextMismatch`] for tensors from different eager
1802    /// runtimes, [`Error::Validation`] when the cotangent shape or dtype does
1803    /// not match the output, [`Error::UnsupportedAdRule`] when a rule is not
1804    /// registered, or a typed backend/runtime-state error.
1805    pub fn vjp(
1806        self: &Arc<Self>,
1807        output: &EagerTensor,
1808        wrt: &EagerTensor,
1809        cotangent: &EagerTensor,
1810    ) -> Result<EagerTensor> {
1811        self.vjp_optional(output, wrt, cotangent)?
1812            .ok_or_else(|| Error::Internal(format!("vjp output is inactive for {:?}", wrt.key)))
1813    }
1814
1815    /// Reverse-mode vector-Jacobian product that returns `None` for inactive inputs.
1816    ///
1817    /// # Examples
1818    ///
1819    /// ```
1820    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1821    /// use tenferro_cpu::CpuBackend;
1822    ///
1823    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1824    /// let x = EagerTensor::requires_grad_in(
1825    ///     Tensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap(),
1826    ///     ctx.clone(),
1827    /// )?;
1828    /// let y = EagerTensor::requires_grad_in(
1829    ///     Tensor::from_vec_col_major(vec![1], vec![4.0_f64]).unwrap(),
1830    ///     ctx.clone(),
1831    /// )?;
1832    /// let seed = EagerTensor::from_tensor_in(
1833    ///     Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(),
1834    ///     ctx.clone(),
1835    /// )?;
1836    /// let loss = y.mul(&y)?;
1837    /// assert!(ctx.vjp_optional(&loss, &x, &seed)?.is_none());
1838    /// # Ok::<(), tenferro_ad::Error>(())
1839    /// ```
1840    ///
1841    /// # Errors
1842    ///
1843    /// Returns [`Error::ContextMismatch`] for tensors from different eager
1844    /// runtimes, [`Error::Validation`] when the cotangent shape or dtype does
1845    /// not match the output, [`Error::UnsupportedAdRule`] when a rule is not
1846    /// registered, or a typed backend/runtime-state error.
1847    pub fn vjp_optional(
1848        self: &Arc<Self>,
1849        output: &EagerTensor,
1850        wrt: &EagerTensor,
1851        cotangent: &EagerTensor,
1852    ) -> Result<Option<EagerTensor>> {
1853        validate_same_runtime(self, output, "vjp output")?;
1854        validate_same_runtime(self, wrt, "vjp wrt")?;
1855        validate_same_runtime(self, cotangent, "vjp cotangent")?;
1856        validate_seed_tensor("vjp", output, cotangent)?;
1857        // Unification 7: semantic path is the only VJP path.
1858        match semantic_eager_vjp_optional(self, output, wrt, cotangent)? {
1859            Some(result) => Ok(result),
1860            None => Ok(None),
1861        }
1862    }
1863
1864    /// Forward-mode Jacobian-vector product for eager tensors.
1865    ///
1866    /// # Examples
1867    ///
1868    /// ```
1869    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1870    /// use tenferro_cpu::CpuBackend;
1871    ///
1872    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1873    /// let x = EagerTensor::requires_grad_in(
1874    ///     Tensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap(),
1875    ///     ctx.clone(),
1876    /// )?;
1877    /// let tangent = EagerTensor::from_tensor_in(
1878    ///     Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(),
1879    ///     ctx.clone(),
1880    /// )?;
1881    /// let y = x.mul(&x)?;
1882    /// let dy = ctx.jvp(&y, &x, &tangent)?;
1883    /// assert_eq!(dy.materialized()?.as_slice::<f64>().unwrap(), &[6.0]);
1884    /// # Ok::<(), tenferro_ad::Error>(())
1885    /// ```
1886    ///
1887    /// # Errors
1888    ///
1889    /// Returns [`Error::ContextMismatch`] for tensors from different eager
1890    /// runtimes, [`Error::Validation`] when the tangent shape or dtype does not
1891    /// match `wrt`, [`Error::UnsupportedAdRule`] when a rule is unavailable, or
1892    /// a typed backend/runtime-state error.
1893    pub fn jvp(
1894        self: &Arc<Self>,
1895        output: &EagerTensor,
1896        wrt: &EagerTensor,
1897        tangent: &EagerTensor,
1898    ) -> Result<EagerTensor> {
1899        self.jvp_optional(output, wrt, tangent)?
1900            .ok_or_else(|| Error::Internal(format!("jvp output is inactive for {:?}", wrt.key)))
1901    }
1902
1903    /// Forward-mode Jacobian-vector product that returns `None` for inactive outputs.
1904    ///
1905    /// # Examples
1906    ///
1907    /// ```
1908    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
1909    /// use tenferro_cpu::CpuBackend;
1910    ///
1911    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
1912    /// let x = EagerTensor::requires_grad_in(
1913    ///     Tensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap(),
1914    ///     ctx.clone(),
1915    /// )?;
1916    /// let y = EagerTensor::requires_grad_in(
1917    ///     Tensor::from_vec_col_major(vec![1], vec![4.0_f64]).unwrap(),
1918    ///     ctx.clone(),
1919    /// )?;
1920    /// let tangent = EagerTensor::from_tensor_in(
1921    ///     Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(),
1922    ///     ctx.clone(),
1923    /// )?;
1924    /// let loss = y.mul(&y)?;
1925    /// assert!(ctx.jvp_optional(&loss, &x, &tangent)?.is_none());
1926    /// # Ok::<(), tenferro_ad::Error>(())
1927    /// ```
1928    ///
1929    /// # Errors
1930    ///
1931    /// Returns [`Error::ContextMismatch`] for tensors from different eager
1932    /// runtimes, [`Error::Validation`] when the tangent shape or dtype does not
1933    /// match `wrt`, [`Error::UnsupportedAdRule`] when a rule is unavailable, or
1934    /// a typed backend/runtime-state error.
1935    pub fn jvp_optional(
1936        self: &Arc<Self>,
1937        output: &EagerTensor,
1938        wrt: &EagerTensor,
1939        tangent: &EagerTensor,
1940    ) -> Result<Option<EagerTensor>> {
1941        validate_same_runtime(self, output, "jvp output")?;
1942        validate_same_runtime(self, wrt, "jvp wrt")?;
1943        validate_same_runtime(self, tangent, "jvp tangent")?;
1944        validate_seed_tensor("jvp", wrt, tangent)?;
1945        // Unification 7: semantic path is the only JVP path.
1946        match semantic_eager_jvp_optional(self, output, wrt, tangent)? {
1947            Some(result) => Ok(result),
1948            None => Ok(None),
1949        }
1950    }
1951
1952    fn store_grads(
1953        &self,
1954        cotangents: &HashMap<ValueKey<StdTensorOp>, Arc<Tensor>>,
1955        backend: &mut EagerBackend,
1956    ) -> Result<()> {
1957        let mut updates = Vec::new();
1958
1959        {
1960            let mut slots = self.lock_grad_slots()?;
1961            slots.retain(|key, slot| {
1962                let Some(slot) = slot.upgrade() else {
1963                    return false;
1964                };
1965
1966                if let Some(incoming) = cotangents.get(key) {
1967                    updates.push((slot, Arc::clone(incoming)));
1968                }
1969
1970                true
1971            });
1972        }
1973
1974        for (slot, incoming) in updates {
1975            let mut current = slot.lock().map_err(|_| {
1976                Error::runtime_state(
1977                    "eager_gradient_slot",
1978                    ErrorPhase::Execution,
1979                    "lock poisoned",
1980                )
1981            })?;
1982            let next = match current.as_ref() {
1983                Some(existing) => Arc::new(backend.add(existing.as_ref(), incoming.as_ref())?),
1984                None => incoming,
1985            };
1986            *current = Some(next);
1987        }
1988
1989        Ok(())
1990    }
1991}
1992
/// Lookup key for the prepared-derivative cache used by the semantic VJP path.
///
/// Two requests share a cache entry only when every field compares equal.
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
struct PreparedDerivativeCacheKey {
    /// Fingerprint of the compiled source program (`semantic_fingerprint()`).
    semantic_fingerprint: SemanticFingerprint,
    /// Runtime epoch read from the eager runtime when the key is built.
    runtime_epoch: RuntimeEpoch,
    /// Position of the differentiated input among the source program's inputs.
    wrt_input_index: usize,
    /// Input metadata of the source program with bound shapes
    /// (`input_metadata_with_bound_shapes()`).
    input_metadata: Box<[ProgramValueMetadata]>,
}
2000
/// Cached prepared derivative: program + index metadata.
///
/// Everything the semantic VJP path needs to re-run a derivative without
/// repeating the AD transform, compilation, and runtime preparation.
#[derive(Debug)]
struct PreparedDerivative {
    /// Compiled derivative program.
    program: Arc<CompiledGraph>,
    /// Result of preparing `program` on the runtime (`prepare_compiled`).
    prepared: Arc<PreparedCompiledGraph>,
    /// Index of the derivative-program input that receives the seed (cotangent).
    seed_input_index: usize,
    /// Index of the derivative-program output holding the requested derivative.
    derivative_output_index: usize,
}
2009
/// LRU cache of prepared derivatives, bounded by entry count and, optionally,
/// by an estimate of retained bytes.
#[derive(Debug)]
struct PreparedDerivativeCache {
    /// Active limits; enforced by `evict_to_limits`.
    limits: AdTransformCacheLimits,
    /// Entries in LRU order. The `Default` impl builds this with
    /// `LruCache::unbounded()`, so limits are enforced by `evict_to_limits`
    /// rather than by the container.
    entries: LruCache<PreparedDerivativeCacheKey, PreparedDerivativeCacheEntry>,
    /// Running hit/miss/eviction/clear counters plus entry and byte totals.
    stats: CacheStats,
}
2016
2017impl PreparedDerivativeCache {
2018    fn limits(&self) -> AdTransformCacheLimits {
2019        self.limits
2020    }
2021
2022    fn set_limits(&mut self, limits: AdTransformCacheLimits) {
2023        self.limits = limits;
2024        self.evict_to_limits();
2025    }
2026
2027    fn clear(&mut self) {
2028        let clears = self.stats.clears.saturating_add(1);
2029        self.entries.clear();
2030        self.stats = CacheStats {
2031            clears,
2032            ..CacheStats::empty()
2033        };
2034    }
2035
2036    fn stats(&self) -> CacheStats {
2037        self.stats
2038    }
2039
2040    fn get(&mut self, key: &PreparedDerivativeCacheKey) -> Option<Arc<PreparedDerivative>> {
2041        match self.entries.get(key) {
2042            Some(entry) => {
2043                self.stats.hits = self.stats.hits.saturating_add(1);
2044                Some(Arc::clone(&entry.value))
2045            }
2046            None => {
2047                self.stats.misses = self.stats.misses.saturating_add(1);
2048                None
2049            }
2050        }
2051    }
2052
2053    fn insert(&mut self, key: PreparedDerivativeCacheKey, value: Arc<PreparedDerivative>) {
2054        let retained_bytes = prepared_derivative_cache_entry_retained_bytes(&key, value.as_ref());
2055        let entry = PreparedDerivativeCacheEntry {
2056            value,
2057            retained_bytes,
2058        };
2059        self.stats.retained_bytes = self.stats.retained_bytes.saturating_add(retained_bytes);
2060        if let Some((_old_key, old_entry)) = self.entries.push(key, entry) {
2061            self.stats.retained_bytes = self
2062                .stats
2063                .retained_bytes
2064                .saturating_sub(old_entry.retained_bytes);
2065        }
2066        self.stats.entries = self.entries.len();
2067        self.evict_to_limits();
2068    }
2069
2070    fn evict_to_limits(&mut self) {
2071        while self.entries.len() > self.limits.max_entries().get()
2072            || self
2073                .limits
2074                .max_retained_bytes()
2075                .is_some_and(|limit| self.stats.retained_bytes > limit.get())
2076        {
2077            let Some((_key, entry)) = self.entries.pop_lru() else {
2078                break;
2079            };
2080            self.stats.retained_bytes = self
2081                .stats
2082                .retained_bytes
2083                .saturating_sub(entry.retained_bytes);
2084            self.stats.evictions = self.stats.evictions.saturating_add(1);
2085        }
2086        self.stats.entries = self.entries.len();
2087    }
2088}
2089
2090impl Default for PreparedDerivativeCache {
2091    fn default() -> Self {
2092        Self {
2093            limits: AdTransformCacheLimits::default(),
2094            entries: LruCache::unbounded(),
2095            stats: CacheStats::empty(),
2096        }
2097    }
2098}
2099
/// One slot of the prepared-derivative cache.
#[derive(Debug)]
struct PreparedDerivativeCacheEntry {
    /// The shared prepared derivative handed out on cache hits.
    value: Arc<PreparedDerivative>,
    /// Byte estimate computed once at insertion; subtracted from the cache
    /// total when the entry is replaced or evicted.
    retained_bytes: usize,
}
2105
2106fn prepared_derivative_cache_entry_retained_bytes(
2107    key: &PreparedDerivativeCacheKey,
2108    value: &PreparedDerivative,
2109) -> usize {
2110    size_of::<PreparedDerivativeCacheKey>()
2111        .saturating_add(
2112            key.input_metadata
2113                .len()
2114                .saturating_mul(size_of::<ProgramValueMetadata>()),
2115        )
2116        .saturating_add(size_of::<PreparedDerivative>())
2117        .saturating_add(compiled_graph_retained_bytes(value.program.as_ref()))
2118        .saturating_add(prepared_compiled_graph_retained_bytes(
2119            value.prepared.as_ref(),
2120            value.program.as_ref(),
2121        ))
2122}
2123
2124fn prepared_compiled_graph_retained_bytes(
2125    prepared: &PreparedCompiledGraph,
2126    derivative_program: &CompiledGraph,
2127) -> usize {
2128    size_of_val(prepared).saturating_add(compiled_graph_retained_bytes(derivative_program))
2129}
2130
2131fn compiled_graph_retained_bytes(program: &CompiledGraph) -> usize {
2132    size_of::<CompiledGraph>()
2133        .saturating_add(size_of_val(program.input_keys()))
2134        .saturating_add(program.bindings().len().saturating_mul(size_of::<usize>()))
2135        .saturating_add(semantic_program_retained_bytes(program.program()))
2136}
2137
2138fn semantic_program_retained_bytes(program: &SemanticProgram) -> usize {
2139    size_of::<SemanticProgram>()
2140        .saturating_add(size_of_val(program.inputs()))
2141        .saturating_add(size_of_val(program.outputs()))
2142        .saturating_add(
2143            program
2144                .operations()
2145                .len()
2146                .saturating_mul(size_of::<usize>()),
2147        )
2148        .saturating_add(
2149            program
2150                .shape_guards()
2151                .len()
2152                .saturating_mul(size_of::<usize>()),
2153        )
2154}
2155
/// Reverse-mode derivative of `output` with respect to `wrt`, seeded with
/// `cotangent`, computed through the semantic-trace path.
///
/// The result is doubly optional:
///
/// - `Ok(None)`: this path produced no answer. That happens when
///   `eager_semantic_vjp_enabled()` is false, either tensor lacks a semantic
///   trace, `wrt` has no input key or that key is not attached to the output
///   trace, the compiled source does not have exactly one output with
///   matching input-key/binding counts, or `wrt` is not among its inputs.
/// - `Ok(Some(None))`: the AD transform reported no seed input or no
///   derivative output for `wrt`.
/// - `Ok(Some(Some(tensor)))`: the derivative, carrying its own semantic trace.
///
/// # Errors
///
/// Propagates compilation, preparation, execution, and materialization
/// errors; wraps runtime-epoch and AD-transform failures as runtime-state
/// errors tagged `"semantic_eager_vjp"`; and returns [`Error::Internal`] when
/// the derivative program's input/output indices are inconsistent.
fn semantic_eager_vjp_optional(
    ctx: &Arc<EagerRuntime>,
    output: &EagerTensor,
    wrt: &EagerTensor,
    cotangent: &EagerTensor,
) -> Result<Option<Option<EagerTensor>>> {
    if !eager_semantic_vjp_enabled() {
        return Ok(None);
    }
    // Both tensors need a semantic trace, and `wrt` must be an input that the
    // output trace actually references.
    let (Some(output_trace), Some(wrt_trace)) =
        (output.semantic_trace.as_ref(), wrt.semantic_trace.as_ref())
    else {
        return Ok(None);
    };
    let Some(wrt_key) = wrt_trace.input_key() else {
        return Ok(None);
    };
    if !output_trace.has_attached_input_key(&wrt_key) {
        return Ok(None);
    }

    // First compile the trace to get bindings and wrt_input_index.
    // (The compile step is needed even for cache hits to extract tensor bindings.)
    let mut compiler = GraphCompiler::new();
    let source = compile_ad_source(&mut compiler, output_trace)?;
    // Only single-output sources with one key and one binding per input are handled.
    if source.output_count() != 1
        || source.input_keys().len() != source.input_count()
        || source.bindings().len() != source.input_count()
    {
        return Ok(None);
    }
    let Some(wrt_input_index) = source.input_key_index(&wrt_key) else {
        return Ok(None);
    };

    // S2: check prepared-derivative cache before AD transform + compile_frozen.
    let cache_key = PreparedDerivativeCacheKey {
        semantic_fingerprint: source.program().semantic_fingerprint(),
        runtime_epoch: ctx.runtime.epoch().map_err(|source| {
            Error::runtime_state_source("semantic_eager_vjp", ErrorPhase::Execution, source)
        })?,
        wrt_input_index,
        input_metadata: source.frozen_program().input_metadata_with_bound_shapes(),
    };
    // The braces end the cache guard's lifetime right after the lookup.
    let prepared = { ctx.lock_prepared_derivative_cache()?.get(&cache_key) };
    // On a hit, reuse the cached program and prepared graph; on a miss, run
    // the AD transform and compile, leaving `prepared_runtime` as `None` so
    // preparation happens below once the concrete inputs are assembled.
    let (seed_input_index, derivative_output_index, derivative_program, prepared_runtime) =
        if let Some(prepared) = prepared {
            (
                prepared.seed_input_index,
                prepared.derivative_output_index,
                Arc::clone(&prepared.program),
                Some(Arc::clone(&prepared.prepared)),
            )
        } else {
            // Differentiate with respect to `wrt` only.
            let mut active_inputs = vec![false; source.input_count()];
            if let Some(active) = active_inputs.get_mut(wrt_input_index) {
                *active = true;
            } else {
                return Ok(None);
            }
            let active_outputs = vec![true; source.output_count()];
            let ad = AdContext::with_rules_and_transform_cache(
                ctx.semantic_extension_rules.clone(),
                Arc::clone(&ctx.ad_transform_cache),
            );
            let derivative = ad
                .vjp_program(source.frozen_program(), &active_inputs, &active_outputs)
                .map_err(|source| {
                    Error::runtime_state_source(
                        "semantic_eager_vjp",
                        ErrorPhase::GraphBuild,
                        source,
                    )
                })?;
            // The seed slot is looked up for the first (and only) source
            // output; the derivative slot is looked up by the `wrt` input index.
            let seed_input_index = derivative
                .derivative_input_indices()
                .first()
                .copied()
                .flatten();
            let derivative_output_index = derivative
                .derivative_output_indices()
                .get(wrt_input_index)
                .copied()
                .flatten();
            let (Some(seed_input_index), Some(derivative_output_index)) =
                (seed_input_index, derivative_output_index)
            else {
                return Ok(Some(None));
            };
            let program = Arc::new(compiler.compile_frozen_program(derivative.frozen())?);
            (seed_input_index, derivative_output_index, program, None)
        };

    // Assemble the derivative program's inputs: source bindings are copied
    // into the slots with the same index, then the cotangent is written into
    // the seed slot.
    let cotangent_tensor = cotangent.materialized_arc()?;
    let input_count = derivative_program.input_count();
    let mut owned_inputs = vec![None; input_count];
    for (source_input_index, (_, tensor)) in source.bindings().iter().enumerate() {
        let Some(slot) = owned_inputs.get_mut(source_input_index) else {
            return Err(Error::Internal(format!(
                "semantic eager VJP derivative program has no primal input slot {source_input_index}"
            )));
        };
        *slot = Some(tensor.clone());
    }
    let Some(slot) = owned_inputs.get_mut(seed_input_index) else {
        return Err(Error::Internal(format!(
            "semantic eager VJP seed input index {seed_input_index} is outside {} inputs",
            owned_inputs.len()
        )));
    };
    *slot = Some(cotangent_tensor.as_ref().clone());
    // Every slot must be populated by now; a gap is an internal inconsistency.
    let input_refs = owned_inputs
        .iter()
        .enumerate()
        .map(|(index, tensor)| {
            tensor.as_ref().ok_or_else(|| {
                Error::Internal(format!(
                    "semantic eager VJP derivative input {index} was not populated"
                ))
            })
        })
        .collect::<Result<Vec<_>>>()?;
    // Cache miss: prepare the program against the concrete inputs and publish
    // the entry so later calls with the same key skip transform and compile.
    let prepared_runtime = if let Some(prepared_runtime) = prepared_runtime {
        prepared_runtime
    } else {
        let prepared_runtime = Arc::new(
            ctx.runtime
                .prepare_compiled(&derivative_program, &input_refs)?,
        );
        let entry = Arc::new(PreparedDerivative {
            program: Arc::clone(&derivative_program),
            prepared: Arc::clone(&prepared_runtime),
            seed_input_index,
            derivative_output_index,
        });
        ctx.lock_prepared_derivative_cache()?
            .insert(cache_key, entry);
        prepared_runtime
    };
    let outputs = ctx.runtime.run_prepared(&prepared_runtime, &input_refs)?;
    let Some(result) = outputs.get(derivative_output_index).cloned() else {
        return Err(Error::Internal(format!(
            "semantic eager VJP derivative output index {derivative_output_index} is outside {} outputs",
            outputs.len()
        )));
    };
    // Build a semantic trace for the result from the derivative program, the
    // seed binding, and the participating traces.
    let cotangent_trace =
        TracedTensor::from_tensor_arc_symbolic_shape(Arc::clone(&cotangent_tensor))?;
    let semantic_trace = derivative_trace_from_frozen_program(
        &source,
        derivative_program.frozen_program(),
        derivative_output_index,
        &[(seed_input_index, Arc::clone(&cotangent_tensor))],
        &[output_trace, wrt_trace, &cotangent_trace],
        None,
        "semantic_eager_vjp",
    )?;

    // Test-only counter of completed semantic VJP executions.
    #[cfg(test)]
    EAGER_SEMANTIC_VJP_EXECUTIONS.fetch_add(1, Ordering::Relaxed);

    Ok(Some(Some(EagerTensor::new_result_arc_with_semantic_trace(
        Arc::clone(ctx),
        eager_val_key(),
        Arc::new(result),
        true,
        None,
        Some(semantic_trace),
        Vec::new(),
    )?)))
}
2327
/// Forward-mode derivative of `output` with respect to `wrt`, seeded with
/// `tangent`, computed through the semantic-trace path.
///
/// The result is doubly optional:
///
/// - `Ok(None)`: this path produced no answer. That happens when the feature
///   gate is off, either tensor lacks a semantic trace, `wrt` has no input
///   key or that key is not attached to the output trace, the compiled source
///   does not have exactly one output with matching input-key/binding
///   counts, or `wrt` is not among its inputs.
/// - `Ok(Some(None))`: the AD transform reported no seed input for `wrt` or
///   no derivative output.
/// - `Ok(Some(Some(tensor)))`: the derivative, carrying its own semantic trace.
///
/// Unlike the VJP path in this file, no prepared-derivative cache is
/// consulted here: the derivative program is compiled and executed with
/// `run_compiled` on every call.
///
/// # Errors
///
/// Propagates compilation, execution, and materialization errors; wraps
/// AD-transform failures as runtime-state errors tagged
/// `"semantic_eager_jvp"`; and returns [`Error::Internal`] when the
/// derivative program's input/output indices are inconsistent.
fn semantic_eager_jvp_optional(
    ctx: &Arc<EagerRuntime>,
    output: &EagerTensor,
    wrt: &EagerTensor,
    tangent: &EagerTensor,
) -> Result<Option<Option<EagerTensor>>> {
    // NOTE(review): the JVP path is gated by the VJP flag — confirm that
    // sharing one switch for both modes is intended.
    if !eager_semantic_vjp_enabled() {
        return Ok(None);
    }
    // Both tensors need a semantic trace, and `wrt` must be an input that the
    // output trace actually references.
    let (Some(output_trace), Some(wrt_trace)) =
        (output.semantic_trace.as_ref(), wrt.semantic_trace.as_ref())
    else {
        return Ok(None);
    };
    let Some(wrt_key) = wrt_trace.input_key() else {
        return Ok(None);
    };
    if !output_trace.has_attached_input_key(&wrt_key) {
        return Ok(None);
    }

    let mut compiler = GraphCompiler::new();
    let source = compile_ad_source(&mut compiler, output_trace)?;
    // Only single-output sources with one key and one binding per input are handled.
    if source.output_count() != 1
        || source.input_keys().len() != source.input_count()
        || source.bindings().len() != source.input_count()
    {
        return Ok(None);
    }
    let Some(wrt_input_index) = source.input_key_index(&wrt_key) else {
        return Ok(None);
    };

    // Differentiate with respect to `wrt` only.
    let mut active_inputs = vec![false; source.input_count()];
    if let Some(active) = active_inputs.get_mut(wrt_input_index) {
        *active = true;
    } else {
        return Ok(None);
    }
    let ad = AdContext::with_rules_and_transform_cache(
        ctx.semantic_extension_rules.clone(),
        Arc::clone(&ctx.ad_transform_cache),
    );
    let derivative = ad
        .jvp_program(source.frozen_program(), &active_inputs)
        .map_err(|source| {
            Error::runtime_state_source("semantic_eager_jvp", ErrorPhase::GraphBuild, source)
        })?;
    // derivative_input_indices maps source input → derivative seed input.
    let Some(seed_input_index) = derivative
        .derivative_input_indices()
        .get(wrt_input_index)
        .copied()
        .flatten()
    else {
        return Ok(Some(None));
    };
    // derivative_output_indices maps source output → derivative output.
    // There is always exactly one source output (guarded above).
    let Some(derivative_output_index) = derivative
        .derivative_output_indices()
        .first()
        .copied()
        .flatten()
    else {
        return Ok(Some(None));
    };

    // Assemble the derivative program's inputs: source bindings are copied
    // into the slots with the same index, then the tangent is written into
    // the seed slot.
    let derivative_program = compiler.compile_frozen_program(derivative.frozen())?;
    let tangent_tensor = tangent.materialized_arc()?;
    let input_count = derivative_program.input_count();
    let mut owned_inputs = vec![None; input_count];
    for (source_input_index, (_, tensor)) in source.bindings().iter().enumerate() {
        let Some(slot) = owned_inputs.get_mut(source_input_index) else {
            return Err(Error::Internal(format!(
                "semantic eager JVP derivative program has no primal input slot {source_input_index}"
            )));
        };
        *slot = Some(tensor.clone());
    }
    let Some(slot) = owned_inputs.get_mut(seed_input_index) else {
        return Err(Error::Internal(format!(
            "semantic eager JVP seed input index {seed_input_index} is outside {} inputs",
            owned_inputs.len()
        )));
    };
    *slot = Some(tangent_tensor.as_ref().clone());
    // Every slot must be populated by now; a gap is an internal inconsistency.
    let input_refs = owned_inputs
        .iter()
        .enumerate()
        .map(|(index, tensor)| {
            tensor.as_ref().ok_or_else(|| {
                Error::Internal(format!(
                    "semantic eager JVP derivative input {index} was not populated"
                ))
            })
        })
        .collect::<Result<Vec<_>>>()?;
    let outputs = ctx.runtime.run_compiled(&derivative_program, &input_refs)?;
    let Some(result) = outputs.get(derivative_output_index).cloned() else {
        return Err(Error::Internal(format!(
            "semantic eager JVP derivative output index {derivative_output_index} is outside {} outputs",
            outputs.len()
        )));
    };
    // Build a semantic trace for the result from the derivative program, the
    // seed binding, and the participating traces.
    let tangent_trace = TracedTensor::from_tensor_arc_symbolic_shape(Arc::clone(&tangent_tensor))?;
    let semantic_trace = derivative_trace_from_frozen_program(
        &source,
        derivative.frozen(),
        derivative_output_index,
        &[(seed_input_index, Arc::clone(&tangent_tensor))],
        &[output_trace, wrt_trace, &tangent_trace],
        None,
        "semantic_eager_jvp",
    )?;

    Ok(Some(Some(EagerTensor::new_result_arc_with_semantic_trace(
        Arc::clone(ctx),
        eager_val_key(),
        Arc::new(result),
        true,
        None,
        Some(semantic_trace),
        Vec::new(),
    )?)))
}
2454
2455fn validate_same_runtime(
2456    runtime: &Arc<EagerRuntime>,
2457    tensor: &EagerTensor,
2458    role: &'static str,
2459) -> Result<()> {
2460    if tensor.ctx_id() != runtime.id() {
2461        return Err(Error::ContextMismatch {
2462            lhs: runtime.id(),
2463            rhs: tensor.ctx_id(),
2464        });
2465    }
2466    let _ = role;
2467    Ok(())
2468}
2469
2470pub(crate) fn tensor_ptr(tensor: &Arc<Tensor>) -> usize {
2471    Arc::as_ptr(tensor) as usize
2472}
2473
2474fn validate_seed_tensor(op: &'static str, primal: &EagerTensor, seed: &EagerTensor) -> Result<()> {
2475    if primal.dtype() != seed.dtype() {
2476        return Err(
2477            tenferro_tensor::Error::dtype_mismatch(op, primal.dtype(), seed.dtype()).into(),
2478        );
2479    }
2480    if primal.shape() != seed.shape() {
2481        return Err(
2482            tenferro_tensor::Error::shape_mismatch(op, primal.shape(), seed.shape()).into(),
2483        );
2484    }
2485    Ok(())
2486}
2487
/// Eager tensor with reverse-mode autodiff over concrete tensor values.
///
/// This executes each primitive immediately and records a lightweight reverse
/// DAG for `backward()`. Gradients accumulate across repeated `backward()`
/// calls until they are cleared explicitly.
///
/// # Examples
///
/// ```
/// use tenferro_cpu::CpuBackend;
/// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
///
/// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
/// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx)?;
/// let loss = x.mul(&x).unwrap().reduce_sum(Some(&[0])).unwrap();
/// let _cotangents = loss.backward().unwrap();
/// let loss = x.mul(&x).unwrap().reduce_sum(Some(&[0])).unwrap();
/// let _cotangents = loss.backward().unwrap();
///
/// assert_eq!(x.grad().unwrap().unwrap().as_slice::<f64>().unwrap(), &[4.0, 8.0, 12.0]);
/// x.clear_grad()?;
///
/// assert!(x.grad().unwrap().is_none());
/// # Ok::<(), tenferro_ad::Error>(())
/// ```
#[derive(Clone)]
pub struct EagerTensor {
    /// Shared tensor value backing this handle.
    pub(crate) value: Arc<TensorValue>,
    /// Set-once cache that `materialized_arc` fills when `value` does not
    /// already expose an `Arc<Tensor>`.
    materialized_cache: Arc<OnceLock<Arc<Tensor>>>,
    /// Value key used when registering the gradient slot and value record
    /// with the runtime.
    pub(crate) key: ValueKey<StdTensorOp>,
    /// Optional eager trace attached to this value (presumably the reverse-mode
    /// record consumed by `backward()` -- confirm against `EagerTrace`).
    pub(crate) trace: Option<EagerTrace>,
    /// Optional semantic trace attached to this value.
    pub(crate) semantic_trace: Option<TracedTensor>,
    /// Whether this tensor participates in gradient tracking; reported by
    /// `tracks_grad`.
    pub(crate) requires_grad: bool,
    /// Shared slot holding the accumulated gradient; read by `grad` and reset
    /// by `clear_grad`.
    grad_slot: GradSlot,
    /// Metadata scopes held by this tensor.
    pub(crate) metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
    /// Eager runtime that owns this tensor.
    pub(crate) ctx: Arc<EagerRuntime>,
    /// Shared record mirroring the fields above; `from_record` can rebuild a
    /// handle from it.
    _record: Arc<EagerTensorRecord>,
}
2526
/// Shared record carrying the same data as an [`EagerTensor`] handle.
///
/// `EagerTensor::from_record` rebuilds a handle by cloning each field of the
/// record and storing the record itself in the handle.
pub(crate) struct EagerTensorRecord {
    // Shared tensor value.
    value: Arc<TensorValue>,
    // Set-once materialization cache shared with handles built from this record.
    materialized_cache: Arc<OnceLock<Arc<Tensor>>>,
    // Value key of the recorded tensor.
    key: ValueKey<StdTensorOp>,
    // Optional eager trace of the recorded tensor.
    trace: Option<EagerTrace>,
    // Optional semantic trace of the recorded tensor.
    semantic_trace: Option<TracedTensor>,
    // Whether the recorded tensor tracks gradients.
    requires_grad: bool,
    // Gradient slot shared with handles built from this record.
    grad_slot: GradSlot,
    // Metadata scopes held by the recorded tensor.
    metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
    // Eager runtime that owns the recorded tensor.
    ctx: Arc<EagerRuntime>,
}
2538
/// Argument bundle consumed by `EagerTensor::from_parts`.
struct EagerTensorParts {
    // Runtime the new tensor belongs to.
    ctx: Arc<EagerRuntime>,
    // Value key for the new tensor.
    key: ValueKey<StdTensorOp>,
    // When true, `from_parts` registers the gradient slot with `ctx`.
    requires_grad: bool,
    // Optional eager trace to attach.
    trace: Option<EagerTrace>,
    // Optional semantic trace to attach.
    semantic_trace: Option<TracedTensor>,
    // Shared tensor value.
    value: Arc<TensorValue>,
    // Metadata scopes the new tensor should hold.
    metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
    // When true, `from_parts` registers the value record with `ctx`.
    register_value: bool,
}
2549
2550impl fmt::Debug for EagerTensor {
2551    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
2552        f.debug_struct("EagerTensor")
2553            .field("dtype", &self.dtype())
2554            .field("shape", &self.shape())
2555            .field("key", &self.key)
2556            .field("requires_grad", &self.requires_grad)
2557            .field("has_trace", &self.trace.is_some())
2558            .field("has_semantic_trace", &self.semantic_trace.is_some())
2559            .field("ctx_id", &self.ctx_id())
2560            .finish_non_exhaustive()
2561    }
2562}
2563
2564impl EagerTensor {
2565    /// Create an untracked eager tensor inside an existing eager context.
2566    ///
2567    /// # Examples
2568    ///
2569    /// ```
2570    /// use tenferro_cpu::CpuBackend;
2571    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2572    ///
2573    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2574    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx)?;
2575    ///
2576    /// assert_eq!(x.materialized()?.as_slice::<f64>().unwrap(), &[1.0, 2.0]);
2577    /// # Ok::<(), tenferro_ad::Error>(())
2578    /// ```
2579    ///
2580    /// # Errors
2581    ///
2582    /// Returns [`tenferro_runtime::Error::RuntimeState`] when metadata cannot
2583    /// be registered in the target context, or a typed tensor/backend error
2584    /// while materializing the source value.
2585    pub fn from_tensor_in(tensor: Tensor, ctx: Arc<EagerRuntime>) -> Result<Self> {
2586        Self::new_leaf(ctx, tensor, false)
2587    }
2588
2589    /// Create an untracked eager tensor from compact column-major data inside
2590    /// an existing eager runtime.
2591    ///
2592    /// # Errors
2593    ///
2594    /// Returns [`Error::TensorRuntime`] with
2595    /// [`tenferro_tensor::ValidationError::ShapeMismatch`] when the shape and
2596    /// data length disagree, or with
2597    /// [`tenferro_tensor::ValidationError::IntegerOverflow`] when shape
2598    /// arithmetic overflows. Returns [`Error::RuntimeState`] when eager
2599    /// metadata cannot be registered.
2600    pub fn from_vec_col_major_in<T: TensorScalar>(
2601        shape: impl IntoShapeVec,
2602        data: Vec<T>,
2603        ctx: Arc<EagerRuntime>,
2604    ) -> Result<Self> {
2605        Self::from_tensor_in(Tensor::from_vec_col_major(shape, data)?, ctx)
2606    }
2607
2608    /// Create a tracked eager leaf inside an existing eager context.
2609    ///
2610    /// # Examples
2611    ///
2612    /// ```
2613    /// use tenferro_cpu::CpuBackend;
2614    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2615    ///
2616    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2617    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx)?;
2618    ///
2619    /// assert!(x.grad().unwrap().is_none());
2620    /// # Ok::<(), tenferro_ad::Error>(())
2621    /// ```
2622    ///
2623    /// # Errors
2624    ///
2625    /// Returns [`tenferro_runtime::Error::RuntimeState`] when gradient metadata
2626    /// cannot be registered in the target context, or a typed tensor/backend
2627    /// error while creating the leaf.
2628    pub fn requires_grad_in(tensor: Tensor, ctx: Arc<EagerRuntime>) -> Result<Self> {
2629        Self::new_leaf(ctx, tensor, true)
2630    }
2631
2632    pub(crate) fn new_leaf(
2633        ctx: Arc<EagerRuntime>,
2634        tensor: Tensor,
2635        requires_grad: bool,
2636    ) -> Result<Self> {
2637        let key = eager_val_key();
2638        let tensor = Arc::new(tensor);
2639        let semantic_trace = TracedTensor::from_tensor_arc_symbolic_shape(Arc::clone(&tensor))?;
2640        let metadata_scope =
2641            register_scoped_value_metadata(key.clone(), tensor_meta_from_tensor(tensor.as_ref()))
2642                .map_err(|err| {
2643                Error::runtime_state_source("eager leaf metadata", ErrorPhase::GraphBuild, err)
2644            })?;
2645        Self::from_parts(EagerTensorParts {
2646            ctx,
2647            key,
2648            requires_grad,
2649            trace: None,
2650            semantic_trace: Some(semantic_trace),
2651            value: Arc::new(TensorValue::from_tensor_arc(tensor)),
2652            metadata_scopes: metadata_scopes_for_scope(metadata_scope),
2653            register_value: true,
2654        })
2655    }
2656
2657    pub(crate) fn new_result_arc(
2658        ctx: Arc<EagerRuntime>,
2659        key: ValueKey<StdTensorOp>,
2660        tensor: Arc<Tensor>,
2661        requires_grad: bool,
2662        trace: Option<EagerTrace>,
2663        metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
2664    ) -> Result<Self> {
2665        Self::new_result_arc_with_semantic_trace(
2666            ctx,
2667            key,
2668            tensor,
2669            requires_grad,
2670            trace,
2671            None,
2672            metadata_scopes,
2673        )
2674    }
2675
2676    pub(crate) fn new_result_arc_with_semantic_trace(
2677        ctx: Arc<EagerRuntime>,
2678        key: ValueKey<StdTensorOp>,
2679        tensor: Arc<Tensor>,
2680        requires_grad: bool,
2681        trace: Option<EagerTrace>,
2682        semantic_trace: Option<TracedTensor>,
2683        metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
2684    ) -> Result<Self> {
2685        Self::from_parts(EagerTensorParts {
2686            ctx,
2687            key,
2688            requires_grad,
2689            trace,
2690            semantic_trace,
2691            value: Arc::new(TensorValue::from_tensor_arc(tensor)),
2692            metadata_scopes,
2693            register_value: true,
2694        })
2695    }
2696
2697    pub(crate) fn new_unregistered_result_arc_with_semantic_trace(
2698        ctx: Arc<EagerRuntime>,
2699        key: ValueKey<StdTensorOp>,
2700        tensor: Arc<Tensor>,
2701        requires_grad: bool,
2702        trace: Option<EagerTrace>,
2703        semantic_trace: Option<TracedTensor>,
2704        metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
2705    ) -> Result<Self> {
2706        Self::from_parts(EagerTensorParts {
2707            ctx,
2708            key,
2709            requires_grad,
2710            trace,
2711            semantic_trace,
2712            value: Arc::new(TensorValue::from_tensor_arc(tensor)),
2713            metadata_scopes,
2714            register_value: false,
2715        })
2716    }
2717
2718    pub(crate) fn new_result_value(
2719        ctx: Arc<EagerRuntime>,
2720        key: ValueKey<StdTensorOp>,
2721        value: TensorValue,
2722        requires_grad: bool,
2723        trace: Option<EagerTrace>,
2724        semantic_trace: Option<TracedTensor>,
2725        metadata_scopes: Vec<Arc<GlobalMetadataScope>>,
2726    ) -> Result<Self> {
2727        Self::from_parts(EagerTensorParts {
2728            ctx,
2729            key,
2730            requires_grad,
2731            trace,
2732            semantic_trace,
2733            value: Arc::new(value),
2734            metadata_scopes,
2735            register_value: true,
2736        })
2737    }
2738
    /// Assemble an [`EagerTensor`] from its parts, performing the requested
    /// runtime registrations.
    ///
    /// A fresh, empty gradient slot and an empty materialization cache are
    /// created for the tensor. The shared record mirrors every field of the
    /// returned handle.
    ///
    /// # Errors
    ///
    /// Propagates the error from `try_register_grad_slot` (only attempted when
    /// `requires_grad` is set) or from `try_register_value_record` (only
    /// attempted when `register_value` is set).
    fn from_parts(parts: EagerTensorParts) -> Result<Self> {
        let EagerTensorParts {
            ctx,
            key,
            requires_grad,
            trace,
            semantic_trace,
            value,
            metadata_scopes,
            register_value,
        } = parts;
        // The gradient slot starts out empty.
        let grad_slot = Arc::new(Mutex::new(None));
        if requires_grad {
            // Gradient-slot registration happens before the record is built.
            ctx.try_register_grad_slot(&key, &grad_slot)?;
        }
        let materialized_cache = Arc::new(OnceLock::new());
        // The record shares the value, cache, and gradient slot with the handle.
        let record = Arc::new(EagerTensorRecord {
            value: Arc::clone(&value),
            materialized_cache: Arc::clone(&materialized_cache),
            key: key.clone(),
            trace: trace.clone(),
            semantic_trace: semantic_trace.clone(),
            requires_grad,
            grad_slot: Arc::clone(&grad_slot),
            metadata_scopes: metadata_scopes.clone(),
            ctx: Arc::clone(&ctx),
        });
        if register_value {
            ctx.try_register_value_record(&key, &record)?;
        }

        Ok(Self {
            value,
            materialized_cache,
            key,
            trace,
            semantic_trace,
            requires_grad,
            grad_slot,
            metadata_scopes,
            ctx,
            _record: record,
        })
    }
2783
2784    pub(crate) fn new_untracked_result(ctx: Arc<EagerRuntime>, tensor: Tensor) -> Result<Self> {
2785        Ok(Self::new_untracked_value_result(
2786            ctx,
2787            TensorValue::from_tensor(tensor),
2788        ))
2789    }
2790
2791    pub(crate) fn new_untracked_value_result(ctx: Arc<EagerRuntime>, value: TensorValue) -> Self {
2792        Self::new_untracked_value_result_with_semantic_trace(ctx, value, None)
2793    }
2794
2795    pub(crate) fn new_untracked_value_result_with_semantic_trace(
2796        ctx: Arc<EagerRuntime>,
2797        value: TensorValue,
2798        semantic_trace: Option<TracedTensor>,
2799    ) -> Self {
2800        let value = Arc::new(value);
2801        let materialized_cache = Arc::new(OnceLock::new());
2802        let key = eager_val_key();
2803        let grad_slot = Arc::new(Mutex::new(None));
2804        let record = Arc::new(EagerTensorRecord {
2805            value: Arc::clone(&value),
2806            materialized_cache: Arc::clone(&materialized_cache),
2807            key: key.clone(),
2808            trace: None,
2809            semantic_trace: semantic_trace.clone(),
2810            requires_grad: false,
2811            grad_slot: Arc::clone(&grad_slot),
2812            metadata_scopes: Vec::new(),
2813            ctx: Arc::clone(&ctx),
2814        });
2815        Self {
2816            value,
2817            materialized_cache,
2818            key,
2819            trace: None,
2820            semantic_trace,
2821            requires_grad: false,
2822            grad_slot,
2823            metadata_scopes: Vec::new(),
2824            ctx,
2825            _record: record,
2826        }
2827    }
2828
2829    pub(crate) fn from_record(record: Arc<EagerTensorRecord>) -> Self {
2830        Self {
2831            value: Arc::clone(&record.value),
2832            materialized_cache: Arc::clone(&record.materialized_cache),
2833            key: record.key.clone(),
2834            trace: record.trace.clone(),
2835            semantic_trace: record.semantic_trace.clone(),
2836            requires_grad: record.requires_grad,
2837            grad_slot: Arc::clone(&record.grad_slot),
2838            metadata_scopes: record.metadata_scopes.clone(),
2839            ctx: Arc::clone(&record.ctx),
2840            _record: record,
2841        }
2842    }
2843
2844    /// Detach this tensor from the reverse graph.
2845    ///
2846    /// The returned tensor keeps the concrete value but no longer contributes
2847    /// gradients to the original graph.
2848    ///
2849    /// # Examples
2850    ///
2851    /// ```
2852    /// use tenferro_cpu::CpuBackend;
2853    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2854    ///
2855    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2856    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx)?;
2857    /// let y = x.detach();
2858    ///
2859    /// assert_eq!(y.materialized()?.as_slice::<f64>().unwrap(), &[1.0, 2.0]);
2860    /// assert!(y.grad().unwrap().is_none());
2861    /// # Ok::<(), tenferro_ad::Error>(())
2862    /// ```
2863    pub fn detach(&self) -> Self {
2864        let semantic_trace = self.value.as_tensor_arc().and_then(|tensor| {
2865            TracedTensor::from_tensor_arc_symbolic_shape(Arc::clone(tensor)).ok()
2866        });
2867        Self::new_untracked_value_result_with_semantic_trace(
2868            self.ctx.clone(),
2869            self.value.as_ref().clone(),
2870            semantic_trace,
2871        )
2872    }
2873
2874    /// Detach this tensor from its graph and re-register it in a different
2875    /// context as an untracked leaf.
2876    ///
2877    /// # Examples
2878    ///
2879    /// ```
2880    /// use tenferro_cpu::CpuBackend;
2881    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2882    ///
2883    /// let ctx_a = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2884    /// let ctx_b = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2885    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx_a)?;
2886    /// let d = x.detach_into(&ctx_b)?;
2887    ///
2888    /// assert!(!d.tracks_grad());
2889    /// assert_eq!(d.ctx_id(), ctx_b.id());
2890    /// # Ok::<(), tenferro_ad::Error>(())
2891    /// ```
2892    ///
2893    /// # Errors
2894    ///
2895    /// Returns [`Error::RuntimeState`] if the source cannot be materialized or
2896    /// the target context cannot register its metadata.
2897    pub fn detach_into(&self, ctx: &Arc<EagerRuntime>) -> Result<Self> {
2898        Self::from_tensor_in(self.to_tensor()?, Arc::clone(ctx))
2899    }
2900
2901    /// Materialize and share the concrete tensor value.
2902    ///
2903    /// # Examples
2904    ///
2905    /// ```
2906    /// use tenferro_cpu::CpuBackend;
2907    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2908    ///
2909    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2910    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap(), ctx)?;
2911    /// assert_eq!(x.materialized()?.as_slice::<f64>().unwrap(), &[3.0]);
2912    /// # Ok::<(), tenferro_ad::Error>(())
2913    /// ```
2914    ///
2915    /// # Errors
2916    ///
2917    /// Returns [`Error::RuntimeState`] when lazy/backend-resident storage cannot
2918    /// be materialized, or when eager value-record state is poisoned.
2919    pub fn materialized(&self) -> Result<Arc<Tensor>> {
2920        self.materialized_arc()
2921    }
2922
2923    /// Return this tensor's scalar dtype without materializing through
2924    /// [`materialized`](Self::materialized).
2925    pub fn dtype(&self) -> DType {
2926        self.value.dtype()
2927    }
2928
2929    /// Return this tensor's logical shape without materializing through
2930    /// [`materialized`](Self::materialized).
2931    pub fn shape(&self) -> &[usize] {
2932        self.value.shape()
2933    }
2934
2935    /// Borrow this tensor value as a [`TensorRead`].
2936    ///
2937    /// This is the preferred borrowed input boundary for executor calls. It
2938    /// preserves the option to replace eager storage with non-contiguous views
2939    /// without forcing callers through [`materialized`](Self::materialized).
2940    pub fn tensor_read(&self) -> TensorRead<'_> {
2941        self.value.tensor_read()
2942    }
2943
2944    /// Materialize this eager tensor as an owned [`Tensor`].
2945    ///
2946    /// This is the owned materialization boundary for callers that need a
2947    /// standalone compact tensor. The operation is fallible because eager
2948    /// values may be backed by lazy or backend-resident storage.
2949    ///
2950    /// # Errors
2951    ///
2952    /// Returns [`Error::RuntimeState`] if backend state is unavailable, or a
2953    /// typed tensor backend error when contiguous materialization fails.
2954    pub fn to_tensor(&self) -> Result<Tensor> {
2955        self.ctx.materialize_value(self.value.as_ref())
2956    }
2957
2958    pub(crate) fn materialized_arc(&self) -> Result<Arc<Tensor>> {
2959        if let Some(tensor) = self.value.as_tensor_arc() {
2960            self.ctx.try_register_value_record_ptr(&self._record)?;
2961            return Ok(Arc::clone(tensor));
2962        }
2963        if let Some(tensor) = self.materialized_cache.get() {
2964            self.ctx.try_register_value_record_ptr(&self._record)?;
2965            return Ok(Arc::clone(tensor));
2966        }
2967
2968        let materialized = Arc::new(self.ctx.materialize_value(self.value.as_ref())?);
2969        let _ = self.materialized_cache.set(Arc::clone(&materialized));
2970        self.ctx.try_register_value_record_ptr(&self._record)?;
2971        Ok(self
2972            .materialized_cache
2973            .get()
2974            .map(Arc::clone)
2975            .unwrap_or(materialized))
2976    }
2977
2978    #[cfg(test)]
2979    pub(crate) fn materialized_cache_is_initialized(&self) -> bool {
2980        self.materialized_cache.get().is_some()
2981    }
2982
2983    /// Return the accumulated gradient currently stored for this tensor.
2984    ///
2985    /// The stored gradient accumulates across repeated `backward()` calls
2986    /// until it is cleared explicitly.
2987    ///
2988    /// For complex scalar losses, stored gradients use tenferro's
2989    /// Hermitian-adjoint cotangent convention. See
2990    /// <https://tensor4all.org/tenferro-rs/guides/complex-ad.html>.
2991    ///
2992    /// # Examples
2993    ///
2994    /// ```
2995    /// use tenferro_cpu::CpuBackend;
2996    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
2997    ///
2998    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
2999    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx).unwrap();
3000    /// let loss = x.exp().unwrap().reduce_sum(Some(&[0])).unwrap();
3001    /// let _cotangents = loss.backward().unwrap();
3002    ///
3003    /// let grad = x.grad()?.unwrap();
3004    /// assert_eq!(grad.shape(), &[2]);
3005    /// # Ok::<(), tenferro_ad::Error>(())
3006    /// ```
3007    ///
3008    /// # Errors
3009    ///
3010    /// Returns [`Error::RuntimeState`] if the gradient slot is poisoned or no
3011    /// longer available.
3012    pub fn grad(&self) -> Result<Option<Arc<Tensor>>> {
3013        self.grad_slot
3014            .lock()
3015            .map_err(|_| {
3016                Error::runtime_state(
3017                    "eager_gradient_slot",
3018                    ErrorPhase::Execution,
3019                    "lock poisoned",
3020                )
3021            })
3022            .map(|slot| slot.clone())
3023    }
3024
3025    /// Clear the accumulated gradient stored for this tensor.
3026    ///
3027    /// This only affects this tensor's gradient slot. Other tensors in the
3028    /// same context retain their gradients until they are cleared explicitly or
3029    /// overwritten by later accumulation.
3030    ///
3031    /// # Examples
3032    ///
3033    /// ```
3034    /// use tenferro_cpu::CpuBackend;
3035    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3036    ///
3037    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3038    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx.clone()).unwrap();
3039    /// let y = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![4.0_f64, 5.0, 6.0]).unwrap(), ctx).unwrap();
3040    /// let loss = x.mul(&y).unwrap().reduce_sum(Some(&[0])).unwrap();
3041    /// let _ = loss.backward().unwrap();
3042    ///
3043    /// x.clear_grad()?;
3044    ///
3045    /// assert!(x.grad()?.is_none());
3046    /// assert!(y.grad()?.is_some());
3047    /// # Ok::<(), tenferro_ad::Error>(())
3048    /// ```
3049    ///
3050    /// # Errors
3051    ///
3052    /// Returns [`Error::RuntimeState`] if the gradient slot lock is poisoned.
3053    pub fn clear_grad(&self) -> Result<()> {
3054        *self.grad_slot.lock().map_err(|_| {
3055            Error::runtime_state(
3056                "eager_gradient_slot",
3057                ErrorPhase::Execution,
3058                "lock poisoned",
3059            )
3060        })? = None;
3061        Ok(())
3062    }
3063
3064    /// Report whether this tensor participates in gradient tracking.
3065    ///
3066    /// Tracked tensors keep a gradient slot in their eager context; untracked
3067    /// tensors and detached tensors do not.
3068    ///
3069    /// # Examples
3070    ///
3071    /// ```
3072    /// use tenferro_cpu::CpuBackend;
3073    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3074    ///
3075    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3076    /// let plain = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(), ctx.clone()).unwrap();
3077    /// let tracked = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![2], vec![3.0_f64, 4.0]).unwrap(), ctx.clone()).unwrap();
3078    /// let detached = tracked.detach();
3079    ///
3080    /// assert!(!plain.tracks_grad());
3081    /// assert!(tracked.tracks_grad());
3082    /// assert!(!detached.tracks_grad());
3083    /// # Ok::<(), tenferro_ad::Error>(())
3084    /// ```
3085    pub fn tracks_grad(&self) -> bool {
3086        self.requires_grad
3087    }
3088
    /// Test-only hook for the number of values saved by this tensor's trace.
    ///
    /// The current implementation always reports `None`.
    #[cfg(test)]
    fn debug_trace_saved_value_count(&self) -> Option<usize> {
        None
    }
3093
3094    /// Return the opaque identifier of the context this tensor belongs to.
3095    ///
3096    /// # Examples
3097    ///
3098    /// ```
3099    /// use tenferro_cpu::CpuBackend;
3100    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3101    ///
3102    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3103    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(), ctx.clone()).unwrap();
3104    ///
3105    /// assert_eq!(x.ctx_id(), ctx.id());
3106    /// # Ok::<(), tenferro_ad::Error>(())
3107    /// ```
3108    pub fn ctx_id(&self) -> ContextId {
3109        self.ctx.id()
3110    }
3111
3112    /// Borrow the eager runtime context that owns this tensor.
3113    pub fn runtime(&self) -> &Arc<EagerRuntime> {
3114        &self.ctx
3115    }
3116
3117    /// Check whether two tensors belong to the same eager context.
3118    ///
3119    /// # Examples
3120    ///
3121    /// ```
3122    /// use tenferro_cpu::CpuBackend;
3123    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3124    ///
3125    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3126    /// let x = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![1.0_f64]).unwrap(), ctx.clone()).unwrap();
3127    /// let y = EagerTensor::from_tensor_in(Tensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap(), ctx).unwrap();
3128    ///
3129    /// assert!(x.same_context(&y));
3130    /// # Ok::<(), tenferro_ad::Error>(())
3131    /// ```
3132    pub fn same_context(&self, other: &Self) -> bool {
3133        self.ctx_id() == other.ctx_id()
3134    }
3135
    /// Test-only helper: build a standard graph over `inputs`, execute it in
    /// their shared context, and wrap the outputs as eager tensors.
    ///
    /// `build_graph` receives one freshly allocated input key per input
    /// tensor, in input order, and returns the graph to execute.
    ///
    /// When gradient recording is disabled or no input requires gradients,
    /// the outputs are returned as untracked, unregistered tensors with fresh
    /// keys. Otherwise the outputs are recorded through
    /// `record_eager_graph_outputs` and returned with their traces attached.
    ///
    /// # Errors
    ///
    /// Returns [`Error::Internal`] when `inputs` is empty or when the number
    /// of executed outputs or recorded traces does not match expectations,
    /// [`Error::ContextMismatch`] when the inputs belong to different
    /// contexts, and propagates errors from graph construction,
    /// materialization, execution, recording, and tensor assembly.
    #[cfg(test)]
    pub(crate) fn standard_graph_op(
        inputs: &[&Self],
        build_graph: impl FnOnce(&[TensorInputKey]) -> Result<Arc<Graph<StdTensorOp>>>,
    ) -> Result<Vec<Self>> {
        // The first input supplies the execution context.
        let Some(first) = inputs.first() else {
            return Err(Error::Internal(
                "standard eager graph op requires at least one input tensor".to_string(),
            ));
        };
        let ctx = Arc::clone(&first.ctx);
        // Every other input must live in the same context as the first.
        for tensor in inputs.iter().skip(1) {
            if !first.same_context(tensor) {
                return Err(Error::ContextMismatch {
                    lhs: first.ctx_id(),
                    rhs: tensor.ctx_id(),
                });
            }
        }

        // One fresh graph input key per input tensor, in input order.
        let graph_input_keys = (0..inputs.len())
            .map(|_| next_input_key())
            .collect::<Vec<_>>();
        let graph = build_graph(&graph_input_keys)?;
        // Bind each graph input key to the materialized value of its tensor.
        let initial_data = graph_input_keys
            .iter()
            .zip(inputs.iter())
            .map(|(key, tensor)| Ok((ValueKey::Input(key.clone()), tensor.materialized_arc()?)))
            .collect::<Result<HashMap<_, _>>>()?;
        let execution = ctx.exec_standard_graph_outputs(graph.as_ref(), &initial_data)?;
        if execution.outputs.len() != graph.outputs().len() {
            return Err(Error::Internal(format!(
                "standard eager graph op expected {} graph outputs, got {}",
                graph.outputs().len(),
                execution.outputs.len()
            )));
        }

        // Untracked fast path: no recording, fresh keys, no value registration.
        if !eager_grad_recording_enabled() || !inputs.iter().any(|input| input.requires_grad) {
            return execution
                .outputs
                .into_iter()
                .map(|output| {
                    Self::new_unregistered_result_arc_with_semantic_trace(
                        Arc::clone(&ctx),
                        eager_val_key(),
                        output,
                        false,
                        None,
                        None,
                        Vec::new(),
                    )
                })
                .collect();
        }

        let recorded = record_eager_graph_outputs(
            graph.as_ref(),
            &graph_input_keys,
            &execution.outputs,
            inputs,
        )?;
        if recorded.traces.len() != execution.outputs.len() {
            return Err(Error::Internal(format!(
                "standard eager graph op expected {} eager traces, got {}",
                execution.outputs.len(),
                recorded.traces.len()
            )));
        }

        // Outputs hold the scope created by recording plus every input scope.
        let mut metadata_scopes = vec![Arc::clone(&recorded.metadata_scope)];
        for input in inputs {
            for scope in &input.metadata_scopes {
                push_metadata_scope(&mut metadata_scopes, Arc::clone(scope));
            }
        }

        // NOTE(review): only `traces` is length-checked above; `zip` would
        // silently truncate if `semantic_traces` were shorter -- confirm that
        // `record_eager_graph_outputs` guarantees equal lengths.
        recorded
            .traces
            .into_iter()
            .zip(recorded.semantic_traces)
            .zip(execution.outputs)
            .map(|((trace, semantic_trace), output)| {
                Self::new_result_arc_with_semantic_trace(
                    Arc::clone(&ctx),
                    trace.key,
                    output,
                    trace.requires_grad,
                    trace.trace,
                    semantic_trace,
                    metadata_scopes.clone(),
                )
            })
            .collect()
    }
3231
3232    /// Run reverse-mode AD from this scalar output.
3233    ///
3234    /// Returns the full cotangent map produced by the reverse pass and also
3235    /// accumulates into `grad()` for tracked eager tensors reachable from this
3236    /// output.
3237    ///
3238    /// For complex scalar outputs, cotangents use tenferro's Hermitian
3239    /// real-inner-product convention. See
3240    /// <https://tensor4all.org/tenferro-rs/guides/complex-ad.html>.
3241    ///
3242    /// # Examples
3243    ///
3244    /// ```
3245    /// use tenferro_cpu::CpuBackend;
3246    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3247    ///
3248    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3249    /// let x = EagerTensor::requires_grad_in(Tensor::from_vec_col_major(vec![3], vec![1.0_f64, 2.0, 3.0]).unwrap(), ctx).unwrap();
3250    /// let loss = x.add(&x).unwrap().reduce_sum(Some(&[0])).unwrap();
3251    /// let _cotangents = loss.backward().unwrap();
3252    /// let loss = x.add(&x).unwrap().reduce_sum(Some(&[0])).unwrap();
3253    /// let _cotangents = loss.backward().unwrap();
3254    ///
3255    /// assert_eq!(x.grad().unwrap().unwrap().as_slice::<f64>().unwrap(), &[4.0, 4.0, 4.0]);
3256    /// # Ok::<(), tenferro_ad::Error>(())
3257    /// ```
3258    ///
3259    /// # Errors
3260    ///
3261    /// Returns [`Error::NonScalarGrad`] when this output is not scalar,
3262    /// [`Error::UnsupportedAdRule`] when a graph operation lacks a reverse rule,
3263    /// or a typed validation/backend/runtime-state error during the reverse pass.
3264    pub fn backward(&self) -> Result<HashMap<ValueKey<StdTensorOp>, Arc<Tensor>>> {
3265        if !self.shape().is_empty() {
3266            return Err(Error::NonScalarGrad {
3267                shape: self.shape().to_vec(),
3268            });
3269        }
3270
3271        let value = self.materialized_arc()?;
3272        let seed = {
3273            let mut backend = self.ctx.lock_backend()?;
3274            Arc::new(one_like_tensor(value.as_ref(), &mut *backend)?)
3275        };
3276        self.backward_from_seed(seed)
3277    }
3278
3279    /// Run reverse-mode AD from this output with an explicit cotangent seed.
3280    ///
3281    /// This is the stateful eager VJP sugar: it returns the cotangent map and
3282    /// accumulates reachable tracked leaves into their `grad()` slots. Use
3283    /// [`EagerRuntime::vjp`] when the VJP result should be returned as a
3284    /// composable eager tensor without touching grad slots.
3285    ///
3286    /// # Examples
3287    ///
3288    /// ```
3289    /// use tenferro_ad::{EagerRuntime, EagerTensor, Tensor};
3290    /// use tenferro_cpu::CpuBackend;
3291    ///
3292    /// let ctx = EagerRuntime::with_cpu_backend(CpuBackend::new())?;
3293    /// let x = EagerTensor::requires_grad_in(
3294    ///     Tensor::from_vec_col_major(vec![2], vec![2.0_f64, 3.0]).unwrap(),
3295    ///     ctx.clone(),
3296    /// )?;
3297    /// let seed = EagerTensor::from_tensor_in(
3298    ///     Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap(),
3299    ///     ctx,
3300    /// )?;
3301    /// let y = x.mul(&x)?;
3302    /// y.backward_with(&seed)?;
3303    /// assert_eq!(x.grad()?.unwrap().as_slice::<f64>().unwrap(), &[4.0, 12.0]);
3304    /// # Ok::<(), tenferro_ad::Error>(())
3305    /// ```
3306    ///
3307    /// # Errors
3308    ///
3309    /// Returns [`Error::ContextMismatch`] when `cotangent` belongs to another
3310    /// eager runtime, [`Error::Validation`] when its shape or dtype is not a
3311    /// valid seed, [`Error::UnsupportedAdRule`] for an unavailable reverse
3312    /// rule, or a typed backend/runtime-state error during execution.
3313    pub fn backward_with(
3314        &self,
3315        cotangent: &EagerTensor,
3316    ) -> Result<HashMap<ValueKey<StdTensorOp>, Arc<Tensor>>> {
3317        if !self.same_context(cotangent) {
3318            return Err(Error::ContextMismatch {
3319                lhs: self.ctx_id(),
3320                rhs: cotangent.ctx_id(),
3321            });
3322        }
3323        validate_seed_tensor("backward", self, cotangent)?;
3324        self.backward_from_seed(cotangent.materialized_arc()?)
3325    }
3326
3327    fn backward_from_seed(
3328        &self,
3329        seed: Arc<Tensor>,
3330    ) -> Result<HashMap<ValueKey<StdTensorOp>, Arc<Tensor>>> {
3331        let cotangent = EagerTensor::new_result_arc(
3332            Arc::clone(&self.ctx),
3333            eager_val_key(),
3334            seed,
3335            false,
3336            None,
3337            Vec::new(),
3338        )?;
3339        let candidate_keys = {
3340            let mut slots = self.ctx.lock_grad_slots()?;
3341            let mut keys = Vec::new();
3342            slots.retain(|key, slot| {
3343                if slot.upgrade().is_some() {
3344                    keys.push(key.clone());
3345                    true
3346                } else {
3347                    false
3348                }
3349            });
3350            keys
3351        };
3352
3353        let mut cotangents = HashMap::new();
3354        for key in candidate_keys {
3355            let Some(record) = self.ctx.value_record(&key)? else {
3356                continue;
3357            };
3358            if !record.requires_grad {
3359                continue;
3360            }
3361            let wrt = EagerTensor::from_record(record);
3362            let Some(grad) = self.ctx.vjp_optional(self, &wrt, &cotangent)? else {
3363                continue;
3364            };
3365            cotangents.insert(key, grad.materialized_arc()?);
3366        }
3367        let mut backend = self.ctx.lock_backend()?;
3368        self.ctx.store_grads(&cotangents, &mut backend)?;
3369        Ok(cotangents)
3370    }
3371}
3372
3373pub(crate) fn eager_val_key() -> ValueKey<StdTensorOp> {
3374    ValueKey::Input(next_input_key())
3375}
3376
3377pub(crate) struct RecordedEagerTrace {
3378    pub(crate) key: ValueKey<StdTensorOp>,
3379    pub(crate) trace: Option<EagerTrace>,
3380    pub(crate) requires_grad: bool,
3381}
3382
3383pub(crate) struct RecordedEagerOutputs {
3384    pub(crate) traces: Vec<RecordedEagerTrace>,
3385    pub(crate) semantic_traces: Vec<Option<TracedTensor>>,
3386    pub(crate) metadata_scope: Arc<GlobalMetadataScope>,
3387}
3388
3389pub(crate) fn record_eager_outputs(
3390    op: &StdTensorOp,
3391    outputs: &[Arc<Tensor>],
3392    inputs: &[&EagerTensor],
3393) -> Result<RecordedEagerOutputs> {
3394    let semantic_traces = record_semantic_eager_outputs(op, outputs.len(), inputs)?;
3395    let output_metadata = outputs
3396        .iter()
3397        .map(|output| tensor_meta_from_tensor(output.as_ref()));
3398    record_eager_outputs_from_metadata(output_metadata, semantic_traces, inputs)
3399}
3400
3401pub(crate) fn record_eager_value_outputs(
3402    op: &StdTensorOp,
3403    outputs: &[&TensorValue],
3404    inputs: &[&EagerTensor],
3405) -> Result<RecordedEagerOutputs> {
3406    let semantic_traces = record_semantic_eager_outputs(op, outputs.len(), inputs)?;
3407    let output_metadata = outputs.iter().map(|output| tensor_meta_from_value(output));
3408    record_eager_outputs_from_metadata(output_metadata, semantic_traces, inputs)
3409}
3410
3411fn record_semantic_eager_outputs(
3412    op: &StdTensorOp,
3413    output_count: usize,
3414    inputs: &[&EagerTensor],
3415) -> Result<Vec<Option<TracedTensor>>> {
3416    let Some(semantic_inputs) = inputs
3417        .iter()
3418        .map(|input| input.semantic_trace.as_ref())
3419        .collect::<Option<Vec<_>>>()
3420    else {
3421        return Ok(vec![None; output_count]);
3422    };
3423    let semantic_outputs = match op {
3424        StdTensorOp::Extension(ext) => {
3425            tenferro_runtime::extension::apply(Arc::clone(ext), &semantic_inputs)?
3426        }
3427        _ => tenferro_runtime::extension::apply_standard_op(op.clone(), &semantic_inputs)?,
3428    };
3429    if semantic_outputs.len() != output_count {
3430        return Err(Error::Internal(format!(
3431            "semantic eager recording expected {output_count} outputs for {op:?}, got {}",
3432            semantic_outputs.len()
3433        )));
3434    }
3435    Ok(semantic_outputs.into_iter().map(Some).collect())
3436}
3437
/// Test-only: record the tensor outputs of a whole `graph` run eagerly.
///
/// Semantic traces come from replaying the graph over the inputs' semantic
/// traces; metadata is read from each tensor in `outputs`.
#[cfg(test)]
fn record_eager_graph_outputs(
    graph: &Graph<StdTensorOp>,
    graph_input_keys: &[TensorInputKey],
    outputs: &[Arc<Tensor>],
    inputs: &[&EagerTensor],
) -> Result<RecordedEagerOutputs> {
    let semantic_traces = record_semantic_eager_graph_outputs(graph, graph_input_keys, inputs)?;
    record_eager_outputs_from_metadata(
        outputs
            .iter()
            .map(|tensor| tensor_meta_from_tensor(tensor.as_ref())),
        semantic_traces,
        inputs,
    )
}
3451
/// Test-only: replay `graph` over the semantic traces of `inputs`.
///
/// Returns one optional semantic trace per graph output. If any input has no
/// semantic trace, every output is `None`. Otherwise the graph inputs are
/// bound to `graph_input_keys` in order, each operation is applied to the
/// already-known values, and the graph outputs are looked up at the end.
///
/// # Errors
///
/// Returns [`Error::Internal`] when the key and input counts differ, when an
/// operand or graph output has no recorded value, or when an operation yields
/// an unexpected number of outputs. Errors from applying an operation are
/// propagated.
#[cfg(test)]
fn record_semantic_eager_graph_outputs(
    graph: &Graph<StdTensorOp>,
    graph_input_keys: &[TensorInputKey],
    inputs: &[&EagerTensor],
) -> Result<Vec<Option<TracedTensor>>> {
    let gathered = inputs
        .iter()
        .map(|input| input.semantic_trace.as_ref())
        .collect::<Option<Vec<_>>>();
    let semantic_inputs = match gathered {
        Some(traced) => traced,
        None => return Ok(vec![None; graph.outputs().len()]),
    };
    if graph_input_keys.len() != semantic_inputs.len() {
        return Err(Error::Internal(format!(
            "semantic graph recording expected {} input keys, got {}",
            semantic_inputs.len(),
            graph_input_keys.len()
        )));
    }

    // Seed the value table with the graph inputs, paired positionally.
    let mut values: HashMap<_, _> = graph_input_keys
        .iter()
        .zip(semantic_inputs)
        .map(|(key, tensor)| (ValueKey::Input(key.clone()), tensor.clone()))
        .collect();

    for node in graph.operations() {
        // Resolve every operand to the semantic value recorded for its key.
        let mut operands = Vec::new();
        for input in node.inputs.iter() {
            let key = match input {
                ValueRef::Local(local_id) => &graph.values()[*local_id].key,
                ValueRef::External(key) => key,
            };
            match values.get(key) {
                Some(value) => operands.push(value.clone()),
                None => {
                    return Err(Error::Internal(format!(
                        "semantic graph recording missing value for {key:?}"
                    )))
                }
            }
        }
        let operand_refs: Vec<_> = operands.iter().collect();

        let produced = if let StdTensorOp::Extension(ext) = &node.operation {
            tenferro_runtime::extension::apply(Arc::clone(ext), &operand_refs)?
        } else {
            tenferro_runtime::extension::apply_standard_op(node.operation.clone(), &operand_refs)?
        };
        if produced.len() != node.outputs.len() {
            return Err(Error::Internal(format!(
                "semantic graph recording expected {} outputs for {:?}, got {}",
                node.outputs.len(),
                node.operation,
                produced.len()
            )));
        }

        // Publish the node's results under the keys of its output values.
        for (&output_id, output) in node.outputs.iter().zip(produced) {
            values.insert(graph.values()[output_id].key.clone(), output);
        }
    }

    let mut results = Vec::with_capacity(graph.outputs().len());
    for &output_id in graph.outputs().iter() {
        let key = &graph.values()[output_id].key;
        match values.get(key) {
            Some(traced) => results.push(Some(traced.clone())),
            None => {
                return Err(Error::Internal(format!(
                    "semantic graph recording missing output for {key:?}"
                )))
            }
        }
    }
    Ok(results)
}
3527
3528fn record_eager_outputs_from_metadata(
3529    output_metadata: impl IntoIterator<Item = TensorMeta>,
3530    semantic_traces: Vec<Option<TracedTensor>>,
3531    inputs: &[&EagerTensor],
3532) -> Result<RecordedEagerOutputs> {
3533    let output_metadata = output_metadata.into_iter().collect::<Vec<_>>();
3534    if semantic_traces.len() != output_metadata.len() {
3535        return Err(Error::Internal(format!(
3536            "eager recording expected {} semantic traces, got {}",
3537            output_metadata.len(),
3538            semantic_traces.len()
3539        )));
3540    }
3541    let requires_grad =
3542        eager_grad_recording_enabled() && inputs.iter().any(|input| input.requires_grad);
3543    let mut registrations = Vec::with_capacity(output_metadata.len());
3544    let traces = output_metadata
3545        .into_iter()
3546        .map(|metadata| {
3547            let key = eager_val_key();
3548            registrations.push((key.clone(), metadata));
3549            RecordedEagerTrace {
3550                key,
3551                trace: None,
3552                requires_grad,
3553            }
3554        })
3555        .collect();
3556
3557    Ok(RecordedEagerOutputs {
3558        traces,
3559        semantic_traces,
3560        metadata_scope: Arc::new(register_scoped_metadata_batch(registrations)?),
3561    })
3562}
3563
3564fn tensor_meta_from_value(value: &TensorValue) -> TensorMeta {
3565    TensorMeta::exact(
3566        value.dtype(),
3567        value.shape().iter().copied().map(SymDim::from).collect(),
3568    )
3569}
3570
3571pub(crate) fn exec_single_output(
3572    op: &StdTensorOp,
3573    inputs: &[&Tensor],
3574    ctx: &EagerRuntime,
3575) -> Result<Tensor> {
3576    let mut outputs = ctx.exec_outputs(op, inputs)?;
3577    if outputs.len() != 1 {
3578        return Err(Error::Internal(format!(
3579            "expected one eager output for {:?}, got {}",
3580            op,
3581            outputs.len()
3582        )));
3583    }
3584    Ok(profile_eager_op_section(
3585        "exec_single_output.remove_output",
3586        || outputs.remove(0),
3587    ))
3588}
3589
3590pub(crate) fn exec_single_output_read(
3591    op: &StdTensorOp,
3592    inputs: &[TensorRead<'_>],
3593    ctx: &EagerRuntime,
3594) -> Result<Tensor> {
3595    let mut outputs = ctx.exec_outputs_read(op, inputs)?;
3596    if outputs.len() != 1 {
3597        return Err(Error::Internal(format!(
3598            "expected one eager output for {:?}, got {}",
3599            op,
3600            outputs.len()
3601        )));
3602    }
3603    Ok(profile_eager_op_section(
3604        "exec_single_output_read.remove_output",
3605        || outputs.remove(0),
3606    ))
3607}
3608
/// Test-only: build a tensor with the same dtype variant and shape as `input`,
/// filled with zeros (`false` for `Bool`), and upload it through `backend`.
///
/// # Errors
///
/// Propagates host tensor construction errors and backend upload errors.
#[cfg(test)]
pub(crate) fn zero_like_tensor<B: TensorBackend>(
    input: &Tensor,
    backend: &mut B,
) -> Result<Tensor> {
    let zeros = match input {
        Tensor::F32(t) => Tensor::F32(TypedTensor::zeros(t.shape().to_vec())?),
        Tensor::F64(t) => Tensor::F64(TypedTensor::zeros(t.shape().to_vec())?),
        Tensor::I32(t) => Tensor::I32(TypedTensor::zeros(t.shape().to_vec())?),
        Tensor::I64(t) => Tensor::I64(TypedTensor::zeros(t.shape().to_vec())?),
        // Bool is built from an explicit all-`false` buffer instead of `zeros`.
        Tensor::Bool(t) => {
            let all_false = vec![false; t.n_elements()];
            Tensor::Bool(TypedTensor::from_vec_col_major(
                t.shape().to_vec(),
                all_false,
            )?)
        }
        Tensor::C32(t) => Tensor::C32(TypedTensor::zeros(t.shape().to_vec())?),
        Tensor::C64(t) => Tensor::C64(TypedTensor::zeros(t.shape().to_vec())?),
    };
    let uploaded = backend.upload_host_tensor(&zeros)?;
    Ok(uploaded)
}
3628
3629pub(crate) fn one_like_tensor<B: TensorBackend>(input: &Tensor, backend: &mut B) -> Result<Tensor> {
3630    let host = ones_tensor(input.dtype(), input.shape().to_vec())?;
3631    backend.upload_host_tensor(&host).map_err(Error::from)
3632}
3633
3634#[cfg(test)]
3635mod tests;
tenferro_ad/eager.rs

tenferro_ad/
eager.rs