tenferro_runtime/graph/
executor.rs

1use std::collections::{HashMap, HashSet};
2use std::fmt;
3use std::sync::Arc;
4
5use tenferro_ops::input_key::TensorInputKey;
6use tenferro_tensor::{
7    DType, RuntimeCacheControl, Tensor, TensorBackend, TensorRead, TensorValue, TypedTensor,
8};
9
10use super::cache::GraphExecutorCacheStats;
11use super::program::{GraphProgram, GraphProgramInput};
12use crate::error::{Error, Result};
13use crate::exec::{ExecProgram, ExecSlot};
14use crate::extension_runtime::{ExtensionExecutor, ExtensionRuntimeRegistryError};
15use crate::traced::TracedTensor;
16
17/// Executes compiled graph programs on a concrete tensor backend.
18///
19/// A graph executor owns backend execution state only: backend runtime caches,
20/// extension runtime state, and reusable execution workspace. Compilation
21/// state lives in [`GraphCompiler`](super::GraphCompiler).
22///
23/// # Examples
24///
25/// ```
26/// use tenferro_cpu::CpuBackend;
27/// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
28///
29/// let x = TracedTensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap();
30/// let y = (&x + &x).unwrap();
31/// let mut compiler = GraphCompiler::new();
32/// let program = compiler.compile(&y).unwrap();
33///
34/// let mut executor = GraphExecutor::new(CpuBackend::new());
35/// let out = executor.run(&program).unwrap();
36/// assert_eq!(out.as_slice::<f64>().unwrap(), &[2.0, 4.0]);
37/// ```
38pub struct GraphExecutor<B: TensorBackend + 'static> {
39    backend: B,
40    backend_cache: B::RuntimeCache,
41    extension_executor: ExtensionExecutor<B>,
42    slot_workspace: Vec<Option<ExecSlot<'static>>>,
43    borrowed_slot_workspace_capacity: usize,
44}
45
46impl<B: TensorBackend + 'static> fmt::Debug for GraphExecutor<B> {
47    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
48        f.debug_struct("GraphExecutor")
49            .field("backend_type", &std::any::type_name::<B>())
50            .field("cache_stats", &self.cache_stats())
51            .field("slot_workspace_len", &self.slot_workspace.len())
52            .field(
53                "borrowed_slot_workspace_capacity",
54                &self.borrowed_slot_workspace_capacity,
55            )
56            .finish_non_exhaustive()
57    }
58}
59
60impl<B: TensorBackend + 'static> GraphExecutor<B> {
61    /// Create an executor with the given backend and bounded default caches.
62    ///
63    /// # Examples
64    ///
65    /// ```
66    /// use tenferro_cpu::CpuBackend;
67    /// use tenferro_runtime::{GraphExecutor};
68    ///
69    /// let executor = GraphExecutor::new(CpuBackend::new());
70    /// assert_eq!(executor.cache_stats().extensions.entries, 0);
71    /// ```
72    pub fn new(backend: B) -> Self {
73        Self {
74            backend,
75            backend_cache: B::RuntimeCache::default(),
76            extension_executor: ExtensionExecutor::new(),
77            slot_workspace: Vec::new(),
78            borrowed_slot_workspace_capacity: 0,
79        }
80    }
81
82    /// Borrow the backend used by this executor.
83    ///
84    /// # Examples
85    ///
86    /// ```
87    /// use tenferro_cpu::CpuBackend;
88    /// use tenferro_runtime::{GraphExecutor};
89    ///
90    /// let executor = GraphExecutor::new(CpuBackend::new());
91    /// let _backend = executor.backend();
92    /// ```
93    pub fn backend(&self) -> &B {
94        &self.backend
95    }
96
97    /// Return output tensors to the executor backend's reusable buffer pool.
98    ///
99    /// This is useful for tight benchmark or serving loops that consume an
100    /// output before the next run and want backend-level output allocation
101    /// behavior to match caching allocators.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use tenferro_cpu::CpuBackend;
107    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
108    ///
109    /// let x = TracedTensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap();
110    /// let mut compiler = GraphCompiler::new();
111    /// let y = x.neg().unwrap();
112    /// let program = compiler.compile(&y).unwrap();
113    /// let mut executor = GraphExecutor::new(CpuBackend::new());
114    ///
115    /// let out = executor.run(&program).unwrap();
116    /// assert_eq!(out.as_slice::<f64>().unwrap(), &[-2.0]);
117    /// executor.reclaim_outputs(vec![out]);
118    /// ```
119    pub fn reclaim_outputs(&mut self, outputs: Vec<Tensor>) {
120        for tensor in outputs {
121            self.backend.reclaim_buffer(tensor);
122        }
123    }
124
125    /// Return compact value outputs to the backend pool when ownership is unique.
126    ///
127    /// Lazy owned views are intentionally ignored because their base storage may
128    /// be aliased by view metadata.
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// use tenferro_cpu::CpuBackend;
134    /// use tenferro_runtime::{GraphExecutor, Tensor, TensorValue};
135    ///
136    /// let tensor = Tensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap();
137    /// let mut executor = GraphExecutor::new(CpuBackend::new());
138    /// executor.reclaim_value_outputs(vec![TensorValue::from_tensor(tensor)]);
139    /// ```
140    pub fn reclaim_value_outputs(&mut self, outputs: Vec<TensorValue>) {
141        for value in outputs {
142            if let TensorValue::Tensor(tensor) = value {
143                if let Ok(tensor) = Arc::try_unwrap(tensor) {
144                    self.backend.reclaim_buffer(tensor);
145                }
146            }
147        }
148    }
149
150    /// Borrow the extension runtime executor owned by this graph executor.
151    ///
152    /// # Examples
153    ///
154    /// ```
155    /// use tenferro_cpu::CpuBackend;
156    /// use tenferro_runtime::{GraphExecutor};
157    ///
158    /// let executor = GraphExecutor::new(CpuBackend::new());
159    /// assert_eq!(executor.extension_executor().cache_stats().entries, 0);
160    /// ```
161    pub fn extension_executor(&self) -> &ExtensionExecutor<B> {
162        &self.extension_executor
163    }
164
165    /// Mutably borrow the extension runtime executor owned by this graph executor.
166    ///
167    /// # Examples
168    ///
169    /// ```
170    /// use tenferro_cpu::CpuBackend;
171    /// use tenferro_runtime::{GraphExecutor};
172    ///
173    /// let mut executor = GraphExecutor::new(CpuBackend::new());
174    /// executor.extension_executor_mut().clear_caches();
175    /// ```
176    pub fn extension_executor_mut(&mut self) -> &mut ExtensionExecutor<B> {
177        &mut self.extension_executor
178    }
179
180    /// Register one extension runtime on this executor.
181    ///
182    /// # Examples
183    ///
184    /// ```
185    /// use tenferro_cpu::CpuBackend;
186    /// use tenferro_runtime::GraphExecutor;
187    ///
188    /// let mut executor = GraphExecutor::new(CpuBackend::new());
189    /// executor.register_extension(|_| Ok(())).unwrap();
190    /// ```
191    pub fn register_extension(
192        &mut self,
193        register: impl FnOnce(
194            &mut ExtensionExecutor<B>,
195        ) -> std::result::Result<(), ExtensionRuntimeRegistryError>,
196    ) -> std::result::Result<(), ExtensionRuntimeRegistryError> {
197        register(&mut self.extension_executor)
198    }
199
200    /// Run a one-output program using the program's default input tensors.
201    ///
202    /// # Examples
203    ///
204    /// ```
205    /// use tenferro_cpu::CpuBackend;
206    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
207    ///
208    /// let x = TracedTensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap();
209    /// let mut compiler = GraphCompiler::new();
210    /// let y = x.neg().unwrap();
211    /// let program = compiler.compile(&y).unwrap();
212    /// let mut executor = GraphExecutor::new(CpuBackend::new());
213    /// let out = executor.run(&program).unwrap();
214    /// assert_eq!(out.as_slice::<f64>().unwrap(), &[-3.0]);
215    /// ```
216    pub fn run(&mut self, program: &GraphProgram) -> Result<Tensor> {
217        let mut outputs = self.run_many(program)?;
218        expect_single_output(&mut outputs)
219    }
220
221    /// Run a one-output program and preserve lazy owned output views.
222    ///
223    /// # Examples
224    ///
225    /// ```
226    /// use tenferro_cpu::CpuBackend;
227    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TensorValue, TracedTensor};
228    ///
229    /// let x = TracedTensor::from_vec_col_major(
230    ///     vec![2, 3],
231    ///     vec![1.0_f64, 2.0, 3.0, 4.0, 5.0, 6.0],
232    /// )
233    /// .unwrap();
234    /// let y = x.transpose(&[1, 0]).unwrap();
235    /// let mut compiler = GraphCompiler::new();
236    /// let program = compiler.compile(&y).unwrap();
237    ///
238    /// let mut executor = GraphExecutor::new(CpuBackend::new());
239    /// let value = executor.run_value(&program).unwrap();
240    /// assert!(matches!(&value, TensorValue::View(_)));
241    /// assert_eq!(value.shape(), &[3, 2]);
242    /// assert_eq!(
243    ///     value.to_tensor().unwrap().as_slice::<f64>().unwrap(),
244    ///     &[1.0, 3.0, 5.0, 2.0, 4.0, 6.0]
245    /// );
246    /// ```
247    pub fn run_value(&mut self, program: &GraphProgram) -> Result<TensorValue> {
248        let mut outputs = self.run_many_values(program)?;
249        expect_single_value(&mut outputs)
250    }
251
252    /// Run a program using the program's default input tensors.
253    ///
254    /// # Examples
255    ///
256    /// ```
257    /// use tenferro_cpu::CpuBackend;
258    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
259    ///
260    /// let x = TracedTensor::from_vec_col_major(vec![1], vec![3.0_f64]).unwrap();
261    /// let y = x.neg().unwrap();
262    /// let mut compiler = GraphCompiler::new();
263    /// let program = compiler.compile_many(&[&x, &y]).unwrap();
264    /// let mut executor = GraphExecutor::new(CpuBackend::new());
265    /// let outputs = executor.run_many(&program).unwrap();
266    /// assert_eq!(outputs.len(), 2);
267    /// ```
268    pub fn run_many(&mut self, program: &GraphProgram) -> Result<Vec<Tensor>> {
269        self.run_many_with_inputs(program, &[])
270    }
271
272    /// Run a program and preserve lazy owned output views.
273    ///
274    /// # Examples
275    ///
276    /// ```
277    /// use tenferro_cpu::CpuBackend;
278    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TensorValue, TracedTensor};
279    ///
280    /// let x = TracedTensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap();
281    /// let y = x.transpose(&[1, 0]).unwrap();
282    /// let mut compiler = GraphCompiler::new();
283    /// let program = compiler.compile_many(&[&y]).unwrap();
284    ///
285    /// let mut executor = GraphExecutor::new(CpuBackend::new());
286    /// let outputs = executor.run_many_values(&program).unwrap();
287    /// assert_eq!(outputs.len(), 1);
288    /// assert!(matches!(&outputs[0], TensorValue::View(_)));
289    /// assert_eq!(outputs[0].shape(), &[2, 2]);
290    /// ```
291    pub fn run_many_values(&mut self, program: &GraphProgram) -> Result<Vec<TensorValue>> {
292        self.run_many_values_with_inputs(program, &[])
293    }
294
295    /// Run a one-output program with explicit runtime placeholder bindings.
296    ///
297    /// Explicit bindings override program defaults and are validated against
298    /// the ordered input specs captured in the compiled program.
299    ///
300    /// # Examples
301    ///
302    /// ```
303    /// use tenferro_cpu::CpuBackend;
304    /// use tenferro_runtime::{DType, GraphCompiler, GraphExecutor, Tensor, TracedTensor};
305    ///
306    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 1).unwrap();
307    /// let y = (&x + &x).unwrap();
308    /// let mut compiler = GraphCompiler::new();
309    /// let program = compiler
310    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2])])
311    ///     .unwrap();
312    /// let bound = Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap();
313    /// let mut executor = GraphExecutor::new(CpuBackend::new());
314    /// let out = executor.run_with_inputs(&program, &[(&x, &bound)]).unwrap();
315    /// assert_eq!(out.as_slice::<f64>().unwrap(), &[2.0, 4.0]);
316    /// ```
317    pub fn run_with_inputs(
318        &mut self,
319        program: &GraphProgram,
320        bindings: &[(&TracedTensor, &Tensor)],
321    ) -> Result<Tensor> {
322        let mut outputs = self.run_many_with_inputs(program, bindings)?;
323        expect_single_output(&mut outputs)
324    }
325
326    /// Run a one-output program with explicit bindings and preserve lazy output views.
327    ///
328    /// # Examples
329    ///
330    /// ```
331    /// use tenferro_cpu::CpuBackend;
332    /// use tenferro_runtime::{DType, GraphCompiler, GraphExecutor, Tensor, TensorValue, TracedTensor};
333    ///
334    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 2).unwrap();
335    /// let y = x.transpose(&[1, 0]).unwrap();
336    /// let mut compiler = GraphCompiler::new();
337    /// let program = compiler
338    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2, 2])])
339    ///     .unwrap();
340    /// let bound = Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap();
341    /// let mut executor = GraphExecutor::new(CpuBackend::new());
342    ///
343    /// let value = executor.run_value_with_inputs(&program, &[(&x, &bound)]).unwrap();
344    /// assert!(matches!(&value, TensorValue::View(_)));
345    /// assert_eq!(value.to_tensor().unwrap().as_slice::<f64>().unwrap(), &[1.0, 3.0, 2.0, 4.0]);
346    /// ```
347    pub fn run_value_with_inputs(
348        &mut self,
349        program: &GraphProgram,
350        bindings: &[(&TracedTensor, &Tensor)],
351    ) -> Result<TensorValue> {
352        let mut outputs = self.run_many_values_with_inputs(program, bindings)?;
353        expect_single_value(&mut outputs)
354    }
355
356    /// Run a one-output program with explicit borrowed runtime placeholder bindings.
357    ///
358    /// Unlike [`run_with_inputs`](Self::run_with_inputs), caller-owned input
359    /// tensors are read through [`TensorRead`] and are not cloned into executor
360    /// slots.
361    ///
362    /// # Examples
363    ///
364    /// ```
365    /// use tenferro_cpu::CpuBackend;
366    /// use tenferro_runtime::{
367    ///     DType, GraphCompiler, GraphExecutor, TensorRead, TensorView, TracedTensor, TypedTensorView,
368    /// };
369    ///
370    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 1).unwrap();
371    /// let y = (&x + &x).unwrap();
372    /// let mut compiler = GraphCompiler::new();
373    /// let program = compiler
374    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2])])
375    ///     .unwrap();
376    /// let data = [1.0_f64, 99.0, 2.0];
377    /// let view = TypedTensorView::from_slice([2], [2], 0, &data).unwrap();
378    /// let read = TensorRead::from_view(TensorView::F64(view));
379    /// let mut executor = GraphExecutor::new(CpuBackend::new());
380    ///
381    /// let out = executor.run_with_input_reads(&program, &[(&x, read)]).unwrap();
382    /// assert_eq!(out.as_slice::<f64>().unwrap(), &[2.0, 4.0]);
383    /// ```
384    pub fn run_with_input_reads<'a>(
385        &mut self,
386        program: &'a GraphProgram,
387        bindings: &[(&TracedTensor, TensorRead<'a>)],
388    ) -> Result<Tensor> {
389        let mut outputs = self.run_many_with_input_reads(program, bindings)?;
390        expect_single_output(&mut outputs)
391    }
392
393    /// Run a one-output program with borrowed bindings and preserve lazy output views.
394    ///
395    /// # Examples
396    ///
397    /// ```
398    /// use tenferro_cpu::CpuBackend;
399    /// use tenferro_runtime::{
400    ///     DType, GraphCompiler, GraphExecutor, TensorRead, TensorValue, TensorView, TracedTensor,
401    ///     TypedTensorView,
402    /// };
403    ///
404    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 2).unwrap();
405    /// let y = x.transpose(&[1, 0]).unwrap();
406    /// let mut compiler = GraphCompiler::new();
407    /// let program = compiler
408    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2, 2])])
409    ///     .unwrap();
410    /// let data = [1.0_f64, 2.0, 3.0, 4.0];
411    /// let view = TypedTensorView::from_slice([2, 2], [1, 2], 0, &data).unwrap();
412    /// let read = TensorRead::from_view(TensorView::F64(view));
413    /// let mut executor = GraphExecutor::new(CpuBackend::new());
414    ///
415    /// let value = executor
416    ///     .run_value_with_input_reads(&program, &[(&x, read)])
417    ///     .unwrap();
418    /// assert!(matches!(&value, TensorValue::View(_)));
419    /// assert_eq!(value.shape(), &[2, 2]);
420    /// ```
421    pub fn run_value_with_input_reads<'a>(
422        &mut self,
423        program: &'a GraphProgram,
424        bindings: &[(&TracedTensor, TensorRead<'a>)],
425    ) -> Result<TensorValue> {
426        let mut outputs = self.run_many_values_with_input_reads(program, bindings)?;
427        expect_single_value(&mut outputs)
428    }
429
430    /// Run a program with explicit runtime placeholder bindings.
431    ///
432    /// # Examples
433    ///
434    /// ```
435    /// use tenferro_cpu::CpuBackend;
436    /// use tenferro_runtime::{DType, GraphCompiler, GraphExecutor, Tensor, TracedTensor};
437    ///
438    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 1).unwrap();
439    /// let sum = (&x + &x).unwrap();
440    /// let mut compiler = GraphCompiler::new();
441    /// let program = compiler
442    ///     .compile_with_input_specs(&sum, &[(&x, DType::F64, &[2])])
443    ///     .unwrap();
444    /// let bound = Tensor::from_vec_col_major(vec![2], vec![1.0_f64, 2.0]).unwrap();
445    /// let mut executor = GraphExecutor::new(CpuBackend::new());
446    /// let outputs = executor.run_many_with_inputs(&program, &[(&x, &bound)]).unwrap();
447    /// assert_eq!(outputs.len(), 1);
448    /// assert_eq!(outputs[0].as_slice::<f64>().unwrap(), &[2.0, 4.0]);
449    /// ```
450    pub fn run_many_with_inputs(
451        &mut self,
452        program: &GraphProgram,
453        bindings: &[(&TracedTensor, &Tensor)],
454    ) -> Result<Vec<Tensor>> {
455        let input_tensors = resolve_inputs(program, bindings, &mut self.backend)?;
456        self.eval_exec_ir(&program.exec, input_tensors)
457    }
458
459    /// Run a program with explicit bindings and preserve lazy output views.
460    ///
461    /// # Examples
462    ///
463    /// ```
464    /// use tenferro_cpu::CpuBackend;
465    /// use tenferro_runtime::{DType, GraphCompiler, GraphExecutor, Tensor, TensorValue, TracedTensor};
466    ///
467    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 2).unwrap();
468    /// let y = x.transpose(&[1, 0]).unwrap();
469    /// let mut compiler = GraphCompiler::new();
470    /// let program = compiler
471    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2, 2])])
472    ///     .unwrap();
473    /// let bound = Tensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap();
474    /// let mut executor = GraphExecutor::new(CpuBackend::new());
475    ///
476    /// let outputs = executor
477    ///     .run_many_values_with_inputs(&program, &[(&x, &bound)])
478    ///     .unwrap();
479    /// assert_eq!(outputs.len(), 1);
480    /// assert!(matches!(&outputs[0], TensorValue::View(_)));
481    /// ```
482    pub fn run_many_values_with_inputs(
483        &mut self,
484        program: &GraphProgram,
485        bindings: &[(&TracedTensor, &Tensor)],
486    ) -> Result<Vec<TensorValue>> {
487        let input_tensors = resolve_inputs(program, bindings, &mut self.backend)?;
488        self.eval_exec_ir_values(&program.exec, input_tensors)
489    }
490
491    /// Run a program with explicit borrowed runtime placeholder bindings.
492    ///
493    /// Bindings override program defaults and are validated against the input
494    /// specs captured in the compiled program. Bound tensors are borrowed by
495    /// the executor for this call instead of cloned into input slots.
496    ///
497    /// # Examples
498    ///
499    /// ```
500    /// use tenferro_cpu::CpuBackend;
501    /// use tenferro_runtime::{
502    ///     DType, GraphCompiler, GraphExecutor, TensorRead, TensorView, TracedTensor, TypedTensorView,
503    /// };
504    ///
505    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 1).unwrap();
506    /// let y = (&x + &x).unwrap();
507    /// let mut compiler = GraphCompiler::new();
508    /// let program = compiler
509    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2])])
510    ///     .unwrap();
511    /// let data = [1.0_f64, 99.0, 2.0];
512    /// let view = TypedTensorView::from_slice([2], [2], 0, &data).unwrap();
513    /// let read = TensorRead::from_view(TensorView::F64(view));
514    /// let mut executor = GraphExecutor::new(CpuBackend::new());
515    ///
516    /// let outputs = executor.run_many_with_input_reads(&program, &[(&x, read)]).unwrap();
517    /// assert_eq!(outputs[0].as_slice::<f64>().unwrap(), &[2.0, 4.0]);
518    /// ```
519    pub fn run_many_with_input_reads<'a>(
520        &mut self,
521        program: &'a GraphProgram,
522        bindings: &[(&TracedTensor, TensorRead<'a>)],
523    ) -> Result<Vec<Tensor>> {
524        let inputs = resolve_input_reads(program, bindings, &mut self.backend)?;
525        self.eval_exec_ir_slots(&program.exec, inputs)
526    }
527
528    /// Run a program with borrowed bindings and preserve lazy output views.
529    ///
530    /// # Examples
531    ///
532    /// ```
533    /// use tenferro_cpu::CpuBackend;
534    /// use tenferro_runtime::{
535    ///     DType, GraphCompiler, GraphExecutor, TensorRead, TensorValue, TensorView, TracedTensor,
536    ///     TypedTensorView,
537    /// };
538    ///
539    /// let x = TracedTensor::input_symbolic_shape(DType::F64, 2).unwrap();
540    /// let y = x.transpose(&[1, 0]).unwrap();
541    /// let mut compiler = GraphCompiler::new();
542    /// let program = compiler
543    ///     .compile_with_input_specs(&y, &[(&x, DType::F64, &[2, 2])])
544    ///     .unwrap();
545    /// let data = [1.0_f64, 2.0, 3.0, 4.0];
546    /// let view = TypedTensorView::from_slice([2, 2], [1, 2], 0, &data).unwrap();
547    /// let read = TensorRead::from_view(TensorView::F64(view));
548    /// let mut executor = GraphExecutor::new(CpuBackend::new());
549    ///
550    /// let outputs = executor
551    ///     .run_many_values_with_input_reads(&program, &[(&x, read)])
552    ///     .unwrap();
553    /// assert_eq!(outputs.len(), 1);
554    /// assert!(matches!(&outputs[0], TensorValue::View(_)));
555    /// ```
556    pub fn run_many_values_with_input_reads<'a>(
557        &mut self,
558        program: &'a GraphProgram,
559        bindings: &[(&TracedTensor, TensorRead<'a>)],
560    ) -> Result<Vec<TensorValue>> {
561        let inputs = resolve_input_reads(program, bindings, &mut self.backend)?;
562        self.eval_exec_ir_slot_values(&program.exec, inputs)
563    }
564
565    /// Evaluate an execution program through this executor's backend state.
566    ///
567    /// This lower-level entry point is intended for code that already owns an
568    /// execution program and concrete ordered input tensors.
569    ///
570    /// # Examples
571    ///
572    /// ```
573    /// use tenferro_cpu::CpuBackend;
574    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
575    ///
576    /// let x = TracedTensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap();
577    /// let mut compiler = GraphCompiler::new();
578    /// let y = x.neg().unwrap();
579    /// let program = compiler.compile(&y).unwrap();
580    /// let mut executor = GraphExecutor::new(CpuBackend::new());
581    /// let out = executor.run(&program).unwrap();
582    /// assert_eq!(out.as_slice::<f64>().unwrap(), &[-2.0]);
583    /// ```
584    pub fn eval_exec_ir(
585        &mut self,
586        program: &ExecProgram,
587        inputs: Vec<Tensor>,
588    ) -> Result<Vec<Tensor>> {
589        validate_exec_input_count(program, inputs.len())?;
590        crate::segment::eval_exec_segmented_with_cache_and_workspace(
591            &mut self.backend,
592            program,
593            inputs,
594            &mut self.slot_workspace,
595            &mut self.backend_cache,
596            Some(&mut self.extension_executor),
597        )
598    }
599
600    /// Evaluate an execution program and preserve lazy owned output views.
601    ///
602    /// # Examples
603    ///
604    /// ```
605    /// use tenferro_cpu::CpuBackend;
606    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TensorValue, TracedTensor};
607    ///
608    /// let x = TracedTensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap();
609    /// let y = x.transpose(&[1, 0]).unwrap();
610    /// let mut compiler = GraphCompiler::new();
611    /// let program = compiler.compile(&y).unwrap();
612    /// let mut executor = GraphExecutor::new(CpuBackend::new());
613    ///
614    /// let value = executor.run_value(&program).unwrap();
615    /// assert!(matches!(&value, TensorValue::View(_)));
616    /// assert_eq!(value.shape(), &[2, 2]);
617    /// ```
618    pub fn eval_exec_ir_values(
619        &mut self,
620        program: &ExecProgram,
621        inputs: Vec<Tensor>,
622    ) -> Result<Vec<TensorValue>> {
623        validate_exec_input_count(program, inputs.len())?;
624        let inputs = inputs.into_iter().map(ExecSlot::Owned).collect();
625        crate::segment::eval_exec_segmented_slot_values_with_cache_and_workspace(
626            &mut self.backend,
627            program,
628            inputs,
629            &mut self.slot_workspace,
630            &mut self.backend_cache,
631            Some(&mut self.extension_executor),
632        )
633    }
634
635    /// Evaluate an execution program without consuming caller-owned inputs.
636    ///
637    /// # Examples
638    ///
639    /// ```
640    /// use tenferro_cpu::CpuBackend;
641    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TracedTensor};
642    ///
643    /// let x = TracedTensor::from_vec_col_major(vec![1], vec![2.0_f64]).unwrap();
644    /// let mut compiler = GraphCompiler::new();
645    /// let y = x.neg().unwrap();
646    /// let program = compiler.compile(&y).unwrap();
647    /// let mut executor = GraphExecutor::new(CpuBackend::new());
648    /// let out = executor.run(&program).unwrap();
649    /// assert_eq!(out.shape(), &[1]);
650    /// ```
651    pub fn eval_exec_ir_non_consuming(
652        &mut self,
653        program: &ExecProgram,
654        inputs: &[Tensor],
655    ) -> Result<Vec<Tensor>> {
656        let inputs = inputs
657            .iter()
658            .map(|tensor| ExecSlot::Read(TensorRead::from_tensor(tensor)))
659            .collect();
660        self.eval_exec_ir_slots(program, inputs)
661    }
662
663    /// Evaluate an execution program without consuming inputs and preserve lazy outputs.
664    ///
665    /// # Examples
666    ///
667    /// ```
668    /// use tenferro_cpu::CpuBackend;
669    /// use tenferro_runtime::{GraphCompiler, GraphExecutor, TensorValue, TracedTensor};
670    ///
671    /// let x = TracedTensor::from_vec_col_major(vec![2, 2], vec![1.0_f64, 2.0, 3.0, 4.0]).unwrap();
672    /// let y = x.transpose(&[1, 0]).unwrap();
673    /// let mut compiler = GraphCompiler::new();
674    /// let program = compiler.compile(&y).unwrap();
675    /// let mut executor = GraphExecutor::new(CpuBackend::new());
676    ///
677    /// let value = executor.run_value(&program).unwrap();
678    /// assert!(matches!(&value, TensorValue::View(_)));
679    /// assert_eq!(value.to_tensor().unwrap().shape(), &[2, 2]);
680    /// ```
681    pub fn eval_exec_ir_non_consuming_values(
682        &mut self,
683        program: &ExecProgram,
684        inputs: &[Tensor],
685    ) -> Result<Vec<TensorValue>> {
686        let inputs = inputs
687            .iter()
688            .map(|tensor| ExecSlot::Read(TensorRead::from_tensor(tensor)))
689            .collect();
690        self.eval_exec_ir_slot_values(program, inputs)
691    }
692
693    fn eval_exec_ir_slots<'a>(
694        &mut self,
695        program: &ExecProgram,
696        inputs: Vec<ExecSlot<'a>>,
697    ) -> Result<Vec<Tensor>> {
698        validate_exec_input_count(program, inputs.len())?;
699        let mut slot_workspace = Vec::with_capacity(self.borrowed_slot_workspace_capacity);
700        let result = crate::segment::eval_exec_segmented_slots_with_cache_and_workspace(
701            &mut self.backend,
702            program,
703            inputs,
704            &mut slot_workspace,
705            &mut self.backend_cache,
706            Some(&mut self.extension_executor),
707        );
708        self.borrowed_slot_workspace_capacity = slot_workspace.capacity();
709        result
710    }
711
712    fn eval_exec_ir_slot_values<'a>(
713        &mut self,
714        program: &ExecProgram,
715        inputs: Vec<ExecSlot<'a>>,
716    ) -> Result<Vec<TensorValue>> {
717        validate_exec_input_count(program, inputs.len())?;
718        let mut slot_workspace = Vec::with_capacity(self.borrowed_slot_workspace_capacity);
719        let result = crate::segment::eval_exec_segmented_slot_values_with_cache_and_workspace(
720            &mut self.backend,
721            program,
722            inputs,
723            &mut slot_workspace,
724            &mut self.backend_cache,
725            Some(&mut self.extension_executor),
726        );
727        self.borrowed_slot_workspace_capacity = slot_workspace.capacity();
728        result
729    }
730
731    /// Clear backend-specific runtime analysis cache entries.
732    ///
733    /// # Examples
734    ///
735    /// ```
736    /// use tenferro_cpu::CpuBackend;
737    /// use tenferro_runtime::{GraphExecutor};
738    ///
739    /// let mut executor = GraphExecutor::new(CpuBackend::new());
740    /// executor.clear_backend_cache();
741    /// assert_eq!(executor.cache_stats().backend.entries, 0);
742    /// ```
743    pub fn clear_backend_cache(&mut self) {
744        self.backend_cache.clear();
745    }
746
747    /// Clear generic extension runtime cache entries.
748    ///
749    /// # Examples
750    ///
751    /// ```
752    /// use tenferro_cpu::CpuBackend;
753    /// use tenferro_runtime::{GraphExecutor};
754    ///
755    /// let mut executor = GraphExecutor::new(CpuBackend::new());
756    /// executor.clear_extension_caches();
757    /// assert_eq!(executor.cache_stats().extensions.entries, 0);
758    /// ```
759    pub fn clear_extension_caches(&mut self) {
760        self.extension_executor.clear_caches();
761    }
762
763    /// Clear every executor-owned runtime cache.
764    ///
765    /// # Examples
766    ///
767    /// ```
768    /// use tenferro_cpu::CpuBackend;
769    /// use tenferro_runtime::{GraphExecutor};
770    ///
771    /// let mut executor = GraphExecutor::new(CpuBackend::new());
772    /// executor.clear_caches();
773    /// assert_eq!(executor.cache_stats().backend.entries, 0);
774    /// ```
775    pub fn clear_caches(&mut self) {
776        self.clear_extension_caches();
777        self.clear_backend_cache();
778    }
779
780    /// Return executor runtime cache-entry and retained-byte stats.
781    ///
782    /// # Examples
783    ///
784    /// ```
785    /// use tenferro_cpu::CpuBackend;
786    /// use tenferro_runtime::{GraphExecutor};
787    ///
788    /// let executor = GraphExecutor::new(CpuBackend::new());
789    /// let stats = executor.cache_stats();
790    /// assert_eq!(stats.extensions.entries, 0);
791    /// ```
792    pub fn cache_stats(&self) -> GraphExecutorCacheStats {
793        GraphExecutorCacheStats {
794            extensions: self.extension_executor.cache_stats(),
795            backend: self.backend_cache.stats(),
796        }
797    }
798}
799
800impl<B: TensorBackend + 'static> Default for GraphExecutor<B>
801where
802    B: Default,
803{
804    fn default() -> Self {
805        Self::new(B::default())
806    }
807}
808
809fn validate_exec_input_count(program: &ExecProgram, actual: usize) -> Result<()> {
810    let expected = program.input_slots.len();
811    if actual != expected {
812        return Err(Error::Internal(format!(
813            "expected {expected} inputs for execution program, got {actual}"
814        )));
815    }
816    Ok(())
817}
818
819fn expect_single_output(outputs: &mut Vec<Tensor>) -> Result<Tensor> {
820    if outputs.len() != 1 {
821        return Err(Error::Internal(format!(
822            "expected 1 output, got {}",
823            outputs.len()
824        )));
825    }
826    outputs
827        .pop()
828        .ok_or_else(|| Error::Internal("missing graph output".to_string()))
829}
830
831fn expect_single_value(outputs: &mut Vec<TensorValue>) -> Result<TensorValue> {
832    if outputs.len() != 1 {
833        return Err(Error::Internal(format!(
834            "expected 1 output, got {}",
835            outputs.len()
836        )));
837    }
838    outputs
839        .pop()
840        .ok_or_else(|| Error::Internal("missing graph output".to_string()))
841}
842
843fn resolve_inputs(
844    program: &GraphProgram,
845    bindings: &[(&TracedTensor, &Tensor)],
846    backend: &mut impl TensorBackend,
847) -> Result<Vec<Tensor>> {
848    let program_keys: HashSet<_> = program
849        .inputs
850        .iter()
851        .map(|input| input.key.clone())
852        .collect();
853    let tangent_root_specs = tangent_root_specs(&program.inputs);
854    let default_map: HashMap<_, _> = program
855        .inputs
856        .iter()
857        .filter_map(|input| {
858            input
859                .default_tensor
860                .as_ref()
861                .map(|tensor| (input.key.clone(), tensor.as_ref()))
862        })
863        .collect();
864    let mut binding_map = HashMap::new();
865    for (index, (placeholder, tensor)) in bindings.iter().enumerate() {
866        if placeholder.data.is_some() {
867            return Err(Error::UnexpectedBinding {
868                binding_index: index,
869            });
870        }
871        let key = placeholder.input_key().ok_or(Error::UnexpectedBinding {
872            binding_index: index,
873        })?;
874        validate_binding_placeholder(index, placeholder, tensor)?;
875        let is_program_input = program_keys.contains(&key);
876        if !is_program_input && !tangent_root_specs.contains_key(&key) {
877            return Err(Error::UnexpectedBinding {
878                binding_index: index,
879            });
880        }
881        if binding_map.insert(key.clone(), *tensor).is_some() {
882            return Err(Error::DuplicateBinding {
883                input_key: format!("{:?}", key),
884            });
885        }
886    }
887
888    program
889        .inputs
890        .iter()
891        .map(|input| resolve_input(input, &binding_map, &default_map, backend))
892        .collect()
893}
894
895fn resolve_input_reads<'a>(
896    program: &'a GraphProgram,
897    bindings: &[(&TracedTensor, TensorRead<'a>)],
898    backend: &mut impl TensorBackend,
899) -> Result<Vec<ExecSlot<'a>>> {
900    let program_keys: HashSet<_> = program
901        .inputs
902        .iter()
903        .map(|input| input.key.clone())
904        .collect();
905    let tangent_root_specs = tangent_root_specs(&program.inputs);
906    let default_map: HashMap<_, _> = program
907        .inputs
908        .iter()
909        .filter_map(|input| {
910            input
911                .default_tensor
912                .as_ref()
913                .map(|tensor| (input.key.clone(), tensor.as_ref()))
914        })
915        .collect();
916    let mut binding_map = HashMap::new();
917    for (index, (placeholder, read)) in bindings.iter().enumerate() {
918        if placeholder.data.is_some() {
919            return Err(Error::UnexpectedBinding {
920                binding_index: index,
921            });
922        }
923        let key = placeholder.input_key().ok_or(Error::UnexpectedBinding {
924            binding_index: index,
925        })?;
926        validate_binding_placeholder_read(index, placeholder, read)?;
927        let is_program_input = program_keys.contains(&key);
928        if !is_program_input && !tangent_root_specs.contains_key(&key) {
929            return Err(Error::UnexpectedBinding {
930                binding_index: index,
931            });
932        }
933        if binding_map.insert(key.clone(), read.clone()).is_some() {
934            return Err(Error::DuplicateBinding {
935                input_key: format!("{:?}", key),
936            });
937        }
938    }
939
940    program
941        .inputs
942        .iter()
943        .map(|input| resolve_input_read(input, &binding_map, &default_map, backend))
944        .collect()
945}
946
947fn tangent_root_specs(inputs: &[GraphProgramInput]) -> HashMap<TensorInputKey, &GraphProgramInput> {
948    let mut specs = HashMap::new();
949    for input in inputs {
950        if !matches!(input.key, TensorInputKey::User { .. }) {
951            specs
952                .entry(tangent_primal_root(&input.key).clone())
953                .or_insert(input);
954        }
955    }
956    specs
957}
958
959fn resolve_input(
960    input: &GraphProgramInput,
961    bindings: &HashMap<TensorInputKey, &Tensor>,
962    defaults: &HashMap<TensorInputKey, &Tensor>,
963    backend: &mut impl TensorBackend,
964) -> Result<Tensor> {
965    let tensor = if let Some(bound) = bindings.get(&input.key) {
966        (*bound).clone()
967    } else if let Some(default) = &input.default_tensor {
968        resolve_default_tensor(default.as_ref(), backend)?
969    } else if let Some(zero) = deferred_zero_for_tangent_key(&input.key, bindings, defaults)? {
970        zero
971    } else {
972        return Err(Error::UnboundPlaceholder {
973            input_key: format!("{:?}", input.key),
974        });
975    };
976    validate_input_tensor(input, &tensor)?;
977    Ok(tensor)
978}
979
980fn resolve_input_read<'a>(
981    input: &GraphProgramInput,
982    bindings: &HashMap<TensorInputKey, TensorRead<'a>>,
983    defaults: &HashMap<TensorInputKey, &'a Tensor>,
984    backend: &mut impl TensorBackend,
985) -> Result<ExecSlot<'a>> {
986    let slot = if let Some(bound) = bindings.get(&input.key) {
987        ExecSlot::Read(bound.clone())
988    } else if let Some(default) = defaults.get(&input.key) {
989        if should_upload_default_tensor(default) {
990            ExecSlot::Owned(backend.upload_host_tensor(default)?)
991        } else {
992            ExecSlot::Read(TensorRead::from_tensor(default))
993        }
994    } else if let Some(zero) = deferred_zero_for_tangent_key_read(&input.key, bindings, defaults)? {
995        ExecSlot::Owned(zero)
996    } else {
997        return Err(Error::UnboundPlaceholder {
998            input_key: format!("{:?}", input.key),
999        });
1000    };
1001    validate_input_slot(input, &slot)?;
1002    Ok(slot)
1003}
1004
1005fn resolve_default_tensor(default: &Tensor, backend: &mut impl TensorBackend) -> Result<Tensor> {
1006    if should_upload_default_tensor(default) {
1007        Ok(backend.upload_host_tensor(default)?)
1008    } else {
1009        Ok(default.clone())
1010    }
1011}
1012
1013fn should_upload_default_tensor(default: &Tensor) -> bool {
1014    default.shape().is_empty() && tensor_has_host_buffer(default)
1015}
1016
1017fn tensor_has_host_buffer(tensor: &Tensor) -> bool {
1018    !tensor.is_backend_buffer()
1019}
1020
1021fn validate_binding_placeholder(
1022    index: usize,
1023    placeholder: &TracedTensor,
1024    tensor: &Tensor,
1025) -> Result<()> {
1026    if placeholder.data.is_some() {
1027        return Err(Error::UnexpectedBinding {
1028            binding_index: index,
1029        });
1030    }
1031    if placeholder.dtype != tensor.dtype() {
1032        return Err(Error::PlaceholderDtypeMismatch {
1033            expected: placeholder.dtype,
1034            actual: tensor.dtype(),
1035        });
1036    }
1037    match placeholder.try_concrete_shape() {
1038        Some(expected_shape) => {
1039            if expected_shape.as_slice() != tensor.shape() {
1040                return Err(Error::PlaceholderShapeMismatch {
1041                    expected: expected_shape,
1042                    actual: tensor.shape().to_vec(),
1043                });
1044            }
1045        }
1046        None => {
1047            if placeholder.rank != tensor.shape().len() {
1048                return Err(Error::PlaceholderRankMismatch {
1049                    expected: placeholder.rank,
1050                    actual: tensor.shape().len(),
1051                });
1052            }
1053        }
1054    }
1055    Ok(())
1056}
1057
1058fn validate_binding_placeholder_read(
1059    index: usize,
1060    placeholder: &TracedTensor,
1061    read: &TensorRead<'_>,
1062) -> Result<()> {
1063    if placeholder.data.is_some() {
1064        return Err(Error::UnexpectedBinding {
1065            binding_index: index,
1066        });
1067    }
1068    if placeholder.dtype != read.dtype() {
1069        return Err(Error::PlaceholderDtypeMismatch {
1070            expected: placeholder.dtype,
1071            actual: read.dtype(),
1072        });
1073    }
1074    match placeholder.try_concrete_shape() {
1075        Some(expected_shape) => {
1076            if expected_shape.as_slice() != read.shape() {
1077                return Err(Error::PlaceholderShapeMismatch {
1078                    expected: expected_shape,
1079                    actual: read.shape().to_vec(),
1080                });
1081            }
1082        }
1083        None => {
1084            if placeholder.rank != read.shape().len() {
1085                return Err(Error::PlaceholderRankMismatch {
1086                    expected: placeholder.rank,
1087                    actual: read.shape().len(),
1088                });
1089            }
1090        }
1091    }
1092    Ok(())
1093}
1094
1095fn validate_input_tensor(input: &GraphProgramInput, tensor: &Tensor) -> Result<()> {
1096    if input.dtype != tensor.dtype() {
1097        return Err(Error::PlaceholderDtypeMismatch {
1098            expected: input.dtype,
1099            actual: tensor.dtype(),
1100        });
1101    }
1102    if input.shape.as_slice() != tensor.shape() {
1103        return Err(Error::PlaceholderShapeMismatch {
1104            expected: input.shape.clone(),
1105            actual: tensor.shape().to_vec(),
1106        });
1107    }
1108    Ok(())
1109}
1110
1111fn validate_input_slot(input: &GraphProgramInput, slot: &ExecSlot<'_>) -> Result<()> {
1112    if input.dtype != slot.dtype() {
1113        return Err(Error::PlaceholderDtypeMismatch {
1114            expected: input.dtype,
1115            actual: slot.dtype(),
1116        });
1117    }
1118    if input.shape.as_slice() != slot.shape() {
1119        return Err(Error::PlaceholderShapeMismatch {
1120            expected: input.shape.clone(),
1121            actual: slot.shape().to_vec(),
1122        });
1123    }
1124    Ok(())
1125}
1126
1127fn deferred_zero_for_tangent_key(
1128    key: &TensorInputKey,
1129    bindings: &HashMap<TensorInputKey, &Tensor>,
1130    defaults: &HashMap<TensorInputKey, &Tensor>,
1131) -> Result<Option<Tensor>> {
1132    if !key.is_tangent() {
1133        return Ok(None);
1134    }
1135    let root = tangent_primal_root(key);
1136    let Some(primal) = bindings.get(root).or_else(|| defaults.get(root)) else {
1137        return Ok(None);
1138    };
1139    zeros_tensor(primal.dtype(), primal.shape().to_vec()).map(Some)
1140}
1141
1142fn deferred_zero_for_tangent_key_read<'a>(
1143    key: &TensorInputKey,
1144    bindings: &HashMap<TensorInputKey, TensorRead<'a>>,
1145    defaults: &HashMap<TensorInputKey, &'a Tensor>,
1146) -> Result<Option<Tensor>> {
1147    if !key.is_tangent() {
1148        return Ok(None);
1149    }
1150    let root = tangent_primal_root(key);
1151    if let Some(primal) = bindings.get(root) {
1152        return zeros_tensor(primal.dtype(), primal.shape().to_vec()).map(Some);
1153    }
1154    let Some(primal) = defaults.get(root) else {
1155        return Ok(None);
1156    };
1157    zeros_tensor(primal.dtype(), primal.shape().to_vec()).map(Some)
1158}
1159
1160fn tangent_primal_root(key: &TensorInputKey) -> &TensorInputKey {
1161    key.primal_root()
1162}
1163
1164fn zeros_tensor(dtype: DType, shape: Vec<usize>) -> Result<Tensor> {
1165    match dtype {
1166        DType::F32 => Ok(Tensor::F32(TypedTensor::zeros(shape)?)),
1167        DType::F64 => Ok(Tensor::F64(TypedTensor::zeros(shape)?)),
1168        DType::I32 => Ok(Tensor::I32(TypedTensor::zeros(shape)?)),
1169        DType::I64 => Ok(Tensor::I64(TypedTensor::zeros(shape)?)),
1170        DType::Bool => {
1171            let len = checked_default_element_count(&shape)?;
1172            Ok(Tensor::Bool(TypedTensor::from_vec_col_major(
1173                shape,
1174                vec![false; len],
1175            )?))
1176        }
1177        DType::C32 => Ok(Tensor::C32(TypedTensor::zeros(shape)?)),
1178        DType::C64 => Ok(Tensor::C64(TypedTensor::zeros(shape)?)),
1179    }
1180}
1181
1182fn checked_default_element_count(shape: &[usize]) -> Result<usize> {
1183    shape.iter().try_fold(1usize, |acc, &dim| {
1184        acc.checked_mul(dim)
1185            .ok_or_else(|| Error::InvalidCompiledGraph {
1186                message: format!("deferred zero shape product overflows usize for shape {shape:?}"),
1187            })
1188    })
1189}
1190
1191#[cfg(test)]
1192mod tests;
tenferro_runtime/graph/executor.rs

tenferro_runtime/graph/
executor.rs