Skip to main content

strided_perm/
lib.rs

1//! Cache-efficient tensor permutation / transpose.
2//!
3//! This crate provides optimized copy and permutation operations for strided
4//! multidimensional arrays. It is designed as a single-responsibility crate
5//! sitting between `strided-view` (data structures) and `strided-kernel`
6//! (general map/reduce/broadcast operations).
7//!
8//! # Dependency graph
9//!
10//! ```text
11//! strided-view -> strided-perm -> strided-kernel -> strided-einsum2
12//! ```
13
// Public submodules of the crate. All six are `pub`, so their items are
// reachable by module path; the re-exports further down additionally surface
// selected items from `copy`, `fuse`, `kernel` and `order` at the crate root.
14pub mod block;
15pub mod copy;
16pub mod fuse;
17pub mod hptt;
18pub mod kernel;
19pub mod order;
20
21// Re-export primary API
22pub use copy::{copy_into, copy_into_col_major, try_fuse_group};
23#[cfg(feature = "parallel")]
24pub use copy::{copy_into_col_major_par, copy_into_par};
25pub use fuse::{compress_dims, compute_costs, compute_importance, fuse_dims, sort_by_importance};
26pub use kernel::{
27    build_plan_fused, build_plan_fused_small, for_each_inner_block_preordered, total_len,
28    KernelPlan, SMALL_TENSOR_THRESHOLD,
29};
30pub use order::compute_order;
31
32// Constants
/// Block memory size: `32 * 1024` (= 32768).
///
/// NOTE(review): the unit is not stated in this file — presumably bytes
/// (i.e. 32 KiB); confirm against the code that consumes this constant.
33pub const BLOCK_MEMORY_SIZE: usize = 32 * 1024;
/// Cache line size assumed by this crate: `64`.
///
/// This is a fixed compile-time constant, not a value probed from the target.
/// NOTE(review): the unit is not stated in this file — presumably bytes;
/// confirm against the code that consumes this constant.
34pub const CACHE_LINE_SIZE: usize = 64;