strided_perm/lib.rs
1//! Cache-efficient tensor permutation / transpose.
2//!
3//! This crate provides optimized copy and permutation operations for strided
4//! multidimensional arrays. It is designed as a single-responsibility crate
5//! sitting between `strided-view` (data structures) and `strided-kernel`
6//! (general map/reduce/broadcast operations).
7//!
8//! # Dependency graph
9//!
10//! ```text
11//! strided-view -> strided-perm -> strided-kernel -> strided-einsum2
12//! ```
13
// Private submodules of this crate (none is declared `pub`).
//
// `copy`, `fuse`, and `hptt` are compiled unconditionally. `block`, `kernel`,
// and `order` each carry `#[cfg(test)]`, so they are compiled only when the
// crate is built for testing and are absent from normal builds.
14#[cfg(test)]
15mod block;
16mod copy;
17mod fuse;
18mod hptt;
19#[cfg(test)]
// The dead-code lint is silenced for everything inside `kernel`.
// NOTE(review): the reason is not visible from this file — presumably some
// items are not exercised by the tests; confirm and record the reason here.
20#[allow(dead_code)]
21mod kernel;
22#[cfg(test)]
23mod order;
24
// Public entry points, re-exported from the private `copy` module so callers
// can reach them from the crate root. `copy_into` and `copy_into_col_major`
// are always available.
25// Re-export primary API
26pub use copy::{copy_into, copy_into_col_major};
// The `_par` counterparts are exported only when the `parallel` Cargo feature
// is enabled. NOTE(review): presumably these are the multi-threaded versions
// of the two functions above — confirm against `copy`.
27#[cfg(feature = "parallel")]
28pub use copy::{copy_into_col_major_par, copy_into_par};
29
30// Constants
/// Compile-time constant equal to `32 * 1024` (32768).
///
/// NOTE(review): presumably a per-block memory budget in bytes (32 KiB) used
/// when sizing blocks for cache efficiency; the unit and the consumers are
/// not visible from this file — confirm against the modules that use it.
31pub const BLOCK_MEMORY_SIZE: usize = 32 * 1024;
/// Compile-time constant equal to `64`.
///
/// NOTE(review): presumably the assumed CPU cache-line size in bytes. It is a
/// fixed value, not detected from the target at build time or run time —
/// confirm that this is intended for all supported targets.
32pub const CACHE_LINE_SIZE: usize = 64;