/// Maximum total elements for the small tensor fast path.
pub const SMALL_TENSOR_THRESHOLD: usize = 1024;