// blob: ff752f25b310bab8233bc36d5424914265412bdc [file] [log] [blame]
#[cfg(target_arch = "arm")]
use crate::core_arch::arm::*;
#[cfg(target_arch = "aarch64")]
use crate::core_arch::aarch64::*;
use crate::core_arch::simd::*;
use std::{i16, i32, i8, mem::transmute, u16, u32, u8, vec::Vec};
/// Expands to a fresh `Vec<u8>` of sample lane values for 8-bit unsigned tests.
///
/// The samples are zero, two small values, the low-nibble mask, the top bit
/// alone, the high-nibble mask and the all-ones byte.
macro_rules! V_u8 {
    () => {
        vec![
            0x00u8, // no bits set
            0x01u8,
            0x02u8,
            0x0Fu8, // low nibble set
            0x80u8, // only the top bit set
            0xF0u8, // high nibble set
            0xFFu8, // every bit set
        ]
    };
}
/// Expands to a fresh `Vec<u16>` of sample lane values for 16-bit unsigned tests.
///
/// Most samples repeat one byte pattern in both bytes; the fifth sample is
/// the top bit alone.
macro_rules! V_u16 {
    () => {
        vec![
            0x0000u16, // no bits set
            0x0101u16,
            0x0202u16,
            0x0F0Fu16, // low nibble of each byte
            0x8000u16, // only the top bit set
            0xF0F0u16, // high nibble of each byte
            0xFFFFu16, // every bit set
        ]
    };
}
/// Expands to a fresh `Vec<u32>` of sample lane values for 32-bit unsigned tests.
///
/// Most samples repeat one byte pattern in all four bytes; the fifth sample
/// is the top bit alone.
macro_rules! V_u32 {
    () => {
        vec![
            0x0000_0000u32, // no bits set
            0x0101_0101u32,
            0x0202_0202u32,
            0x0F0F_0F0Fu32, // low nibble of each byte
            0x8000_0000u32, // only the top bit set
            0xF0F0_F0F0u32, // high nibble of each byte
            0xFFFF_FFFFu32, // every bit set
        ]
    };
}
/// Expands to a fresh `Vec<u64>` of sample lane values for 64-bit unsigned tests.
///
/// Every sample repeats one byte pattern in all eight bytes.
macro_rules! V_u64 {
    () => {
        vec![
            0x0000_0000_0000_0000u64, // no bits set
            0x0101_0101_0101_0101u64,
            0x0202_0202_0202_0202u64,
            0x0F0F_0F0F_0F0F_0F0Fu64, // low nibble of each byte
            // NOTE(review): this sets the top bit of *every* byte, whereas the
            // `V_u16`/`V_u32` tables use only the single top bit at this
            // position - confirm which pattern is intended.
            0x8080_8080_8080_8080u64,
            0xF0F0_F0F0_F0F0_F0F0u64, // high nibble of each byte
            0xFFFF_FFFF_FFFF_FFFFu64, // every bit set
        ]
    };
}
/// Expands to a fresh `Vec<i8>` of sample lane values for 8-bit signed tests.
///
/// The negative samples are spelled as their two's-complement bit patterns
/// and reinterpreted with a same-width `as` cast, so the pattern and the
/// value cannot drift apart.
macro_rules! V_i8 {
    () => {
        vec![
            0x00i8,
            0x01i8,
            0x02i8,
            0x0Fi8,
            0x80u8 as i8, // -128
            0xF0u8 as i8, // -16
            0xFFu8 as i8, // -1
        ]
    };
}
/// Expands to a fresh `Vec<i16>` of sample lane values for 16-bit signed tests.
///
/// The negative samples are spelled as their two's-complement bit patterns
/// and reinterpreted with a same-width `as` cast. This replaces hand-written
/// hex annotations, one of which (`0xFFF` for `-1`) was wrong.
macro_rules! V_i16 {
    () => {
        vec![
            0x0000i16,
            0x0101i16,
            0x0202i16,
            0x0F0Fi16,
            0x8000u16 as i16, // -32768
            0xF0F0u16 as i16, // -3856
            0xFFFFu16 as i16, // -1
        ]
    };
}
/// Expands to a fresh `Vec<i32>` of sample lane values for 32-bit signed tests.
///
/// The negative samples are spelled as their two's-complement bit patterns
/// and reinterpreted with a same-width `as` cast, so the annotation can no
/// longer disagree with the value.
macro_rules! V_i32 {
    () => {
        vec![
            0x0000_0000i32,
            0x0101_0101i32,
            0x0202_0202i32,
            0x0F0F_0F0Fi32,
            // -2139062144. The previous annotation claimed 0x80000000, but the
            // value has always been 0x80808080.
            // NOTE(review): this is therefore not `i32::MIN`; the 8-, 16- and
            // 64-bit signed tables do use the type minimum at this position -
            // confirm whether `i32::MIN` was intended.
            0x8080_8080u32 as i32,
            0xF0F0_F0F0u32 as i32, // -252645136
            0xFFFF_FFFFu32 as i32, // -1
        ]
    };
}
/// Expands to a fresh `Vec<i64>` of sample lane values for 64-bit signed tests.
///
/// The negative samples are spelled as their two's-complement bit patterns
/// and reinterpreted with a same-width `as` cast.
macro_rules! V_i64 {
    () => {
        vec![
            0x0000_0000_0000_0000i64,
            0x0101_0101_0101_0101i64,
            0x0202_0202_0202_0202i64,
            0x0F0F_0F0F_0F0F_0F0Fi64,
            0x8000_0000_0000_0000u64 as i64, // -9223372036854775808
            // NOTE(review): only the top nibble is set here, whereas the
            // unsigned 64-bit table uses 0xF0 in every byte - confirm intent.
            0xF000_0000_0000_0000u64 as i64, // -1152921504606846976
            0xFFFF_FFFF_FFFF_FFFFu64 as i64, // -1
        ]
    };
}
/// Expands to a fresh `Vec<f32>` of sample lane values for single-precision tests.
///
/// Besides a handful of ordinary numbers the list contains the largest and
/// smallest finite values, both infinities and a NaN. Because of the NaN,
/// two expansions never compare equal with `==`; compare bit patterns instead.
macro_rules! V_f32 {
    () => {
        vec![
            // Ordinary values.
            0.0_f32,
            1.0_f32,
            -1.0_f32,
            1.2_f32,
            2.4_f32,
            // Extremes of the finite range.
            std::f32::MAX,
            std::f32::MIN,
            // Non-finite values.
            std::f32::INFINITY,
            std::f32::NEG_INFINITY,
            std::f32::NAN,
        ]
    };
}
/// Expands to a closure that reinterprets a `$src` value as a `u64`.
///
/// The closure calls the unqualified `transmute`, so the expansion site must
/// have `transmute` in scope and must sit inside an `unsafe` block. The
/// expansion only compiles when `$src` has the same size as `u64`.
macro_rules! to64 {
    ($src:ident) => {
        |lanes: $src| -> u64 { transmute::<$src, u64>(lanes) }
    };
}
/// Expands to a closure that reinterprets a `$src` value as a `u128`.
///
/// The closure calls the unqualified `transmute`, so the expansion site must
/// have `transmute` in scope and must sit inside an `unsafe` block. The
/// expansion only compiles when `$src` has the same size as `u128`.
macro_rules! to128 {
    ($src:ident) => {
        |lanes: $src| -> u128 { transmute::<$src, u128>(lanes) }
    };
}
/// Checks a two-operand function against a scalar reference implementation.
///
/// For each sample, the operands are converted with `fill1` and passed to
/// `test_fun`; the scalar operands are passed to `verify_fun` and its result
/// is converted with `fill2`. Both results go through `cast` and are
/// compared with `assert_eq!`.
///
/// * `vals` - scalar sample values.
/// * `fill1` - converts a scalar operand into the operand type of `test_fun`.
/// * `fill2` - converts the reference result into the result type of `test_fun`.
/// * `cast` - converts a result into the type that is actually compared.
/// * `test_fun` - the function under test.
/// * `verify_fun` - the scalar reference implementation.
///
/// # Panics
///
/// Panics on the first sample for which the two cast results differ. The
/// message shows the scalar operands, the converted operands and both
/// uncast results.
pub(crate) fn test<T, U, V, W, X>(
    vals: Vec<T>,
    fill1: fn(T) -> V,
    fill2: fn(U) -> W,
    cast: fn(W) -> X,
    test_fun: fn(V, V) -> W,
    verify_fun: fn(T, T) -> U,
) where
    T: Copy + core::fmt::Debug + std::cmp::PartialEq,
    U: Copy + core::fmt::Debug + std::cmp::PartialEq,
    V: Copy + core::fmt::Debug,
    W: Copy + core::fmt::Debug,
    X: Copy + core::fmt::Debug + std::cmp::PartialEq,
{
    // NOTE(review): zipping `vals` with itself advances both iterators in
    // lockstep, so `lhs` and `rhs` are always the same sample and the function
    // under test never sees two different operands. Switching to every ordered
    // pair would widen coverage, but existing reference closures may only be
    // valid for equal operands - audit the callers before changing this.
    for (&lhs, &rhs) in vals.iter().zip(&vals) {
        let lhs_filled: V = fill1(lhs);
        let rhs_filled: V = fill1(rhs);
        let produced: W = test_fun(lhs_filled, rhs_filled);
        let reference: W = fill2(verify_fun(lhs, rhs));
        assert_eq!(
            cast(produced),
            cast(reference),
            "[{:?}:{:?}] :\nf({:?}, {:?}) = {:?}\ng({:?}, {:?}) = {:?}\n",
            lhs,
            rhs,
            &lhs_filled,
            &rhs_filled,
            produced,
            &lhs_filled,
            &rhs_filled,
            reference
        );
    }
}
/// Generates a concrete, non-generic wrapper around the generic `test` driver.
///
/// Arguments, in order:
/// * `$name` - name of the generated `pub(crate)` function.
/// * `$scalar_in`, `$scalar_out` - operand and result types of the scalar
///   reference function.
/// * `$vec_in`, `$vec_out` - operand and result types of the function under test.
/// * `$bits` - type both results are converted to before they are compared.
/// * `$samples` - expression producing the `Vec<$scalar_in>` of inputs.
/// * `$fill_in` - converts a `$scalar_in` into a `$vec_in`.
/// * `$fill_out` - converts a `$scalar_out` into a `$vec_out`.
/// * `$cast` - converts a `$vec_out` into `$bits`.
///
/// The generated function takes the function under test and the scalar
/// reference function and forwards them, with the fixed arguments above, to
/// `test`.
macro_rules! gen_test_fn {
    (
        $name:ident,
        $scalar_in:ident,
        $scalar_out:ident,
        $vec_in:ident,
        $vec_out:ident,
        $bits:ident,
        $samples:expr,
        $fill_in:expr,
        $fill_out:expr,
        $cast:expr
    ) => {
        pub(crate) fn $name(
            test_fun: fn($vec_in, $vec_in) -> $vec_out,
            verify_fun: fn($scalar_in, $scalar_in) -> $scalar_out,
        ) {
            // The argument expressions are expanded inside this `unsafe`
            // block; that is what allows a `$cast` closure to call
            // `transmute`, which the compiler only accepts between types of
            // equal size.
            unsafe {
                test::<$scalar_in, $scalar_out, $vec_in, $vec_out, $bits>(
                    $samples, $fill_in, $fill_out, $cast, test_fun, verify_fun,
                );
            }
        }
    };
}
/// Generates a `pub(crate)` function that broadcasts one scalar into every
/// lane of a vector.
///
/// Arguments, in order:
/// * `$name` - name of the generated function.
/// * `$lane_bits` - lane width in bits. The generated code does not use it;
///   it was only ever consumed by a debugging `println!`.
/// * `$lanes` - number of lanes, i.e. the length of the temporary array.
/// * `$scalar` - scalar (lane) type of the `val` parameter.
/// * `$vector` - return type of the generated function.
/// * `$bits` - integer type the array is reinterpreted as before the final
///   conversion to `$vector`.
///
/// The expansion calls the unqualified `transmute`, so that name must be in
/// scope where the macro is invoked.
macro_rules! gen_fill_fn {
    (
        $name:ident,
        $lane_bits:expr,
        $lanes:expr,
        $scalar:ident,
        $vector:ident,
        $bits:ident
    ) => {
        pub(crate) fn $name(val: $scalar) -> $vector {
            // SAFETY: `transmute` only compiles when source and destination
            // have the same size, so `[$scalar; $lanes]`, `$bits` and
            // `$vector` are all equally wide.
            // NOTE(review): assumes every bit pattern is a valid `$vector`
            // value - confirm for the vector types used.
            unsafe {
                let packed = transmute::<[$scalar; $lanes], $bits>([val; $lanes]);
                transmute::<$bits, $vector>(packed)
            }
        }
    };
}
// Broadcast ("fill") helpers, one per lane type and vector width.
// `gen_fill_fn!` arguments: function name, lane width in bits, lane count,
// scalar type, vector type, intermediate integer type.
// The `fill_*` helpers go through `u64`; the `fillq_*` helpers go through `u128`.
//
// 8-bit lanes.
gen_fill_fn!(fill_u8, 8, 8, u8, uint8x8_t, u64);
gen_fill_fn!(fill_s8, 8, 8, i8, int8x8_t, u64);
gen_fill_fn!(fillq_u8, 8, 16, u8, uint8x16_t, u128);
gen_fill_fn!(fillq_s8, 8, 16, i8, int8x16_t, u128);
// 16-bit lanes.
gen_fill_fn!(fill_u16, 16, 4, u16, uint16x4_t, u64);
gen_fill_fn!(fill_s16, 16, 4, i16, int16x4_t, u64);
gen_fill_fn!(fillq_u16, 16, 8, u16, uint16x8_t, u128);
gen_fill_fn!(fillq_s16, 16, 8, i16, int16x8_t, u128);
// 32-bit integer lanes.
gen_fill_fn!(fill_u32, 32, 2, u32, uint32x2_t, u64);
gen_fill_fn!(fill_s32, 32, 2, i32, int32x2_t, u64);
gen_fill_fn!(fillq_u32, 32, 4, u32, uint32x4_t, u128);
gen_fill_fn!(fillq_s32, 32, 4, i32, int32x4_t, u128);
// 64-bit lanes.
gen_fill_fn!(fill_u64, 64, 1, u64, uint64x1_t, u64);
gen_fill_fn!(fill_s64, 64, 1, i64, int64x1_t, u64);
gen_fill_fn!(fillq_u64, 64, 2, u64, uint64x2_t, u128);
gen_fill_fn!(fillq_s64, 64, 2, i64, int64x2_t, u128);
// 32-bit floating-point lanes.
gen_fill_fn!(fill_f32, 32, 2, f32, float32x2_t, u64);
gen_fill_fn!(fillq_f32, 32, 4, f32, float32x4_t, u128);
// Test drivers for `u8` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `ari`/`bit`/`cmp` variants below differ only in name; presumably the
// name marks the kind of intrinsic (arithmetic/bitwise/comparison) being tested.
//
// test_ari_u8: fn(uint8x8_t, uint8x8_t) -> uint8x8_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
test_ari_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
// test_bit_u8: fn(uint8x8_t, uint8x8_t) -> uint8x8_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
test_bit_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
// test_cmp_u8: fn(uint8x8_t, uint8x8_t) -> uint8x8_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
test_cmp_u8,
u8,
u8,
uint8x8_t,
uint8x8_t,
u64,
V_u8!(),
fill_u8,
fill_u8,
to64!(uint8x8_t)
);
// testq_ari_u8: fn(uint8x16_t, uint8x16_t) -> uint8x16_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
testq_ari_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
// testq_bit_u8: fn(uint8x16_t, uint8x16_t) -> uint8x16_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
testq_bit_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
// testq_cmp_u8: fn(uint8x16_t, uint8x16_t) -> uint8x16_t vs. fn(u8, u8) -> u8.
gen_test_fn!(
testq_cmp_u8,
u8,
u8,
uint8x16_t,
uint8x16_t,
u128,
V_u8!(),
fillq_u8,
fillq_u8,
to128!(uint8x16_t)
);
// Test drivers for `i8` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `cmp` variants take signed operands but expect an unsigned result
// (`u8` scalar, `uint8x8_t`/`uint8x16_t` vector).
//
// test_ari_s8: fn(int8x8_t, int8x8_t) -> int8x8_t vs. fn(i8, i8) -> i8.
gen_test_fn!(
test_ari_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
// test_bit_s8: fn(int8x8_t, int8x8_t) -> int8x8_t vs. fn(i8, i8) -> i8.
gen_test_fn!(
test_bit_s8,
i8,
i8,
int8x8_t,
int8x8_t,
u64,
V_i8!(),
fill_s8,
fill_s8,
to64!(int8x8_t)
);
// test_cmp_s8: fn(int8x8_t, int8x8_t) -> uint8x8_t vs. fn(i8, i8) -> u8.
gen_test_fn!(
test_cmp_s8,
i8,
u8,
int8x8_t,
uint8x8_t,
u64,
V_i8!(),
fill_s8,
fill_u8,
to64!(uint8x8_t)
);
// testq_ari_s8: fn(int8x16_t, int8x16_t) -> int8x16_t vs. fn(i8, i8) -> i8.
gen_test_fn!(
testq_ari_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
// testq_bit_s8: fn(int8x16_t, int8x16_t) -> int8x16_t vs. fn(i8, i8) -> i8.
gen_test_fn!(
testq_bit_s8,
i8,
i8,
int8x16_t,
int8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_s8,
to128!(int8x16_t)
);
// testq_cmp_s8: fn(int8x16_t, int8x16_t) -> uint8x16_t vs. fn(i8, i8) -> u8.
gen_test_fn!(
testq_cmp_s8,
i8,
u8,
int8x16_t,
uint8x16_t,
u128,
V_i8!(),
fillq_s8,
fillq_u8,
to128!(uint8x16_t)
);
// Test drivers for `u16` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `ari`/`bit`/`cmp` variants below differ only in name.
//
// test_ari_u16: fn(uint16x4_t, uint16x4_t) -> uint16x4_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
test_ari_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
// test_bit_u16: fn(uint16x4_t, uint16x4_t) -> uint16x4_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
test_bit_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
// test_cmp_u16: fn(uint16x4_t, uint16x4_t) -> uint16x4_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
test_cmp_u16,
u16,
u16,
uint16x4_t,
uint16x4_t,
u64,
V_u16!(),
fill_u16,
fill_u16,
to64!(uint16x4_t)
);
// testq_ari_u16: fn(uint16x8_t, uint16x8_t) -> uint16x8_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
testq_ari_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
// testq_bit_u16: fn(uint16x8_t, uint16x8_t) -> uint16x8_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
testq_bit_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
// testq_cmp_u16: fn(uint16x8_t, uint16x8_t) -> uint16x8_t vs. fn(u16, u16) -> u16.
gen_test_fn!(
testq_cmp_u16,
u16,
u16,
uint16x8_t,
uint16x8_t,
u128,
V_u16!(),
fillq_u16,
fillq_u16,
to128!(uint16x8_t)
);
// Test drivers for `i16` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `cmp` variants take signed operands but expect an unsigned result
// (`u16` scalar, `uint16x4_t`/`uint16x8_t` vector).
//
// test_ari_s16: fn(int16x4_t, int16x4_t) -> int16x4_t vs. fn(i16, i16) -> i16.
gen_test_fn!(
test_ari_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
// test_bit_s16: fn(int16x4_t, int16x4_t) -> int16x4_t vs. fn(i16, i16) -> i16.
gen_test_fn!(
test_bit_s16,
i16,
i16,
int16x4_t,
int16x4_t,
u64,
V_i16!(),
fill_s16,
fill_s16,
to64!(int16x4_t)
);
// test_cmp_s16: fn(int16x4_t, int16x4_t) -> uint16x4_t vs. fn(i16, i16) -> u16.
gen_test_fn!(
test_cmp_s16,
i16,
u16,
int16x4_t,
uint16x4_t,
u64,
V_i16!(),
fill_s16,
fill_u16,
to64!(uint16x4_t)
);
// testq_ari_s16: fn(int16x8_t, int16x8_t) -> int16x8_t vs. fn(i16, i16) -> i16.
gen_test_fn!(
testq_ari_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
// testq_bit_s16: fn(int16x8_t, int16x8_t) -> int16x8_t vs. fn(i16, i16) -> i16.
gen_test_fn!(
testq_bit_s16,
i16,
i16,
int16x8_t,
int16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_s16,
to128!(int16x8_t)
);
// testq_cmp_s16: fn(int16x8_t, int16x8_t) -> uint16x8_t vs. fn(i16, i16) -> u16.
gen_test_fn!(
testq_cmp_s16,
i16,
u16,
int16x8_t,
uint16x8_t,
u128,
V_i16!(),
fillq_s16,
fillq_u16,
to128!(uint16x8_t)
);
// Test drivers for `u32` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `ari`/`bit`/`cmp` variants below differ only in name.
//
// test_ari_u32: fn(uint32x2_t, uint32x2_t) -> uint32x2_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
test_ari_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
// test_bit_u32: fn(uint32x2_t, uint32x2_t) -> uint32x2_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
test_bit_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
// test_cmp_u32: fn(uint32x2_t, uint32x2_t) -> uint32x2_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
test_cmp_u32,
u32,
u32,
uint32x2_t,
uint32x2_t,
u64,
V_u32!(),
fill_u32,
fill_u32,
to64!(uint32x2_t)
);
// testq_ari_u32: fn(uint32x4_t, uint32x4_t) -> uint32x4_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
testq_ari_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
// testq_bit_u32: fn(uint32x4_t, uint32x4_t) -> uint32x4_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
testq_bit_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
// testq_cmp_u32: fn(uint32x4_t, uint32x4_t) -> uint32x4_t vs. fn(u32, u32) -> u32.
gen_test_fn!(
testq_cmp_u32,
u32,
u32,
uint32x4_t,
uint32x4_t,
u128,
V_u32!(),
fillq_u32,
fillq_u32,
to128!(uint32x4_t)
);
// Test drivers for `i32` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `cmp` variants take signed operands but expect an unsigned result
// (`u32` scalar, `uint32x2_t`/`uint32x4_t` vector).
//
// test_ari_s32: fn(int32x2_t, int32x2_t) -> int32x2_t vs. fn(i32, i32) -> i32.
gen_test_fn!(
test_ari_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
// test_bit_s32: fn(int32x2_t, int32x2_t) -> int32x2_t vs. fn(i32, i32) -> i32.
gen_test_fn!(
test_bit_s32,
i32,
i32,
int32x2_t,
int32x2_t,
u64,
V_i32!(),
fill_s32,
fill_s32,
to64!(int32x2_t)
);
// test_cmp_s32: fn(int32x2_t, int32x2_t) -> uint32x2_t vs. fn(i32, i32) -> u32.
gen_test_fn!(
test_cmp_s32,
i32,
u32,
int32x2_t,
uint32x2_t,
u64,
V_i32!(),
fill_s32,
fill_u32,
to64!(uint32x2_t)
);
// testq_ari_s32: fn(int32x4_t, int32x4_t) -> int32x4_t vs. fn(i32, i32) -> i32.
gen_test_fn!(
testq_ari_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
// testq_bit_s32: fn(int32x4_t, int32x4_t) -> int32x4_t vs. fn(i32, i32) -> i32.
gen_test_fn!(
testq_bit_s32,
i32,
i32,
int32x4_t,
int32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_s32,
to128!(int32x4_t)
);
// testq_cmp_s32: fn(int32x4_t, int32x4_t) -> uint32x4_t vs. fn(i32, i32) -> u32.
gen_test_fn!(
testq_cmp_s32,
i32,
u32,
int32x4_t,
uint32x4_t,
u128,
V_i32!(),
fillq_s32,
fillq_u32,
to128!(uint32x4_t)
);
// Test drivers for `u64` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `ari`/`bit`/`cmp` variants below differ only in name.
//
// test_ari_u64: fn(uint64x1_t, uint64x1_t) -> uint64x1_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
test_ari_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
// test_bit_u64: fn(uint64x1_t, uint64x1_t) -> uint64x1_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
test_bit_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
// test_cmp_u64: fn(uint64x1_t, uint64x1_t) -> uint64x1_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
test_cmp_u64,
u64,
u64,
uint64x1_t,
uint64x1_t,
u64,
V_u64!(),
fill_u64,
fill_u64,
to64!(uint64x1_t)
);
// testq_ari_u64: fn(uint64x2_t, uint64x2_t) -> uint64x2_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
testq_ari_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
// testq_bit_u64: fn(uint64x2_t, uint64x2_t) -> uint64x2_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
testq_bit_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
// testq_cmp_u64: fn(uint64x2_t, uint64x2_t) -> uint64x2_t vs. fn(u64, u64) -> u64.
gen_test_fn!(
testq_cmp_u64,
u64,
u64,
uint64x2_t,
uint64x2_t,
u128,
V_u64!(),
fillq_u64,
fillq_u64,
to128!(uint64x2_t)
);
// Test drivers for `i64` lanes.
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `cmp` variants take signed operands but expect an unsigned result
// (`u64` scalar, `uint64x1_t`/`uint64x2_t` vector).
//
// test_ari_s64: fn(int64x1_t, int64x1_t) -> int64x1_t vs. fn(i64, i64) -> i64.
gen_test_fn!(
test_ari_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
// test_bit_s64: fn(int64x1_t, int64x1_t) -> int64x1_t vs. fn(i64, i64) -> i64.
gen_test_fn!(
test_bit_s64,
i64,
i64,
int64x1_t,
int64x1_t,
u64,
V_i64!(),
fill_s64,
fill_s64,
to64!(int64x1_t)
);
// test_cmp_s64: fn(int64x1_t, int64x1_t) -> uint64x1_t vs. fn(i64, i64) -> u64.
gen_test_fn!(
test_cmp_s64,
i64,
u64,
int64x1_t,
uint64x1_t,
u64,
V_i64!(),
fill_s64,
fill_u64,
to64!(uint64x1_t)
);
// testq_ari_s64: fn(int64x2_t, int64x2_t) -> int64x2_t vs. fn(i64, i64) -> i64.
gen_test_fn!(
testq_ari_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
// testq_bit_s64: fn(int64x2_t, int64x2_t) -> int64x2_t vs. fn(i64, i64) -> i64.
gen_test_fn!(
testq_bit_s64,
i64,
i64,
int64x2_t,
int64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_s64,
to128!(int64x2_t)
);
// testq_cmp_s64: fn(int64x2_t, int64x2_t) -> uint64x2_t vs. fn(i64, i64) -> u64.
gen_test_fn!(
testq_cmp_s64,
i64,
u64,
int64x2_t,
uint64x2_t,
u128,
V_i64!(),
fillq_s64,
fillq_u64,
to128!(uint64x2_t)
);
// Test drivers for `f32` lanes (no `bit` variant is generated for floats).
// `gen_test_fn!` arguments: generated function name, scalar operand type,
// scalar result type, vector operand type, vector result type, integer type
// compared, sample values, operand fill, result fill, vector-to-integer cast.
// The `cmp` variants take float operands but expect an unsigned integer
// result (`u32` scalar, `uint32x2_t`/`uint32x4_t` vector).
// Results are compared as raw integers, so float results must match bit for bit.
//
// test_ari_f32: fn(float32x2_t, float32x2_t) -> float32x2_t vs. fn(f32, f32) -> f32.
gen_test_fn!(
test_ari_f32,
f32,
f32,
float32x2_t,
float32x2_t,
u64,
V_f32!(),
fill_f32,
fill_f32,
to64!(float32x2_t)
);
// test_cmp_f32: fn(float32x2_t, float32x2_t) -> uint32x2_t vs. fn(f32, f32) -> u32.
gen_test_fn!(
test_cmp_f32,
f32,
u32,
float32x2_t,
uint32x2_t,
u64,
V_f32!(),
fill_f32,
fill_u32,
to64!(uint32x2_t)
);
// testq_ari_f32: fn(float32x4_t, float32x4_t) -> float32x4_t vs. fn(f32, f32) -> f32.
gen_test_fn!(
testq_ari_f32,
f32,
f32,
float32x4_t,
float32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_f32,
to128!(float32x4_t)
);
// testq_cmp_f32: fn(float32x4_t, float32x4_t) -> uint32x4_t vs. fn(f32, f32) -> u32.
gen_test_fn!(
testq_cmp_f32,
f32,
u32,
float32x4_t,
uint32x4_t,
u128,
V_f32!(),
fillq_f32,
fillq_u32,
to128!(uint32x4_t)
);