| use crate::{ |
| core_arch::{simd::*, simd_llvm::*, x86::*}, |
| mem::transmute, |
| }; |
| |
| #[cfg(test)] |
| use stdarch_test::assert_instr; |
| |
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmw_epi32&expand=553)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm512_broadcastmw_epi32(k: __mmask16) -> __m512i {
    // `k as i32` zero-extends the 16-bit mask; set1 splats it to all 16 lanes.
    _mm512_set1_epi32(k as i32)
}
| |
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmw_epi32&expand=552)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm256_broadcastmw_epi32(k: __mmask16) -> __m256i {
    // `k as i32` zero-extends the 16-bit mask; set1 splats it to all 8 lanes.
    _mm256_set1_epi32(k as i32)
}
| |
/// Broadcast the low 16-bits from input mask k to all 32-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmw_epi32&expand=551)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmw2d
pub unsafe fn _mm_broadcastmw_epi32(k: __mmask16) -> __m128i {
    // `k as i32` zero-extends the 16-bit mask; set1 splats it to all 4 lanes.
    _mm_set1_epi32(k as i32)
}
| |
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_broadcastmb_epi64&expand=550)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm512_broadcastmb_epi64(k: __mmask8) -> __m512i {
    // `k as i64` zero-extends the 8-bit mask; set1 splats it to all 8 lanes.
    _mm512_set1_epi64(k as i64)
}
| |
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_broadcastmb_epi64&expand=549)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm256_broadcastmb_epi64(k: __mmask8) -> __m256i {
    // `k as i64` zero-extends the 8-bit mask; set1 splats it to all 4 lanes.
    _mm256_set1_epi64x(k as i64)
}
| |
/// Broadcast the low 8-bits from input mask k to all 64-bit elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_broadcastmb_epi64&expand=548)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))] // should be vpbroadcastmb2q
pub unsafe fn _mm_broadcastmb_epi64(k: __mmask8) -> __m128i {
    // `k as i64` zero-extends the 8-bit mask; set1 splats it to both lanes.
    _mm_set1_epi64x(k as i64)
}
| |
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi32&expand=1248)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm512_conflict_epi32(a: __m512i) -> __m512i {
    // Thin wrapper over the LLVM VPCONFLICTD intrinsic on the raw i32x16 lanes.
    transmute(vpconflictd(a.as_i32x16()))
}
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi32&expand=1249) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm512_mask_conflict_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { |
| let conflict = _mm512_conflict_epi32(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i32x16())) |
| } |
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi32&expand=1250) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm512_maskz_conflict_epi32(k: __mmask16, a: __m512i) -> __m512i { |
| let conflict = _mm512_conflict_epi32(a).as_i32x16(); |
| let zero = _mm512_setzero_si512().as_i32x16(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi32&expand=1245)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm256_conflict_epi32(a: __m256i) -> __m256i {
    // Thin wrapper over the LLVM VPCONFLICTD intrinsic on the raw i32x8 lanes.
    transmute(vpconflictd256(a.as_i32x8()))
}
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi32&expand=1246) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm256_mask_conflict_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
| let conflict = _mm256_conflict_epi32(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i32x8())) |
| } |
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi32&expand=1247) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm256_maskz_conflict_epi32(k: __mmask8, a: __m256i) -> __m256i { |
| let conflict = _mm256_conflict_epi32(a).as_i32x8(); |
| let zero = _mm256_setzero_si256().as_i32x8(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi32&expand=1242)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictd))]
pub unsafe fn _mm_conflict_epi32(a: __m128i) -> __m128i {
    // Thin wrapper over the LLVM VPCONFLICTD intrinsic on the raw i32x4 lanes.
    transmute(vpconflictd128(a.as_i32x4()))
}
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi32&expand=1243) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm_mask_conflict_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| let conflict = _mm_conflict_epi32(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i32x4())) |
| } |
| |
| /// Test each 32-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi32&expand=1244) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictd))] |
| pub unsafe fn _mm_maskz_conflict_epi32(k: __mmask8, a: __m128i) -> __m128i { |
| let conflict = _mm_conflict_epi32(a).as_i32x4(); |
| let zero = _mm_setzero_si128().as_i32x4(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_conflict_epi64&expand=1257)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm512_conflict_epi64(a: __m512i) -> __m512i {
    // Thin wrapper over the LLVM VPCONFLICTQ intrinsic on the raw i64x8 lanes.
    transmute(vpconflictq(a.as_i64x8()))
}
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_conflict_epi64&expand=1258) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm512_mask_conflict_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { |
| let conflict = _mm512_conflict_epi64(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i64x8())) |
| } |
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_conflict_epi64&expand=1259) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm512_maskz_conflict_epi64(k: __mmask8, a: __m512i) -> __m512i { |
| let conflict = _mm512_conflict_epi64(a).as_i64x8(); |
| let zero = _mm512_setzero_si512().as_i64x8(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_conflict_epi64&expand=1254)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm256_conflict_epi64(a: __m256i) -> __m256i {
    // Thin wrapper over the LLVM VPCONFLICTQ intrinsic on the raw i64x4 lanes.
    transmute(vpconflictq256(a.as_i64x4()))
}
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_conflict_epi64&expand=1255) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm256_mask_conflict_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
| let conflict = _mm256_conflict_epi64(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i64x4())) |
| } |
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_conflict_epi64&expand=1256) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm256_maskz_conflict_epi64(k: __mmask8, a: __m256i) -> __m256i { |
| let conflict = _mm256_conflict_epi64(a).as_i64x4(); |
| let zero = _mm256_setzero_si256().as_i64x4(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit. Each element's comparison forms a zero extended bit vector in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_conflict_epi64&expand=1251)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vpconflictq))]
pub unsafe fn _mm_conflict_epi64(a: __m128i) -> __m128i {
    // Thin wrapper over the LLVM VPCONFLICTQ intrinsic on the raw i64x2 lanes.
    transmute(vpconflictq128(a.as_i64x2()))
}
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using writemask k (elements are copied from src when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_conflict_epi64&expand=1252) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm_mask_conflict_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| let conflict = _mm_conflict_epi64(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, conflict, src.as_i64x2())) |
| } |
| |
| /// Test each 64-bit element of a for equality with all other elements in a closer to the least significant bit using zeromask k (elements are zeroed out when the corresponding mask bit is not set). Each element's comparison forms a zero extended bit vector in dst. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_conflict_epi64&expand=1253) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vpconflictq))] |
| pub unsafe fn _mm_maskz_conflict_epi64(k: __mmask8, a: __m128i) -> __m128i { |
| let conflict = _mm_conflict_epi64(a).as_i64x2(); |
| let zero = _mm_setzero_si128().as_i64x2(); |
| transmute(simd_select_bitmask(k, conflict, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi32&expand=3491)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm512_lzcnt_epi32(a: __m512i) -> __m512i {
    // Second argument is llvm.ctlz's `is_zero_undef` flag: `false` keeps a
    // zero input well-defined (count = element width), matching VPLZCNTD.
    transmute(vplzcntd(a.as_i32x16(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi32&expand=3492) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm512_mask_lzcnt_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i { |
| let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i32x16())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi32&expand=3493) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm512_maskz_lzcnt_epi32(k: __mmask16, a: __m512i) -> __m512i { |
| let zerocount = _mm512_lzcnt_epi32(a).as_i32x16(); |
| let zero = _mm512_setzero_si512().as_i32x16(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi32&expand=3488)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm256_lzcnt_epi32(a: __m256i) -> __m256i {
    // `false` = llvm.ctlz's `is_zero_undef` flag; zero input stays well-defined.
    transmute(vplzcntd256(a.as_i32x8(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi32&expand=3489) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm256_mask_lzcnt_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
| let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i32x8())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi32&expand=3490) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm256_maskz_lzcnt_epi32(k: __mmask8, a: __m256i) -> __m256i { |
| let zerocount = _mm256_lzcnt_epi32(a).as_i32x8(); |
| let zero = _mm256_setzero_si256().as_i32x8(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi32&expand=3485)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntd))]
pub unsafe fn _mm_lzcnt_epi32(a: __m128i) -> __m128i {
    // `false` = llvm.ctlz's `is_zero_undef` flag; zero input stays well-defined.
    transmute(vplzcntd128(a.as_i32x4(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi32&expand=3486) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm_mask_lzcnt_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i32x4())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 32-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi32&expand=3487) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntd))] |
| pub unsafe fn _mm_maskz_lzcnt_epi32(k: __mmask8, a: __m128i) -> __m128i { |
| let zerocount = _mm_lzcnt_epi32(a).as_i32x4(); |
| let zero = _mm_setzero_si128().as_i32x4(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_lzcnt_epi64&expand=3500)
#[inline]
#[target_feature(enable = "avx512cd")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm512_lzcnt_epi64(a: __m512i) -> __m512i {
    // `false` = llvm.ctlz's `is_zero_undef` flag; zero input stays well-defined.
    transmute(vplzcntq(a.as_i64x8(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_mask_lzcnt_epi64&expand=3501) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm512_mask_lzcnt_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i { |
| let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i64x8())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm512_maskz_lzcnt_epi64&expand=3502) |
| #[inline] |
| #[target_feature(enable = "avx512cd")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm512_maskz_lzcnt_epi64(k: __mmask8, a: __m512i) -> __m512i { |
| let zerocount = _mm512_lzcnt_epi64(a).as_i64x8(); |
| let zero = _mm512_setzero_si512().as_i64x8(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_lzcnt_epi64&expand=3497)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm256_lzcnt_epi64(a: __m256i) -> __m256i {
    // `false` = llvm.ctlz's `is_zero_undef` flag; zero input stays well-defined.
    transmute(vplzcntq256(a.as_i64x4(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_mask_lzcnt_epi64&expand=3498) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm256_mask_lzcnt_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i { |
| let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i64x4())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm256_maskz_lzcnt_epi64&expand=3499) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm256_maskz_lzcnt_epi64(k: __mmask8, a: __m256i) -> __m256i { |
| let zerocount = _mm256_lzcnt_epi64(a).as_i64x4(); |
| let zero = _mm256_setzero_si256().as_i64x4(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
/// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lzcnt_epi64&expand=3494)
#[inline]
#[target_feature(enable = "avx512cd,avx512vl")]
#[cfg_attr(test, assert_instr(vplzcntq))]
pub unsafe fn _mm_lzcnt_epi64(a: __m128i) -> __m128i {
    // `false` = llvm.ctlz's `is_zero_undef` flag; zero input stays well-defined.
    transmute(vplzcntq128(a.as_i64x2(), false))
}
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using writemask k (elements are copied from src when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mask_lzcnt_epi64&expand=3495) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm_mask_lzcnt_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i { |
| let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); |
| transmute(simd_select_bitmask(k, zerocount, src.as_i64x2())) |
| } |
| |
| /// Counts the number of leading zero bits in each packed 64-bit integer in a, and store the results in dst using zeromask k (elements are zeroed out when the corresponding mask bit is not set). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskz_lzcnt_epi64&expand=3496) |
| #[inline] |
| #[target_feature(enable = "avx512cd,avx512vl")] |
| #[cfg_attr(test, assert_instr(vplzcntq))] |
| pub unsafe fn _mm_maskz_lzcnt_epi64(k: __mmask8, a: __m128i) -> __m128i { |
| let zerocount = _mm_lzcnt_epi64(a).as_i64x2(); |
| let zero = _mm_setzero_si128().as_i64x2(); |
| transmute(simd_select_bitmask(k, zerocount, zero)) |
| } |
| |
// Raw LLVM intrinsic declarations backing the wrappers above.
#[allow(improper_ctypes)]
extern "C" {
    // VPCONFLICT{D,Q}: per-lane conflict detection against less-significant lanes.
    #[link_name = "llvm.x86.avx512.conflict.d.512"]
    fn vpconflictd(a: i32x16) -> i32x16;
    #[link_name = "llvm.x86.avx512.conflict.d.256"]
    fn vpconflictd256(a: i32x8) -> i32x8;
    #[link_name = "llvm.x86.avx512.conflict.d.128"]
    fn vpconflictd128(a: i32x4) -> i32x4;

    #[link_name = "llvm.x86.avx512.conflict.q.512"]
    fn vpconflictq(a: i64x8) -> i64x8;
    #[link_name = "llvm.x86.avx512.conflict.q.256"]
    fn vpconflictq256(a: i64x4) -> i64x4;
    #[link_name = "llvm.x86.avx512.conflict.q.128"]
    fn vpconflictq128(a: i64x2) -> i64x2;

    // Leading-zero count via the generic llvm.ctlz intrinsic; the bool is its
    // `is_zero_undef` flag (callers pass `false` so a zero input is defined).
    // LLVM lowers these to VPLZCNT{D,Q} when AVX512CD is enabled.
    #[link_name = "llvm.ctlz.v16i32"]
    fn vplzcntd(a: i32x16, nonzero: bool) -> i32x16;
    #[link_name = "llvm.ctlz.v8i32"]
    fn vplzcntd256(a: i32x8, nonzero: bool) -> i32x8;
    #[link_name = "llvm.ctlz.v4i32"]
    fn vplzcntd128(a: i32x4, nonzero: bool) -> i32x4;

    #[link_name = "llvm.ctlz.v8i64"]
    fn vplzcntq(a: i64x8, nonzero: bool) -> i64x8;
    #[link_name = "llvm.ctlz.v4i64"]
    fn vplzcntq256(a: i64x4, nonzero: bool) -> i64x4;
    #[link_name = "llvm.ctlz.v2i64"]
    fn vplzcntq128(a: i64x2, nonzero: bool) -> i64x2;
}
| |
| #[cfg(test)] |
| mod tests { |
| |
| use crate::core_arch::x86::*; |
| use stdarch_test::simd_test; |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_broadcastmw_epi32() { |
| let a: __mmask16 = 2; |
| let r = _mm512_broadcastmw_epi32(a); |
| let e = _mm512_set1_epi32(2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_broadcastmw_epi32() { |
| let a: __mmask16 = 2; |
| let r = _mm256_broadcastmw_epi32(a); |
| let e = _mm256_set1_epi32(2); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_broadcastmw_epi32() { |
| let a: __mmask16 = 2; |
| let r = _mm_broadcastmw_epi32(a); |
| let e = _mm_set1_epi32(2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_broadcastmb_epi64() { |
| let a: __mmask8 = 2; |
| let r = _mm512_broadcastmb_epi64(a); |
| let e = _mm512_set1_epi64(2); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_broadcastmb_epi64() { |
| let a: __mmask8 = 2; |
| let r = _mm256_broadcastmb_epi64(a); |
| let e = _mm256_set1_epi64x(2); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_broadcastmb_epi64() { |
| let a: __mmask8 = 2; |
| let r = _mm_broadcastmb_epi64(a); |
| let e = _mm_set1_epi64x(2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_conflict_epi32() { |
| let a = _mm512_set1_epi32(1); |
| let r = _mm512_conflict_epi32(a); |
| let e = _mm512_set_epi32( |
| 1 << 14 |
| | 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_mask_conflict_epi32() { |
| let a = _mm512_set1_epi32(1); |
| let r = _mm512_mask_conflict_epi32(a, 0, a); |
| assert_eq_m512i(r, a); |
| let r = _mm512_mask_conflict_epi32(a, 0b11111111_11111111, a); |
| let e = _mm512_set_epi32( |
| 1 << 14 |
| | 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_maskz_conflict_epi32() { |
| let a = _mm512_set1_epi32(1); |
| let r = _mm512_maskz_conflict_epi32(0, a); |
| assert_eq_m512i(r, _mm512_setzero_si512()); |
| let r = _mm512_maskz_conflict_epi32(0b11111111_11111111, a); |
| let e = _mm512_set_epi32( |
| 1 << 14 |
| | 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 13 |
| | 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 12 |
| | 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 11 |
| | 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 10 |
| | 1 << 9 |
| | 1 << 8 |
| | 1 << 7 |
| | 1 << 6 |
| | 1 << 5 |
| | 1 << 4 |
| | 1 << 3 |
| | 1 << 2 |
| | 1 << 1 |
| | 1 << 0, |
| 1 << 9 | 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 8 | 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 7 | 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_conflict_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_conflict_epi32(a); |
| let e = _mm256_set_epi32( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_mask_conflict_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_mask_conflict_epi32(a, 0, a); |
| assert_eq_m256i(r, a); |
| let r = _mm256_mask_conflict_epi32(a, 0b11111111, a); |
| let e = _mm256_set_epi32( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_maskz_conflict_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_maskz_conflict_epi32(0, a); |
| assert_eq_m256i(r, _mm256_setzero_si256()); |
| let r = _mm256_maskz_conflict_epi32(0b11111111, a); |
| let e = _mm256_set_epi32( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_conflict_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_conflict_epi32(a); |
| let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_mask_conflict_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_mask_conflict_epi32(a, 0, a); |
| assert_eq_m128i(r, a); |
| let r = _mm_mask_conflict_epi32(a, 0b00001111, a); |
| let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_maskz_conflict_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_maskz_conflict_epi32(0, a); |
| assert_eq_m128i(r, _mm_setzero_si128()); |
| let r = _mm_maskz_conflict_epi32(0b00001111, a); |
| let e = _mm_set_epi32(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_conflict_epi64() { |
| let a = _mm512_set1_epi64(1); |
| let r = _mm512_conflict_epi64(a); |
| let e = _mm512_set_epi64( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_mask_conflict_epi64() { |
| let a = _mm512_set1_epi64(1); |
| let r = _mm512_mask_conflict_epi64(a, 0, a); |
| assert_eq_m512i(r, a); |
| let r = _mm512_mask_conflict_epi64(a, 0b11111111, a); |
| let e = _mm512_set_epi64( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_maskz_conflict_epi64() { |
| let a = _mm512_set1_epi64(1); |
| let r = _mm512_maskz_conflict_epi64(0, a); |
| assert_eq_m512i(r, _mm512_setzero_si512()); |
| let r = _mm512_maskz_conflict_epi64(0b11111111, a); |
| let e = _mm512_set_epi64( |
| 1 << 6 | 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 5 | 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 4 | 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 3 | 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 2 | 1 << 1 | 1 << 0, |
| 1 << 1 | 1 << 0, |
| 1 << 0, |
| 0, |
| ); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_conflict_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_conflict_epi64(a); |
| let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_mask_conflict_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_mask_conflict_epi64(a, 0, a); |
| assert_eq_m256i(r, a); |
| let r = _mm256_mask_conflict_epi64(a, 0b00001111, a); |
| let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_maskz_conflict_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_maskz_conflict_epi64(0, a); |
| assert_eq_m256i(r, _mm256_setzero_si256()); |
| let r = _mm256_maskz_conflict_epi64(0b00001111, a); |
| let e = _mm256_set_epi64x(1 << 2 | 1 << 1 | 1 << 0, 1 << 1 | 1 << 0, 1 << 0, 0); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_conflict_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_conflict_epi64(a); |
| let e = _mm_set_epi64x(1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_mask_conflict_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_mask_conflict_epi64(a, 0, a); |
| assert_eq_m128i(r, a); |
| let r = _mm_mask_conflict_epi64(a, 0b00000011, a); |
| let e = _mm_set_epi64x(1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_maskz_conflict_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_maskz_conflict_epi64(0, a); |
| assert_eq_m128i(r, _mm_setzero_si128()); |
| let r = _mm_maskz_conflict_epi64(0b00000011, a); |
| let e = _mm_set_epi64x(1 << 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_lzcnt_epi32() { |
| let a = _mm512_set1_epi32(1); |
| let r = _mm512_lzcnt_epi32(a); |
| let e = _mm512_set1_epi32(31); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_mask_lzcnt_epi32() { |
| let a = _mm512_set1_epi32(1); |
| let r = _mm512_mask_lzcnt_epi32(a, 0, a); |
| assert_eq_m512i(r, a); |
| let r = _mm512_mask_lzcnt_epi32(a, 0b11111111_11111111, a); |
| let e = _mm512_set1_epi32(31); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_maskz_lzcnt_epi32() { |
| let a = _mm512_set1_epi32(2); |
| let r = _mm512_maskz_lzcnt_epi32(0, a); |
| assert_eq_m512i(r, _mm512_setzero_si512()); |
| let r = _mm512_maskz_lzcnt_epi32(0b11111111_11111111, a); |
| let e = _mm512_set1_epi32(30); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_lzcnt_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_lzcnt_epi32(a); |
| let e = _mm256_set1_epi32(31); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_mask_lzcnt_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_mask_lzcnt_epi32(a, 0, a); |
| assert_eq_m256i(r, a); |
| let r = _mm256_mask_lzcnt_epi32(a, 0b11111111, a); |
| let e = _mm256_set1_epi32(31); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_maskz_lzcnt_epi32() { |
| let a = _mm256_set1_epi32(1); |
| let r = _mm256_maskz_lzcnt_epi32(0, a); |
| assert_eq_m256i(r, _mm256_setzero_si256()); |
| let r = _mm256_maskz_lzcnt_epi32(0b11111111, a); |
| let e = _mm256_set1_epi32(31); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_lzcnt_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_lzcnt_epi32(a); |
| let e = _mm_set1_epi32(31); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_mask_lzcnt_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_mask_lzcnt_epi32(a, 0, a); |
| assert_eq_m128i(r, a); |
| let r = _mm_mask_lzcnt_epi32(a, 0b00001111, a); |
| let e = _mm_set1_epi32(31); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_maskz_lzcnt_epi32() { |
| let a = _mm_set1_epi32(1); |
| let r = _mm_maskz_lzcnt_epi32(0, a); |
| assert_eq_m128i(r, _mm_setzero_si128()); |
| let r = _mm_maskz_lzcnt_epi32(0b00001111, a); |
| let e = _mm_set1_epi32(31); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_lzcnt_epi64() { |
| let a = _mm512_set1_epi64(1); |
| let r = _mm512_lzcnt_epi64(a); |
| let e = _mm512_set1_epi64(63); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_mask_lzcnt_epi64() { |
| let a = _mm512_set1_epi64(1); |
| let r = _mm512_mask_lzcnt_epi64(a, 0, a); |
| assert_eq_m512i(r, a); |
| let r = _mm512_mask_lzcnt_epi64(a, 0b11111111, a); |
| let e = _mm512_set1_epi64(63); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd")] |
| unsafe fn test_mm512_maskz_lzcnt_epi64() { |
| let a = _mm512_set1_epi64(2); |
| let r = _mm512_maskz_lzcnt_epi64(0, a); |
| assert_eq_m512i(r, _mm512_setzero_si512()); |
| let r = _mm512_maskz_lzcnt_epi64(0b11111111, a); |
| let e = _mm512_set1_epi64(62); |
| assert_eq_m512i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_lzcnt_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_lzcnt_epi64(a); |
| let e = _mm256_set1_epi64x(63); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_mask_lzcnt_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_mask_lzcnt_epi64(a, 0, a); |
| assert_eq_m256i(r, a); |
| let r = _mm256_mask_lzcnt_epi64(a, 0b00001111, a); |
| let e = _mm256_set1_epi64x(63); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm256_maskz_lzcnt_epi64() { |
| let a = _mm256_set1_epi64x(1); |
| let r = _mm256_maskz_lzcnt_epi64(0, a); |
| assert_eq_m256i(r, _mm256_setzero_si256()); |
| let r = _mm256_maskz_lzcnt_epi64(0b00001111, a); |
| let e = _mm256_set1_epi64x(63); |
| assert_eq_m256i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_lzcnt_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_lzcnt_epi64(a); |
| let e = _mm_set1_epi64x(63); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_mask_lzcnt_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_mask_lzcnt_epi64(a, 0, a); |
| assert_eq_m128i(r, a); |
| let r = _mm_mask_lzcnt_epi64(a, 0b00001111, a); |
| let e = _mm_set1_epi64x(63); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "avx512cd,avx512vl")] |
| unsafe fn test_mm_maskz_lzcnt_epi64() { |
| let a = _mm_set1_epi64x(1); |
| let r = _mm_maskz_lzcnt_epi64(0, a); |
| assert_eq_m128i(r, _mm_setzero_si128()); |
| let r = _mm_maskz_lzcnt_epi64(0b00001111, a); |
| let e = _mm_set1_epi64x(63); |
| assert_eq_m128i(r, e); |
| } |
| } |