| //! Streaming SIMD Extensions 2 (SSE2) |
| |
| #[cfg(test)] |
| use stdarch_test::assert_instr; |
| |
| use crate::{ |
| core_arch::{simd::*, simd_llvm::*, x86::*}, |
| intrinsics, |
| mem::{self, transmute}, |
| ptr, |
| }; |
| |
| /// Provides a hint to the processor that the code sequence is a spin-wait loop. |
| /// |
| /// This can help improve the performance and power consumption of spin-wait |
| /// loops. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_pause) |
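| ///
| /// A minimal spin-wait sketch; the `ready` flag here is a stand-in for
| /// whatever shared state the loop is actually polling:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     use std::sync::atomic::{AtomicBool, Ordering};
| ///
| ///     let ready = AtomicBool::new(true); // normally set by another thread
| ///     while !ready.load(Ordering::Acquire) {
| ///         // Hint to the CPU that we are spinning, which can reduce power
| ///         // use and help a sibling hyper-thread make progress.
| ///         unsafe { _mm_pause() };
| ///     }
| /// }
| /// ```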
| #[inline] |
| #[cfg_attr(all(test, target_feature = "sse2"), assert_instr(pause))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_pause() { |
| // note: `pause` is guaranteed to be interpreted as a `nop` by CPUs without |
| // the SSE2 target-feature - therefore it does not require any target features |
| pause() |
| } |
| |
| /// Invalidates and flushes the cache line that contains `p` from all levels of |
| /// the cache hierarchy. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_clflush) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(clflush))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_clflush(p: *const u8) { |
| clflush(p) |
| } |
| |
| /// Performs a serializing operation on all load-from-memory instructions |
| /// that were issued prior to this instruction. |
| /// |
| /// Guarantees that every load instruction that precedes, in program order,
| /// the load fence instruction is globally visible before any load
| /// instruction which follows the fence in program order.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_lfence) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(lfence))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_lfence() { |
| lfence() |
| } |
| |
| /// Performs a serializing operation on all load-from-memory and store-to-memory |
| /// instructions that were issued prior to this instruction. |
| /// |
| /// Guarantees that every memory access that precedes, in program order, the |
| /// memory fence instruction is globally visible before any memory instruction |
| /// which follows the fence in program order. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mfence) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(mfence))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mfence() { |
| mfence() |
| } |
| |
| /// Adds packed 8-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_add(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Adds packed 16-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_add(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Adds packed 32-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_add(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Adds packed 64-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_epi64(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_add(a.as_i64x2(), b.as_i64x2())) |
| } |
| |
| /// Adds packed 8-bit integers in `a` and `b` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi8) |
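| ///
| /// A minimal sketch of the saturating behavior:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_set1_epi8(100);
| ///             // 100 + 100 clamps to i8::MAX (127) instead of wrapping to -56.
| ///             let r = _mm_adds_epi8(a, a);
| ///             // Each 16-bit lane now holds two 127 bytes: 0x7F7F.
| ///             assert_eq!(_mm_extract_epi16::<0>(r), 0x7F7F);
| ///         }
| ///     }
| /// }
| /// ```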
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddsb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_adds_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_add(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Adds packed 16-bit integers in `a` and `b` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddsw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_adds_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_add(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Adds packed unsigned 8-bit integers in `a` and `b` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddusb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_adds_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_add(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Adds packed unsigned 16-bit integers in `a` and `b` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_adds_epu16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(paddusw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_adds_epu16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_add(a.as_u16x8(), b.as_u16x8())) |
| } |
| |
| /// Averages packed unsigned 8-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pavgb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_avg_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pavgb(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Averages packed unsigned 16-bit integers in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_avg_epu16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pavgw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_avg_epu16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pavgw(a.as_u16x8(), b.as_u16x8())) |
| } |
| |
| /// Multiplies and then horizontally adds signed 16-bit integers in `a` and `b`.
| ///
| /// Multiplies packed signed 16-bit integers in `a` and `b`, producing
| /// intermediate signed 32-bit integers. Horizontally adds adjacent pairs of
| /// intermediate 32-bit integers.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_madd_epi16) |
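| ///
| /// A minimal sketch of the pairwise multiply-add:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8);
| ///             let b = _mm_set1_epi16(10);
| ///             // Adjacent products are summed: [10 + 20, 30 + 40, 50 + 60, 70 + 80].
| ///             let r = _mm_madd_epi16(a, b);
| ///             let expected = _mm_setr_epi32(30, 70, 110, 150);
| ///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
| ///         }
| ///     }
| /// }
| /// ```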
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmaddwd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_madd_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmaddwd(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
| /// maximum values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmaxsw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_max_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmaxsw(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
| /// packed maximum values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_epu8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmaxub))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_max_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmaxub(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Compares packed 16-bit integers in `a` and `b`, and returns the packed |
| /// minimum values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pminsw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_min_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pminsw(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed unsigned 8-bit integers in `a` and `b`, and returns the |
| /// packed minimum values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_epu8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pminub))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_min_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pminub(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Multiplies the packed 16-bit integers in `a` and `b`. |
| /// |
| /// The multiplication produces intermediate 32-bit integers, and the high
| /// 16 bits of each intermediate integer are returned.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epi16) |
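| ///
| /// A small sketch showing where the high half of the product comes from:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             // 30000 * 30000 = 900_000_000 = 0x35A4_E900,
| ///             // so each lane keeps the high word 0x35A4.
| ///             let r = _mm_mulhi_epi16(_mm_set1_epi16(30000), _mm_set1_epi16(30000));
| ///             assert_eq!(_mm_extract_epi16::<0>(r), 0x35A4);
| ///         }
| ///     }
| /// }
| /// ```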
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmulhw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mulhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmulhw(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Multiplies the packed unsigned 16-bit integers in `a` and `b`. |
| /// |
| /// The multiplication produces intermediate 32-bit integers, and the high
| /// 16 bits of each intermediate integer are returned.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mulhi_epu16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmulhuw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mulhi_epu16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmulhuw(a.as_u16x8(), b.as_u16x8())) |
| } |
| |
| /// Multiplies the packed 16-bit integers in `a` and `b`. |
| /// |
| /// The multiplication produces intermediate 32-bit integers, and the low
| /// 16 bits of each intermediate integer are returned.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mullo_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmullw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mullo_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_mul(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Multiplies the low unsigned 32-bit integers from each packed 64-bit element |
| /// in `a` and `b`. |
| /// |
| /// Returns the unsigned 64-bit results. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_epu32) |
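| ///
| /// A minimal sketch; only the even-indexed (low) 32-bit lanes participate:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_setr_epi32(-1, 7, 2, 7); // low dwords: 0xFFFF_FFFF and 2
| ///             let b = _mm_setr_epi32(2, 9, 3, 9); // low dwords: 2 and 3
| ///             // Full 64-bit products: [0xFFFF_FFFF * 2, 2 * 3].
| ///             let r = _mm_mul_epu32(a, b);
| ///             let expected = _mm_set_epi64x(6, 0x1_FFFF_FFFE);
| ///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi32(r, expected)), 0xFFFF);
| ///         }
| ///     }
| /// }
| /// ```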
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmuludq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mul_epu32(a: __m128i, b: __m128i) -> __m128i { |
| transmute(pmuludq(a.as_u32x4(), b.as_u32x4())) |
| } |
| |
| /// Sums the absolute differences of packed unsigned 8-bit integers.
| ///
| /// Computes the absolute differences of packed unsigned 8-bit integers in `a`
| /// and `b`, then horizontally sums each consecutive group of 8 differences to
| /// produce two unsigned 16-bit integers, and packs these unsigned 16-bit
| /// integers into the low 16 bits of the two returned 64-bit elements.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sad_epu8) |
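| ///
| /// A minimal sketch; each half of the vector reduces to one small sum:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_set1_epi8(9);
| ///             let b = _mm_set1_epi8(5);
| ///             // Eight |9 - 5| differences per half, so each 64-bit lane holds 32.
| ///             let r = _mm_sad_epu8(a, b);
| ///             assert_eq!(_mm_cvtsi128_si32(r), 32);
| ///         }
| ///     }
| /// }
| /// ```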
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psadbw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sad_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(psadbw(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_sub(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_sub(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_sub(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_epi64(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_sub(a.as_i64x2(), b.as_i64x2())) |
| } |
| |
| /// Subtracts packed 8-bit integers in `b` from packed 8-bit integers in `a`
| /// using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubsb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_subs_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_sub(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Subtracts packed 16-bit integers in `b` from packed 16-bit integers in `a`
| /// using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubsw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_subs_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_sub(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Subtracts packed unsigned 8-bit integers in `b` from packed unsigned 8-bit
| /// integers in `a` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubusb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_subs_epu8(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_sub(a.as_u8x16(), b.as_u8x16())) |
| } |
| |
| /// Subtracts packed unsigned 16-bit integers in `b` from packed unsigned 16-bit
| /// integers in `a` using saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_subs_epu16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psubusw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_subs_epu16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(simd_saturating_sub(a.as_u16x8(), b.as_u16x8())) |
| } |
| |
| /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_si128) |
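| ///
| /// A minimal sketch; note the shift distance is in whole bytes:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_set1_epi8(1);
| ///             // Shift the whole vector left by 4 bytes; zeros fill the low end.
| ///             let r = _mm_slli_si128::<4>(a);
| ///             assert_eq!(_mm_cvtsi128_si32(r), 0); // the low 4 bytes are now zero
| ///         }
| ///     }
| /// }
| /// ```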
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_slli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| _mm_slli_si128_impl::<IMM8>(a) |
| } |
| |
| /// Implementation detail: converts the immediate argument of the |
| /// `_mm_slli_si128` intrinsic into a compile-time constant. |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| unsafe fn _mm_slli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
| const fn mask(shift: i32, i: u32) -> u32 {
| // The shuffle reads lanes 0..16 from `zero` and lanes 16..32 from `a`.
| let shift = shift as u32 & 0xff;
| if shift > 15 {
| // Shifting by 16 or more bytes produces all zeros.
| i
| } else {
| // Result byte `i` is `a[i - shift]` for `i >= shift` (index
| // `16 - shift + i` lands in `a`); for `i < shift` the index stays
| // below 16 and selects a `zero` lane instead.
| 16 - shift + i
| }
| }
| let zero = _mm_set1_epi8(0).as_i8x16(); |
| transmute::<i8x16, _>(simd_shuffle16!( |
| zero, |
| a.as_i8x16(), |
| <const IMM8: i32> [ |
| mask(IMM8, 0), |
| mask(IMM8, 1), |
| mask(IMM8, 2), |
| mask(IMM8, 3), |
| mask(IMM8, 4), |
| mask(IMM8, 5), |
| mask(IMM8, 6), |
| mask(IMM8, 7), |
| mask(IMM8, 8), |
| mask(IMM8, 9), |
| mask(IMM8, 10), |
| mask(IMM8, 11), |
| mask(IMM8, 12), |
| mask(IMM8, 13), |
| mask(IMM8, 14), |
| mask(IMM8, 15), |
| ], |
| )) |
| } |
| |
| /// Shifts `a` left by `IMM8` bytes while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bslli_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pslldq, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_bslli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| _mm_slli_si128_impl::<IMM8>(a) |
| } |
| |
| /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_bsrli_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_bsrli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| _mm_srli_si128_impl::<IMM8>(a) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` left by `IMM8` while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psllw, IMM8 = 7))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_slli_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(pslliw(a.as_i16x8(), IMM8)) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` left by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psllw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sll_epi16(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psllw(a.as_i16x8(), count.as_i16x8())) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` left by `IMM8` while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pslld, IMM8 = 7))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_slli_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psllid(a.as_i32x4(), IMM8)) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` left by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pslld))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sll_epi32(a: __m128i, count: __m128i) -> __m128i { |
| transmute(pslld(a.as_i32x4(), count.as_i32x4())) |
| } |
| |
| /// Shifts packed 64-bit integers in `a` left by `IMM8` while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_slli_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psllq, IMM8 = 7))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_slli_epi64<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(pslliq(a.as_i64x2(), IMM8)) |
| } |
| |
| /// Shifts packed 64-bit integers in `a` left by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sll_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psllq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sll_epi64(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psllq(a.as_i64x2(), count.as_i64x2())) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in sign |
| /// bits. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi16) |
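| ///
| /// A minimal sketch of the arithmetic (sign-preserving) shift:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             // The sign bit is replicated, so -8 >> 2 stays negative: -2.
| ///             let r = _mm_srai_epi16::<2>(_mm_set1_epi16(-8));
| ///             // -2 as a zero-extended 16-bit value.
| ///             assert_eq!(_mm_extract_epi16::<0>(r), 0xFFFE);
| ///         }
| ///     }
| /// }
| /// ```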
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psraw, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srai_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psraiw(a.as_i16x8(), IMM8)) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` right by `count` while shifting in sign |
| /// bits. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psraw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sra_epi16(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psraw(a.as_i16x8(), count.as_i16x8())) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in sign |
| /// bits. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srai_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrad, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srai_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psraid(a.as_i32x4(), IMM8)) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` right by `count` while shifting in sign |
| /// bits. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sra_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrad))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sra_epi32(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psrad(a.as_i32x4(), count.as_i32x4())) |
| } |
| |
| /// Shifts `a` right by `IMM8` bytes while shifting in zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrldq, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srli_si128<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| _mm_srli_si128_impl::<IMM8>(a) |
| } |
| |
| /// Implementation detail: converts the immediate argument of the |
| /// `_mm_srli_si128` intrinsic into a compile-time constant. |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| unsafe fn _mm_srli_si128_impl<const IMM8: i32>(a: __m128i) -> __m128i { |
| const fn mask(shift: i32, i: u32) -> u32 {
| // The shuffle reads lanes 0..16 from `a` and lanes 16..32 from `zero`.
| if (shift as u32) > 15 {
| // Shifting by 16 or more bytes produces all zeros.
| i + 16
| } else {
| // Result byte `i` is `a[i + shift]`; once `i + shift` reaches 16 the
| // index selects a `zero` lane instead.
| i + (shift as u32)
| }
| }
| let zero = _mm_set1_epi8(0).as_i8x16(); |
| let x: i8x16 = simd_shuffle16!( |
| a.as_i8x16(), |
| zero, |
| <const IMM8: i32> [ |
| mask(IMM8, 0), |
| mask(IMM8, 1), |
| mask(IMM8, 2), |
| mask(IMM8, 3), |
| mask(IMM8, 4), |
| mask(IMM8, 5), |
| mask(IMM8, 6), |
| mask(IMM8, 7), |
| mask(IMM8, 8), |
| mask(IMM8, 9), |
| mask(IMM8, 10), |
| mask(IMM8, 11), |
| mask(IMM8, 12), |
| mask(IMM8, 13), |
| mask(IMM8, 14), |
| mask(IMM8, 15), |
| ], |
| ); |
| transmute(x) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` right by `IMM8` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrlw, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srli_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psrliw(a.as_i16x8(), IMM8)) |
| } |
| |
| /// Shifts packed 16-bit integers in `a` right by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrlw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srl_epi16(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psrlw(a.as_i16x8(), count.as_i16x8())) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` right by `IMM8` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrld, IMM8 = 8))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srli_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psrlid(a.as_i32x4(), IMM8)) |
| } |
| |
| /// Shifts packed 32-bit integers in `a` right by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrld))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srl_epi32(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psrld(a.as_i32x4(), count.as_i32x4())) |
| } |
| |
| /// Shifts packed 64-bit integers in `a` right by `IMM8` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srli_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrlq, IMM8 = 1))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srli_epi64<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| transmute(psrliq(a.as_i64x2(), IMM8)) |
| } |
| |
| /// Shifts packed 64-bit integers in `a` right by `count` while shifting in |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_srl_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(psrlq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_srl_epi64(a: __m128i, count: __m128i) -> __m128i { |
| transmute(psrlq(a.as_i64x2(), count.as_i64x2())) |
| } |
| |
| /// Computes the bitwise AND of 128 bits (representing integer data) in `a` and |
| /// `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(andps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_and_si128(a: __m128i, b: __m128i) -> __m128i { |
| simd_and(a, b) |
| } |
| |
| /// Computes the bitwise NOT of 128 bits (representing integer data) in `a` and |
| /// then AND with `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_si128) |
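| ///
| /// A minimal sketch; note the operand order is `(!a) & b`, not `a & !b`:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_set1_epi32(0b1100);
| ///             let b = _mm_set1_epi32(0b1010);
| ///             let r = _mm_andnot_si128(a, b); // !0b1100 & 0b1010 == 0b0010
| ///             assert_eq!(_mm_cvtsi128_si32(r), 0b0010);
| ///         }
| ///     }
| /// }
| /// ```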
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(andnps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_andnot_si128(a: __m128i, b: __m128i) -> __m128i { |
| simd_and(simd_xor(_mm_set1_epi8(-1), a), b) |
| } |
| |
| /// Computes the bitwise OR of 128 bits (representing integer data) in `a` and |
| /// `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(orps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_or_si128(a: __m128i, b: __m128i) -> __m128i { |
| simd_or(a, b) |
| } |
| |
| /// Computes the bitwise XOR of 128 bits (representing integer data) in `a` and |
| /// `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(xorps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_xor_si128(a: __m128i, b: __m128i) -> __m128i { |
| simd_xor(a, b) |
| } |
| |
| /// Compares packed 8-bit integers in `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpeqb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpeq_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i8x16, _>(simd_eq(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Compares packed 16-bit integers in `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpeqw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpeq_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i16x8, _>(simd_eq(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed 32-bit integers in `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpeqd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpeq_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Compares packed 8-bit integers in `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpgt_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i8x16, _>(simd_gt(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Compares packed 16-bit integers in `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpgt_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i16x8, _>(simd_gt(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed 32-bit integers in `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpgt_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Compares packed 8-bit integers in `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmplt_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i8x16, _>(simd_lt(a.as_i8x16(), b.as_i8x16())) |
| } |
| |
| /// Compares packed 16-bit integers in `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmplt_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i16x8, _>(simd_lt(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Compares packed 32-bit integers in `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pcmpgtd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmplt_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Converts the lower two packed 32-bit integers in `a` to packed |
| /// double-precision (64-bit) floating-point elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtdq2pd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtepi32_pd(a: __m128i) -> __m128d { |
| let a = a.as_i32x4(); |
| simd_cast::<i32x2, __m128d>(simd_shuffle2!(a, a, [0, 1])) |
| } |
| |
| /// Returns `a` with its lower element replaced by `b` after converting it to |
| /// an `f64`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtsi2sd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsi32_sd(a: __m128d, b: i32) -> __m128d { |
| simd_insert(a, 0, b as f64) |
| } |
| |
| /// Converts packed 32-bit integers in `a` to packed single-precision (32-bit) |
| /// floating-point elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtepi32_ps) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtdq2ps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtepi32_ps(a: __m128i) -> __m128 { |
| cvtdq2ps(a.as_i32x4()) |
| } |
| |
| /// Converts packed single-precision (32-bit) floating-point elements in `a` |
| /// to packed 32-bit integers. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtps2dq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtps_epi32(a: __m128) -> __m128i { |
| transmute(cvtps2dq(a)) |
| } |
| |
| /// Returns a vector whose lowest element is `a` and all higher elements are |
| /// `0`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi32_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, target_arch = "x86_64"), assert_instr(movd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsi32_si128(a: i32) -> __m128i { |
| transmute(i32x4::new(a, 0, 0, 0)) |
| } |
| |
| /// Returns the lowest element of `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsi128_si32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsi128_si32(a: __m128i) -> i32 { |
| simd_extract(a.as_i32x4(), 0) |
| } |
| |
| /// Sets packed 64-bit integers with the supplied values, from highest to |
| /// lowest. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi64x) |
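| ///
| /// A minimal sketch of the argument order (`e1` is the high lane):
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let v = _mm_set_epi64x(2, 1);
| ///             // `e0` (here 1) occupies the low lane.
| ///             assert_eq!(_mm_cvtsi128_si32(v), 1);
| ///         }
| ///     }
| /// }
| /// ```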
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_epi64x(e1: i64, e0: i64) -> __m128i { |
| transmute(i64x2::new(e0, e1)) |
| } |
| |
| /// Sets packed 32-bit integers with the supplied values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { |
| transmute(i32x4::new(e0, e1, e2, e3)) |
| } |
| |
| /// Sets packed 16-bit integers with the supplied values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_epi16( |
| e7: i16, |
| e6: i16, |
| e5: i16, |
| e4: i16, |
| e3: i16, |
| e2: i16, |
| e1: i16, |
| e0: i16, |
| ) -> __m128i { |
| transmute(i16x8::new(e0, e1, e2, e3, e4, e5, e6, e7)) |
| } |
| |
| /// Sets packed 8-bit integers with the supplied values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_epi8( |
| e15: i8, |
| e14: i8, |
| e13: i8, |
| e12: i8, |
| e11: i8, |
| e10: i8, |
| e9: i8, |
| e8: i8, |
| e7: i8, |
| e6: i8, |
| e5: i8, |
| e4: i8, |
| e3: i8, |
| e2: i8, |
| e1: i8, |
| e0: i8, |
| ) -> __m128i { |
| #[rustfmt::skip] |
| transmute(i8x16::new( |
| e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
| )) |
| } |
| |
| /// Broadcasts 64-bit integer `a` to all elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi64x) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set1_epi64x(a: i64) -> __m128i { |
| _mm_set_epi64x(a, a) |
| } |
| |
| /// Broadcasts 32-bit integer `a` to all elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set1_epi32(a: i32) -> __m128i { |
| _mm_set_epi32(a, a, a, a) |
| } |
| |
| /// Broadcasts 16-bit integer `a` to all elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set1_epi16(a: i16) -> __m128i { |
| _mm_set_epi16(a, a, a, a, a, a, a, a) |
| } |
| |
| /// Broadcasts 8-bit integer `a` to all elements. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set1_epi8(a: i8) -> __m128i { |
| _mm_set_epi8(a, a, a, a, a, a, a, a, a, a, a, a, a, a, a, a) |
| } |
| |
| /// Sets packed 32-bit integers with the supplied values in reverse order. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setr_epi32(e3: i32, e2: i32, e1: i32, e0: i32) -> __m128i { |
| _mm_set_epi32(e0, e1, e2, e3) |
| } |
| |
| /// Sets packed 16-bit integers with the supplied values in reverse order. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setr_epi16( |
| e7: i16, |
| e6: i16, |
| e5: i16, |
| e4: i16, |
| e3: i16, |
| e2: i16, |
| e1: i16, |
| e0: i16, |
| ) -> __m128i { |
| _mm_set_epi16(e0, e1, e2, e3, e4, e5, e6, e7) |
| } |
| |
| /// Sets packed 8-bit integers with the supplied values in reverse order. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // no particular instruction to test |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setr_epi8( |
| e15: i8, |
| e14: i8, |
| e13: i8, |
| e12: i8, |
| e11: i8, |
| e10: i8, |
| e9: i8, |
| e8: i8, |
| e7: i8, |
| e6: i8, |
| e5: i8, |
| e4: i8, |
| e3: i8, |
| e2: i8, |
| e1: i8, |
| e0: i8, |
| ) -> __m128i { |
| #[rustfmt::skip] |
| _mm_set_epi8( |
| e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, |
| ) |
| } |
| |
| /// Returns a vector with all elements set to zero. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(xorps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setzero_si128() -> __m128i { |
| _mm_set1_epi64x(0) |
| } |
| |
| /// Loads a 64-bit integer from memory into the first element of the returned
| /// vector.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // FIXME movsd on windows |
| #[cfg_attr( |
| all( |
| test, |
| not(windows), |
| not(all(target_os = "linux", target_arch = "x86_64")), |
| target_arch = "x86_64" |
| ), |
| assert_instr(movq) |
| )] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadl_epi64(mem_addr: *const __m128i) -> __m128i { |
| _mm_set_epi64x(0, ptr::read_unaligned(mem_addr as *const i64)) |
| } |
| |
| /// Loads 128 bits of integer data from memory into a new vector.
| /// |
| /// `mem_addr` must be aligned on a 16-byte boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movaps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_load_si128(mem_addr: *const __m128i) -> __m128i { |
| *mem_addr |
| } |
| |
| /// Loads 128 bits of integer data from memory into a new vector.
| /// |
| /// `mem_addr` does not need to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_si128) |
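| ///
| /// A minimal sketch loading from a deliberately misaligned buffer:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let bytes = [0u8; 20];
| ///             // An odd offset is fine here, unlike with `_mm_load_si128`.
| ///             let v = _mm_loadu_si128(bytes.as_ptr().add(1) as *const __m128i);
| ///             assert_eq!(_mm_movemask_epi8(_mm_cmpeq_epi8(v, _mm_setzero_si128())), 0xFFFF);
| ///         }
| ///     }
| /// }
| /// ```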
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movups))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadu_si128(mem_addr: *const __m128i) -> __m128i { |
| let mut dst: __m128i = _mm_undefined_si128(); |
| ptr::copy_nonoverlapping( |
| mem_addr as *const u8, |
| &mut dst as *mut __m128i as *mut u8, |
| mem::size_of::<__m128i>(), |
| ); |
| dst |
| } |
| |
| /// Conditionally stores 8-bit integer elements from `a` into memory using
| /// `mask`.
| ///
| /// An element is stored only when the highest bit of the corresponding mask
| /// element is set.
| /// |
| /// `mem_addr` should correspond to a 128-bit memory location and does not need |
| /// to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_maskmoveu_si128) |
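| ///
| /// A minimal sketch; only the lane whose mask byte has its top bit set is
| /// written:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let mut buf = [0i8; 16];
| ///             let a = _mm_set1_epi8(42);
| ///             let mask = _mm_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
| ///             _mm_maskmoveu_si128(a, mask, buf.as_mut_ptr());
| ///             assert_eq!(buf[0], 42); // selected by the mask
| ///             assert_eq!(buf[1], 0); // untouched
| ///         }
| ///     }
| /// }
| /// ```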
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(maskmovdqu))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_maskmoveu_si128(a: __m128i, mask: __m128i, mem_addr: *mut i8) { |
| maskmovdqu(a.as_i8x16(), mask.as_i8x16(), mem_addr) |
| } |
| |
| /// Stores 128 bits of integer data from `a` into memory.
| /// |
| /// `mem_addr` must be aligned on a 16-byte boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movaps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_store_si128(mem_addr: *mut __m128i, a: __m128i) { |
| *mem_addr = a; |
| } |
| |
| /// Stores 128 bits of integer data from `a` into memory.
| /// |
| /// `mem_addr` does not need to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movups))] // FIXME movdqu expected |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_storeu_si128(mem_addr: *mut __m128i, a: __m128i) { |
| storeudq(mem_addr as *mut i8, a); |
| } |
| |
| /// Stores the lower 64-bit integer of `a` to a memory location.
| /// |
| /// `mem_addr` does not need to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // FIXME mov on windows, movlps on i686 |
| #[cfg_attr( |
| all( |
| test, |
| not(windows), |
| not(all(target_os = "linux", target_arch = "x86_64")), |
| target_arch = "x86_64" |
| ), |
| assert_instr(movq) |
| )] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_storel_epi64(mem_addr: *mut __m128i, a: __m128i) { |
| ptr::copy_nonoverlapping(&a as *const _ as *const u8, mem_addr as *mut u8, 8); |
| } |
| |
| /// Stores a 128-bit integer vector to a 128-bit aligned memory location. |
| /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
| /// used again soon). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movntps))] // FIXME movntdq |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_stream_si128(mem_addr: *mut __m128i, a: __m128i) { |
| intrinsics::nontemporal_store(mem_addr, a); |
| } |
| |
| /// Stores a 32-bit integer value in the specified memory location. |
| /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
| /// used again soon). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_si32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movnti))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_stream_si32(mem_addr: *mut i32, a: i32) { |
| intrinsics::nontemporal_store(mem_addr, a); |
| } |
| |
| /// Returns a vector where the low element is extracted from `a` and its upper |
| /// element is zero. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // FIXME movd on windows, movd on i686 |
| #[cfg_attr(all(test, not(windows), target_arch = "x86_64"), assert_instr(movq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_move_epi64(a: __m128i) -> __m128i { |
| let zero = _mm_setzero_si128(); |
| let r: i64x2 = simd_shuffle2!(a.as_i64x2(), zero.as_i64x2(), [0, 2]); |
| transmute(r) |
| } |
| |
| /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
| /// using signed saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi16) |
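| ///
| /// A minimal sketch of the signed saturation at the `i8` boundaries:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_set1_epi16(300); // too large for i8
| ///             let b = _mm_set1_epi16(-300); // too small for i8
| ///             let r = _mm_packs_epi16(a, b);
| ///             // Lanes from `a` clamp to 127 (0x7F), lanes from `b` to -128 (0x80).
| ///             assert_eq!(_mm_extract_epi16::<0>(r), 0x7F7F);
| ///             assert_eq!(_mm_extract_epi16::<4>(r), 0x8080);
| ///         }
| ///     }
| /// }
| /// ```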
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(packsswb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_packs_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(packsswb(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Converts packed 32-bit integers from `a` and `b` to packed 16-bit integers |
| /// using signed saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packs_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(packssdw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_packs_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute(packssdw(a.as_i32x4(), b.as_i32x4())) |
| } |
| |
| /// Converts packed 16-bit integers from `a` and `b` to packed 8-bit integers |
| /// using unsigned saturation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_packus_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(packuswb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_packus_epi16(a: __m128i, b: __m128i) -> __m128i { |
| transmute(packuswb(a.as_i16x8(), b.as_i16x8())) |
| } |
| |
| /// Returns the `IMM8` element of `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_extract_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pextrw, IMM8 = 7))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_extract_epi16<const IMM8: i32>(a: __m128i) -> i32 { |
| static_assert_imm3!(IMM8); |
| simd_extract::<_, u16>(a.as_u16x8(), IMM8 as u32) as i32 |
| } |
| |
| /// Returns a new vector where the `IMM8` element of `a` is replaced with `i`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_insert_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pinsrw, IMM8 = 7))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_insert_epi16<const IMM8: i32>(a: __m128i, i: i32) -> __m128i { |
| static_assert_imm3!(IMM8); |
| transmute(simd_insert(a.as_i16x8(), IMM8 as u32, i as i16)) |
| } |
| |
| /// Returns a mask of the most significant bit of each 8-bit element in `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_epi8) |
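| ///
| /// A minimal sketch; bit `i` of the result mirrors the sign bit of lane `i`:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             // Only lanes 0 and 15 have their most significant bit set.
| ///             let a = _mm_setr_epi8(-1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -128);
| ///             assert_eq!(_mm_movemask_epi8(a), 0b1000_0000_0000_0001);
| ///         }
| ///     }
| /// }
| /// ```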
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pmovmskb))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_movemask_epi8(a: __m128i) -> i32 { |
| simd_bitmask::<_, u16>(a.as_i8x16()) as u32 as i32 |
| } |
| |
| /// Shuffles 32-bit integers in `a` using the control in `IMM8`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_epi32) |
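| ///
| /// A minimal sketch of the control encoding (two bits per output lane,
| /// lowest bits first):
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_setr_epi32(0, 1, 2, 3);
| ///             // 0b00_01_10_11 selects source lanes 3, 2, 1, 0: a full reversal.
| ///             let r = _mm_shuffle_epi32::<0b00_01_10_11>(a);
| ///             assert_eq!(_mm_cvtsi128_si32(r), 3);
| ///         }
| ///     }
| /// }
| /// ```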
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pshufd, IMM8 = 9))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_shuffle_epi32<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| let a = a.as_i32x4(); |
| let x: i32x4 = simd_shuffle4!( |
| a, |
| a, |
| <const IMM8: i32> [ |
| IMM8 as u32 & 0b11, |
| (IMM8 as u32 >> 2) & 0b11, |
| (IMM8 as u32 >> 4) & 0b11, |
| (IMM8 as u32 >> 6) & 0b11, |
| ], |
| ); |
| transmute(x) |
| } |
| |
| /// Shuffles 16-bit integers in the high 64 bits of `a` using the control in |
| /// `IMM8`. |
| /// |
| /// Puts the results in the high 64 bits of the returned vector, with the low
| /// 64 bits being copied from `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflehi_epi16) |
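| ///
| /// A minimal sketch; the low half passes through untouched:
| ///
| /// ```
| /// #[cfg(target_arch = "x86_64")]
| /// {
| ///     use std::arch::x86_64::*;
| ///     if is_x86_feature_detected!("sse2") {
| ///         unsafe {
| ///             let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
| ///             // Reverse only lanes 4..8, using the same two-bit encoding as
| ///             // `_mm_shuffle_epi32`.
| ///             let r = _mm_shufflehi_epi16::<0b00_01_10_11>(a);
| ///             assert_eq!(_mm_extract_epi16::<4>(r), 7);
| ///             assert_eq!(_mm_extract_epi16::<0>(r), 0);
| ///         }
| ///     }
| /// }
| /// ```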
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pshufhw, IMM8 = 9))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_shufflehi_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| let a = a.as_i16x8(); |
| let x: i16x8 = simd_shuffle8!( |
| a, |
| a, |
| <const IMM8: i32> [ |
| 0, |
| 1, |
| 2, |
| 3, |
| (IMM8 as u32 & 0b11) + 4, |
| ((IMM8 as u32 >> 2) & 0b11) + 4, |
| ((IMM8 as u32 >> 4) & 0b11) + 4, |
| ((IMM8 as u32 >> 6) & 0b11) + 4, |
| ], |
| ); |
| transmute(x) |
| } |
| |
| /// Shuffles 16-bit integers in the low 64 bits of `a` using the control in |
| /// `IMM8`. |
| /// |
| /// Puts the results in the low 64 bits of the returned vector, with the high
| /// 64 bits being copied from `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shufflelo_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(pshuflw, IMM8 = 9))] |
| #[rustc_legacy_const_generics(1)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_shufflelo_epi16<const IMM8: i32>(a: __m128i) -> __m128i { |
| static_assert_imm8!(IMM8); |
| let a = a.as_i16x8(); |
| let x: i16x8 = simd_shuffle8!( |
| a, |
| a, |
| <const IMM8: i32> [ |
| IMM8 as u32 & 0b11, |
| (IMM8 as u32 >> 2) & 0b11, |
| (IMM8 as u32 >> 4) & 0b11, |
| (IMM8 as u32 >> 6) & 0b11, |
| 4, |
| 5, |
| 6, |
| 7, |
| ], |
| ); |
| transmute(x) |
| } |
| |
| /// Unpacks and interleaves 8-bit integers from the high half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi8) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(punpckhbw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpackhi_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i8x16, _>(simd_shuffle16!( |
| a.as_i8x16(), |
| b.as_i8x16(), |
| [8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31], |
| )) |
| } |
| |
| /// Unpacks and interleaves 16-bit integers from the high half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(punpckhwd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpackhi_epi16(a: __m128i, b: __m128i) -> __m128i { |
| let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [4, 12, 5, 13, 6, 14, 7, 15]); |
| transmute::<i16x8, _>(x) |
| } |
| |
| /// Unpacks and interleaves 32-bit integers from the high half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(unpckhps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpackhi_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [2, 6, 3, 7])) |
| } |
| |
| /// Unpacks and interleaves 64-bit integers from the high half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(unpckhpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpackhi_epi64(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [1, 3])) |
| } |
| |
| /// Unpacks and interleaves 8-bit integers from the low half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi8) |
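| ///
| /// # Examples
| ///
| /// A sketch of the interleaving (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_set1_epi8(1);
| /// let b = _mm_set1_epi8(2);
| /// // Bytes alternate: [1, 2, 1, 2, ...], so each 16-bit lane reads 0x0201
| /// // in the little-endian lane layout.
| /// let r = _mm_unpacklo_epi8(a, b);
| /// assert_eq!(_mm_extract_epi16::<0>(r), 0x0201);
| /// # }
| /// ```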
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(punpcklbw))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpacklo_epi8(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i8x16, _>(simd_shuffle16!( |
| a.as_i8x16(), |
| b.as_i8x16(), |
| [0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23], |
| )) |
| } |
| |
| /// Unpacks and interleaves 16-bit integers from the low half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi16) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(punpcklwd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpacklo_epi16(a: __m128i, b: __m128i) -> __m128i { |
| let x = simd_shuffle8!(a.as_i16x8(), b.as_i16x8(), [0, 8, 1, 9, 2, 10, 3, 11]); |
| transmute::<i16x8, _>(x) |
| } |
| |
| /// Unpacks and interleaves 32-bit integers from the low half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(unpcklps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpacklo_epi32(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i32x4, _>(simd_shuffle4!(a.as_i32x4(), b.as_i32x4(), [0, 4, 1, 5])) |
| } |
| |
| /// Unpacks and interleaves 64-bit integers from the low half of `a` and `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_epi64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpacklo_epi64(a: __m128i, b: __m128i) -> __m128i { |
| transmute::<i64x2, _>(simd_shuffle2!(a.as_i64x2(), b.as_i64x2(), [0, 2])) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the sum of the |
| /// low elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_sd) |
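| ///
| /// # Examples
| ///
| /// A sketch contrasting the scalar `sd` form with the packed `pd` form
| /// (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_setr_pd(1.0, 2.0);
| /// let b = _mm_setr_pd(10.0, 20.0);
| /// let r = _mm_add_sd(a, b); // [11.0, 2.0]: the upper lane comes from `a`
| /// assert_eq!(_mm_cvtsd_f64(r), 11.0);
| /// # }
| /// ```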
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(addsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(a, 0, _mm_cvtsd_f64(a) + _mm_cvtsd_f64(b)) |
| } |
| |
| /// Adds packed double-precision (64-bit) floating-point elements in `a` and |
| /// `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_add_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(addpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_add_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_add(a, b) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the result of
| /// dividing the lower element of `a` by the lower element of `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(divsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_div_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(a, 0, _mm_cvtsd_f64(a) / _mm_cvtsd_f64(b)) |
| } |
| |
| /// Divides packed double-precision (64-bit) floating-point elements in `a` by
| /// packed elements in `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_div_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(divpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_div_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_div(a, b) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the maximum |
| /// of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(maxsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_max_sd(a: __m128d, b: __m128d) -> __m128d { |
| maxsd(a, b) |
| } |
| |
| /// Returns a new vector with the maximum values from corresponding elements in |
| /// `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_max_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(maxpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_max_pd(a: __m128d, b: __m128d) -> __m128d { |
| maxpd(a, b) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the minimum |
| /// of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(minsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_min_sd(a: __m128d, b: __m128d) -> __m128d { |
| minsd(a, b) |
| } |
| |
| /// Returns a new vector with the minimum values from corresponding elements in |
| /// `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_min_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(minpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_min_pd(a: __m128d, b: __m128d) -> __m128d { |
| minpd(a, b) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by multiplying the |
| /// low elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(mulsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mul_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(a, 0, _mm_cvtsd_f64(a) * _mm_cvtsd_f64(b)) |
| } |
| |
| /// Multiplies packed double-precision (64-bit) floating-point elements in `a` |
| /// and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_mul_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(mulpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_mul_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_mul(a, b) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the square |
| /// root of the lower element of `b`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(sqrtsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sqrt_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(a, 0, _mm_cvtsd_f64(sqrtsd(b))) |
| } |
| |
| /// Returns a new vector with the square root of each of the values in `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sqrt_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(sqrtpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sqrt_pd(a: __m128d) -> __m128d { |
| simd_fsqrt(a) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by subtracting the |
| /// low element of `b` from the low element of `a`.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(subsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(a, 0, _mm_cvtsd_f64(a) - _mm_cvtsd_f64(b)) |
| } |
| |
| /// Subtracts packed double-precision (64-bit) floating-point elements in `b`
| /// from `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_sub_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(subpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_sub_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_sub(a, b) |
| } |
| |
| /// Computes the bitwise AND of packed double-precision (64-bit) floating-point |
| /// elements in `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_and_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(andps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_and_pd(a: __m128d, b: __m128d) -> __m128d { |
| let a: __m128i = transmute(a); |
| let b: __m128i = transmute(b); |
| transmute(_mm_and_si128(a, b)) |
| } |
| |
| /// Computes the bitwise NOT of `a` and then AND with `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_andnot_pd) |
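| ///
| /// # Examples
| ///
| /// One common use is clearing the sign bits to compute absolute values; a
| /// sketch (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let sign = _mm_set1_pd(-0.0); // only the sign bit set in each lane
| /// let x = _mm_setr_pd(-3.5, 7.25);
| /// let abs = _mm_andnot_pd(sign, x); // `!sign & x` clears the sign bits
| /// assert_eq!(_mm_cvtsd_f64(abs), 3.5);
| /// # }
| /// ```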
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(andnps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_andnot_pd(a: __m128d, b: __m128d) -> __m128d { |
| let a: __m128i = transmute(a); |
| let b: __m128i = transmute(b); |
| transmute(_mm_andnot_si128(a, b)) |
| } |
| |
| /// Computes the bitwise OR of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_or_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(orps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_or_pd(a: __m128d, b: __m128d) -> __m128d { |
| let a: __m128i = transmute(a); |
| let b: __m128i = transmute(b); |
| transmute(_mm_or_si128(a, b)) |
| } |
| |
| /// Computes the bitwise XOR of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_xor_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(xorps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_xor_pd(a: __m128d, b: __m128d) -> __m128d { |
| let a: __m128i = transmute(a); |
| let b: __m128i = transmute(b); |
| transmute(_mm_xor_si128(a, b)) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the equality |
| /// comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpeqsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpeq_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 0) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the less-than |
| /// comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpltsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmplt_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 1) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// less-than-or-equal comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmplesd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmple_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 2) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// greater-than comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpltsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpgt_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(_mm_cmplt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// greater-than-or-equal comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmplesd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpge_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(_mm_cmple_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the result
| /// of checking whether the lower elements of `a` and `b` are `NaN`. If
| /// neither is `NaN`, the low element is set to `0xFFFFFFFFFFFFFFFF`, and to
| /// `0` otherwise.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpordsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpord_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 7) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the result of
| /// checking whether the lower elements of `a` and `b` are `NaN`. If either is
| /// `NaN`, the low element is set to `0xFFFFFFFFFFFFFFFF`, and to `0` otherwise.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpunordsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpunord_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 3) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the not-equal |
| /// comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpneqsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpneq_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 4) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// not-less-than comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnltsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnlt_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 5) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// not-less-than-or-equal comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnlesd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnle_sd(a: __m128d, b: __m128d) -> __m128d { |
| cmpsd(a, b, 6) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// not-greater-than comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnltsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpngt_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(_mm_cmpnlt_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
| } |
| |
| /// Returns a new vector with the low element of `a` replaced by the |
| /// not-greater-than-or-equal comparison of the lower elements of `a` and `b`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnlesd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnge_sd(a: __m128d, b: __m128d) -> __m128d { |
| simd_insert(_mm_cmpnle_sd(b, a), 1, simd_extract::<_, f64>(a, 1)) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpeq_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpeqpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpeq_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 0) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmplt_pd) |
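| ///
| /// # Examples
| ///
| /// The result lanes are all-ones or all-zeros masks; a sketch extracting
| /// them with `_mm_movemask_pd` (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_setr_pd(1.0, 4.0);
| /// let b = _mm_setr_pd(2.0, 3.0);
| /// let m = _mm_cmplt_pd(a, b); // lane 0: 1.0 < 2.0; lane 1: !(4.0 < 3.0)
| /// assert_eq!(_mm_movemask_pd(m), 0b01);
| /// # }
| /// ```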
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpltpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmplt_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 1) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for less-than-or-equal.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmple_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmplepd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmple_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 2) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpgt_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpltpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpgt_pd(a: __m128d, b: __m128d) -> __m128d { |
| _mm_cmplt_pd(b, a) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for greater-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpge_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmplepd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpge_pd(a: __m128d, b: __m128d) -> __m128d { |
| _mm_cmple_pd(b, a) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` to see if neither is `NaN`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpord_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpordpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpord_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 7) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` to see if either is `NaN`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpunord_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpunordpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpunord_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 3) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for not-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpneq_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpneqpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpneq_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 4) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for not-less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnlt_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnltpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnlt_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 5) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for not-less-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnle_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnlepd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnle_pd(a: __m128d, b: __m128d) -> __m128d { |
| cmppd(a, b, 6) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for not-greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpngt_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnltpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpngt_pd(a: __m128d, b: __m128d) -> __m128d { |
| _mm_cmpnlt_pd(b, a) |
| } |
| |
| /// Compares corresponding elements in `a` and `b` for |
| /// not-greater-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cmpnge_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cmpnlepd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cmpnge_pd(a: __m128d, b: __m128d) -> __m128d { |
| _mm_cmpnle_pd(b, a) |
| } |
| |
| /// Compares the lower element of `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comieq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comieq_sd(a: __m128d, b: __m128d) -> i32 { |
| comieqsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comilt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comilt_sd(a: __m128d, b: __m128d) -> i32 { |
| comiltsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for less-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comile_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comile_sd(a: __m128d, b: __m128d) -> i32 { |
| comilesd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comigt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comigt_sd(a: __m128d, b: __m128d) -> i32 { |
| comigtsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comige_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comige_sd(a: __m128d, b: __m128d) -> i32 { |
| comigesd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for not-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comineq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(comisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_comineq_sd(a: __m128d, b: __m128d) -> i32 { |
| comineqsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for equality. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomieq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomieq_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomieqsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for less-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomilt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomilt_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomiltsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for less-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomile_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomile_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomilesd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for greater-than. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomigt_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomigt_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomigtsd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for greater-than-or-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomige_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomige_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomigesd(a, b) |
| } |
| |
| /// Compares the lower element of `a` and `b` for not-equal. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_ucomineq_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(ucomisd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_ucomineq_sd(a: __m128d, b: __m128d) -> i32 { |
| ucomineqsd(a, b) |
| } |
| |
| /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
| /// packed single-precision (32-bit) floating-point elements.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_ps) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtpd2ps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtpd_ps(a: __m128d) -> __m128 { |
| cvtpd2ps(a) |
| } |
| |
| /// Converts packed single-precision (32-bit) floating-point elements in `a` to
| /// packed double-precision (64-bit) floating-point elements.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtps_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtps2pd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtps_pd(a: __m128) -> __m128d { |
| cvtps2pd(a) |
| } |
| |
| /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
| /// packed 32-bit integers. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtpd_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtpd2dq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtpd_epi32(a: __m128d) -> __m128i { |
| transmute(cvtpd2dq(a)) |
| } |
| |
| /// Converts the lower double-precision (64-bit) floating-point element in `a`
| /// to a 32-bit integer.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_si32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtsd2si))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsd_si32(a: __m128d) -> i32 { |
| cvtsd2si(a) |
| } |
| |
| /// Converts the lower double-precision (64-bit) floating-point element in `b` |
| /// to a single-precision (32-bit) floating-point element, stores the result
| /// in the lower element of the return value, and copies the upper 3 elements
| /// from `a` to the upper elements of the return value.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_ss) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtsd2ss))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsd_ss(a: __m128, b: __m128d) -> __m128 { |
| cvtsd2ss(a, b) |
| } |
| |
| /// Returns the lower double-precision (64-bit) floating-point element of `a`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtsd_f64) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtsd_f64(a: __m128d) -> f64 { |
| simd_extract(a, 0) |
| } |
| |
| /// Converts the lower single-precision (32-bit) floating-point element in `b` |
| /// to a double-precision (64-bit) floating-point element, stores the result
| /// in the lower element of the return value, and copies the upper element
| /// from `a` to the upper element of the return value.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtss_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvtss2sd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvtss_sd(a: __m128d, b: __m128) -> __m128d { |
| cvtss2sd(a, b) |
| } |
| |
| /// Converts packed double-precision (64-bit) floating-point elements in `a` to |
| /// packed 32-bit integers with truncation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttpd_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvttpd2dq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvttpd_epi32(a: __m128d) -> __m128i { |
| transmute(cvttpd2dq(a)) |
| } |
| |
| /// Converts the lower double-precision (64-bit) floating-point element in `a` |
| /// to a 32-bit integer with truncation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_si32) |
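| ///
| /// # Examples
| ///
| /// A sketch contrasting truncation with the rounding conversion
| /// `_mm_cvtsd_si32` (hypothetical usage; `x86_64` assumed; the non-truncating
| /// form uses the current MXCSR rounding mode, round-to-nearest-even by
| /// default):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_set_sd(3.7);
| /// assert_eq!(_mm_cvttsd_si32(a), 3); // truncates toward zero
| /// assert_eq!(_mm_cvtsd_si32(a), 4); // rounds
| /// # }
| /// ```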
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvttsd2si))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvttsd_si32(a: __m128d) -> i32 { |
| cvttsd2si(a) |
| } |
| |
| /// Converts packed single-precision (32-bit) floating-point elements in `a` to |
| /// packed 32-bit integers with truncation. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttps_epi32) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(cvttps2dq))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_cvttps_epi32(a: __m128) -> __m128i { |
| transmute(cvttps2dq(a)) |
| } |
| |
| /// Copies the double-precision (64-bit) floating-point element `a` to the
| /// lower element of the return value, and zeroes the upper element.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_sd(a: f64) -> __m128d { |
| _mm_set_pd(0.0, a) |
| } |
| |
| /// Broadcasts the double-precision (64-bit) floating-point value `a` to all
| /// elements of the return value.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set1_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set1_pd(a: f64) -> __m128d { |
| _mm_set_pd(a, a) |
| } |
| |
| /// Broadcasts the double-precision (64-bit) floating-point value `a` to all
| /// elements of the return value.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd1) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_pd1(a: f64) -> __m128d { |
| _mm_set_pd(a, a) |
| } |
| |
| /// Sets packed double-precision (64-bit) floating-point elements in the return |
| /// value with the supplied values. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_set_pd) |
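| ///
| /// # Examples
| ///
| /// Note the argument order: the first argument lands in the *high* lane. A
| /// sketch (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let v = _mm_set_pd(2.0, 1.0); // high = 2.0, low = 1.0
| /// let w = _mm_setr_pd(1.0, 2.0); // same vector, arguments in memory order
| /// assert_eq!(_mm_cvtsd_f64(v), 1.0);
| /// assert_eq!(_mm_cvtsd_f64(w), 1.0);
| /// # }
| /// ```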
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_set_pd(a: f64, b: f64) -> __m128d { |
| __m128d(b, a) |
| } |
| |
| /// Sets packed double-precision (64-bit) floating-point elements in the return |
| /// value with the supplied values in reverse order. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setr_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setr_pd(a: f64, b: f64) -> __m128d { |
| _mm_set_pd(b, a) |
| } |
| |
| /// Returns packed double-precision (64-bit) floating-point elements with all |
| /// zeros. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_setzero_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(xorps))] // FIXME xorpd expected |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_setzero_pd() -> __m128d { |
| _mm_set_pd(0.0, 0.0) |
| } |
| |
| /// Returns a mask of the most significant bit of each element in `a`. |
| /// |
| /// The mask is stored in the 2 least significant bits of the return value. |
| /// All other bits are set to `0`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_movemask_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movmskpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_movemask_pd(a: __m128d) -> i32 { |
| movmskpd(a) |
| } |
| |
| /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
| /// floating-point elements) from memory into the returned vector. |
| /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
| /// exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd) |
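| ///
| /// # Examples
| ///
| /// A sketch of one way to guarantee the 16-byte alignment, via a
| /// `#[repr(align(16))]` wrapper (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// #[repr(align(16))]
| /// struct Aligned([f64; 2]);
| ///
| /// let data = Aligned([1.0, 2.0]);
| /// let v = _mm_load_pd(data.0.as_ptr());
| /// assert_eq!(_mm_cvtsd_f64(v), 1.0);
| /// # }
| /// ```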
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movaps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_load_pd(mem_addr: *const f64) -> __m128d { |
| *(mem_addr as *const __m128d) |
| } |
| |
| /// Loads a 64-bit double-precision value into the low element of the returned
| /// 128-bit vector of `[2 x double]` and clears the upper element.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_load_sd(mem_addr: *const f64) -> __m128d { |
| _mm_setr_pd(*mem_addr, 0.) |
| } |
| |
| /// Loads a double-precision value into the high-order bits of a 128-bit |
| /// vector of `[2 x double]`. The low-order bits are copied from the low-order |
| /// bits of the first operand. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadh_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movhps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadh_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
| _mm_setr_pd(simd_extract(a, 0), *mem_addr) |
| } |
| |
| /// Loads a double-precision value into the low-order bits of a 128-bit |
| /// vector of `[2 x double]`. The high-order bits are copied from the |
| /// high-order bits of the first operand. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadl_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movlps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadl_pd(a: __m128d, mem_addr: *const f64) -> __m128d { |
| _mm_setr_pd(*mem_addr, simd_extract(a, 1)) |
| } |
| |
| /// Stores a 128-bit floating-point vector of `[2 x double]` to a 128-bit
| /// aligned memory location. |
| /// To minimize caching, the data is flagged as non-temporal (unlikely to be |
| /// used again soon). |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_stream_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movntps))] // FIXME movntpd |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_stream_pd(mem_addr: *mut f64, a: __m128d) { |
| intrinsics::nontemporal_store(mem_addr as *mut __m128d, a); |
| } |
| |
| /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
| /// memory location. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_store_sd(mem_addr: *mut f64, a: __m128d) { |
| *mem_addr = simd_extract(a, 0) |
| } |
| |
| /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
| /// floating-point elements) from `a` into memory. `mem_addr` must be aligned |
| /// on a 16-byte boundary or a general-protection exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movaps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_store_pd(mem_addr: *mut f64, a: __m128d) { |
| *(mem_addr as *mut __m128d) = a; |
| } |
| |
| /// Stores 128-bits (composed of 2 packed double-precision (64-bit) |
| /// floating-point elements) from `a` into memory. |
| /// `mem_addr` does not need to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeu_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movups))] // FIXME movupd expected |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_storeu_pd(mem_addr: *mut f64, a: __m128d) { |
| storeupd(mem_addr as *mut i8, a); |
| } |
| |
| /// Stores the lower double-precision (64-bit) floating-point element from `a` |
| /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
| /// 16-byte boundary or a general-protection exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store1_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_store1_pd(mem_addr: *mut f64, a: __m128d) { |
| let b: __m128d = simd_shuffle2!(a, a, [0, 0]); |
| *(mem_addr as *mut __m128d) = b; |
| } |
| |
| /// Stores the lower double-precision (64-bit) floating-point element from `a` |
| /// into 2 contiguous elements in memory. `mem_addr` must be aligned on a |
| /// 16-byte boundary or a general-protection exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_store_pd1) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_store_pd1(mem_addr: *mut f64, a: __m128d) { |
| let b: __m128d = simd_shuffle2!(a, a, [0, 0]); |
| *(mem_addr as *mut __m128d) = b; |
| } |
| |
| /// Stores 2 double-precision (64-bit) floating-point elements from `a` into |
| /// memory in reverse order. |
| /// `mem_addr` must be aligned on a 16-byte boundary or a general-protection |
| /// exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storer_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| #[allow(clippy::cast_ptr_alignment)] |
| pub unsafe fn _mm_storer_pd(mem_addr: *mut f64, a: __m128d) { |
| let b: __m128d = simd_shuffle2!(a, a, [1, 0]); |
| *(mem_addr as *mut __m128d) = b; |
| } |
| |
| /// Stores the upper 64 bits of a 128-bit vector of `[2 x double]` to a |
| /// memory location. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storeh_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movhps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_storeh_pd(mem_addr: *mut f64, a: __m128d) { |
| *mem_addr = simd_extract(a, 1); |
| } |
| |
| /// Stores the lower 64 bits of a 128-bit vector of `[2 x double]` to a |
| /// memory location. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_storel_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_storel_pd(mem_addr: *mut f64, a: __m128d) { |
| *mem_addr = simd_extract(a, 0); |
| } |
| |
| /// Loads a double-precision (64-bit) floating-point element from memory
| /// into both elements of the returned vector.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load1_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // #[cfg_attr(test, assert_instr(movapd))] // FIXME LLVM uses different codegen |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_load1_pd(mem_addr: *const f64) -> __m128d { |
| let d = *mem_addr; |
| _mm_setr_pd(d, d) |
| } |
| |
| /// Loads a double-precision (64-bit) floating-point element from memory
| /// into both elements of the returned vector.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_load_pd1) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| // #[cfg_attr(test, assert_instr(movapd))] // FIXME same as _mm_load1_pd |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_load_pd1(mem_addr: *const f64) -> __m128d { |
| _mm_load1_pd(mem_addr) |
| } |
| |
| /// Loads 2 double-precision (64-bit) floating-point elements from memory into |
| /// the returned vector in reverse order. `mem_addr` must be aligned on a |
| /// 16-byte boundary or a general-protection exception may be generated. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadr_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movaps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadr_pd(mem_addr: *const f64) -> __m128d { |
| let a = _mm_load_pd(mem_addr); |
| simd_shuffle2!(a, a, [1, 0]) |
| } |
| |
| /// Loads 128-bits (composed of 2 packed double-precision (64-bit) |
| /// floating-point elements) from memory into the returned vector. |
| /// `mem_addr` does not need to be aligned on any particular boundary. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_loadu_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movups))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_loadu_pd(mem_addr: *const f64) -> __m128d { |
| let mut dst = _mm_undefined_pd(); |
| ptr::copy_nonoverlapping( |
| mem_addr as *const u8, |
| &mut dst as *mut __m128d as *mut u8, |
| mem::size_of::<__m128d>(), |
| ); |
| dst |
| } |
| |
| /// Constructs a 128-bit floating-point vector of `[2 x double]` from two |
| /// 128-bit vector parameters of `[2 x double]`, using the immediate-value |
| /// parameter as a specifier. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_shuffle_pd) |
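| ///
| /// # Examples
| ///
| /// Bit 0 of `MASK` selects the low output lane from `a`, and bit 1 selects
| /// the high output lane from `b`; a sketch (hypothetical usage; `x86_64`
| /// assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_setr_pd(1.0, 2.0);
| /// let b = _mm_setr_pd(3.0, 4.0);
| /// let r = _mm_shuffle_pd::<0b10>(a, b); // [a[0], b[1]] = [1.0, 4.0]
| /// assert_eq!(_mm_cvtsd_f64(r), 1.0);
| /// # }
| /// ```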
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(shufps, MASK = 2))] |
| #[rustc_legacy_const_generics(2)] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_shuffle_pd<const MASK: i32>(a: __m128d, b: __m128d) -> __m128d { |
| static_assert_imm8!(MASK); |
| simd_shuffle2!(a, b, <const MASK: i32> [MASK as u32 & 0b1, ((MASK as u32 >> 1) & 0b1) + 2]) |
| } |
| |
| /// Constructs a 128-bit floating-point vector of `[2 x double]`. The lower |
| /// 64 bits are set to the lower 64 bits of the second parameter. The upper |
| /// 64 bits are set to the upper 64 bits of the first parameter. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_move_sd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(movsd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_move_sd(a: __m128d, b: __m128d) -> __m128d { |
| _mm_setr_pd(simd_extract(b, 0), simd_extract(a, 1)) |
| } |
| |
| /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
| /// floating-point vector of `[4 x float]`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_ps) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castpd_ps(a: __m128d) -> __m128 { |
| transmute(a) |
| } |
| |
| /// Casts a 128-bit floating-point vector of `[2 x double]` into a 128-bit |
| /// integer vector. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castpd_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castpd_si128(a: __m128d) -> __m128i { |
| transmute(a) |
| } |
| |
| /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
| /// floating-point vector of `[2 x double]`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castps_pd(a: __m128) -> __m128d { |
| transmute(a) |
| } |
| |
| /// Casts a 128-bit floating-point vector of `[4 x float]` into a 128-bit |
| /// integer vector. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castps_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castps_si128(a: __m128) -> __m128i { |
| transmute(a) |
| } |
| |
| /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
| /// of `[2 x double]`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castsi128_pd(a: __m128i) -> __m128d { |
| transmute(a) |
| } |
| |
| /// Casts a 128-bit integer vector into a 128-bit floating-point vector |
| /// of `[4 x float]`. |
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_castsi128_ps) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_castsi128_ps(a: __m128i) -> __m128 { |
| transmute(a) |
| } |
| |
| /// Returns a vector of type `__m128d` with undefined elements.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_undefined_pd() -> __m128d { |
| __m128d(0.0, 0.0) |
| } |
| |
| /// Returns a vector of type `__m128i` with undefined elements.
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_undefined_si128) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_undefined_si128() -> __m128i { |
| __m128i(0, 0) |
| } |
| |
| /// The resulting `__m128d` element is composed of the high-order values of
| /// the two `__m128d` interleaved input elements, i.e.:
| ///
| /// * The `[127:64]` bits are copied from the `[127:64]` bits of the second
| ///   input
| /// * The `[63:0]` bits are copied from the `[127:64]` bits of the first
| ///   input
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpackhi_pd) |
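| ///
| /// # Examples
| ///
| /// A sketch (hypothetical usage; `x86_64` assumed):
| ///
| /// ```
| /// # #[cfg(target_arch = "x86_64")]
| /// # unsafe fn sketch() {
| /// # use core::arch::x86_64::*;
| /// let a = _mm_setr_pd(1.0, 2.0);
| /// let b = _mm_setr_pd(3.0, 4.0);
| /// let r = _mm_unpackhi_pd(a, b); // [a[1], b[1]] = [2.0, 4.0]
| /// assert_eq!(_mm_cvtsd_f64(r), 2.0);
| /// # }
| /// ```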
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(test, assert_instr(unpckhpd))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpackhi_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_shuffle2!(a, b, [1, 3]) |
| } |
| |
| /// The resulting `__m128d` element is composed of the low-order values of
| /// the two `__m128d` interleaved input elements, i.e.:
| ///
| /// * The `[127:64]` bits are copied from the `[63:0]` bits of the second
| ///   input
| /// * The `[63:0]` bits are copied from the `[63:0]` bits of the first input
| /// |
| /// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_unpacklo_pd) |
| #[inline] |
| #[target_feature(enable = "sse2")] |
| #[cfg_attr(all(test, not(target_os = "windows")), assert_instr(movlhps))] |
| #[stable(feature = "simd_x86", since = "1.27.0")] |
| pub unsafe fn _mm_unpacklo_pd(a: __m128d, b: __m128d) -> __m128d { |
| simd_shuffle2!(a, b, [0, 2]) |
| } |
| |
| #[allow(improper_ctypes)] |
| extern "C" { |
| #[link_name = "llvm.x86.sse2.pause"] |
| fn pause(); |
| #[link_name = "llvm.x86.sse2.clflush"] |
| fn clflush(p: *const u8); |
| #[link_name = "llvm.x86.sse2.lfence"] |
| fn lfence(); |
| #[link_name = "llvm.x86.sse2.mfence"] |
| fn mfence(); |
| #[link_name = "llvm.x86.sse2.pavg.b"] |
| fn pavgb(a: u8x16, b: u8x16) -> u8x16; |
| #[link_name = "llvm.x86.sse2.pavg.w"] |
| fn pavgw(a: u16x8, b: u16x8) -> u16x8; |
| #[link_name = "llvm.x86.sse2.pmadd.wd"] |
| fn pmaddwd(a: i16x8, b: i16x8) -> i32x4; |
| #[link_name = "llvm.x86.sse2.pmaxs.w"] |
| fn pmaxsw(a: i16x8, b: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.pmaxu.b"] |
| fn pmaxub(a: u8x16, b: u8x16) -> u8x16; |
| #[link_name = "llvm.x86.sse2.pmins.w"] |
| fn pminsw(a: i16x8, b: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.pminu.b"] |
| fn pminub(a: u8x16, b: u8x16) -> u8x16; |
| #[link_name = "llvm.x86.sse2.pmulh.w"] |
| fn pmulhw(a: i16x8, b: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.pmulhu.w"] |
| fn pmulhuw(a: u16x8, b: u16x8) -> u16x8; |
| #[link_name = "llvm.x86.sse2.pmulu.dq"] |
| fn pmuludq(a: u32x4, b: u32x4) -> u64x2; |
| #[link_name = "llvm.x86.sse2.psad.bw"] |
| fn psadbw(a: u8x16, b: u8x16) -> u64x2; |
| #[link_name = "llvm.x86.sse2.pslli.w"] |
| fn pslliw(a: i16x8, imm8: i32) -> i16x8; |
| #[link_name = "llvm.x86.sse2.psll.w"] |
| fn psllw(a: i16x8, count: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.pslli.d"] |
| fn psllid(a: i32x4, imm8: i32) -> i32x4; |
| #[link_name = "llvm.x86.sse2.psll.d"] |
| fn pslld(a: i32x4, count: i32x4) -> i32x4; |
| #[link_name = "llvm.x86.sse2.pslli.q"] |
| fn pslliq(a: i64x2, imm8: i32) -> i64x2; |
| #[link_name = "llvm.x86.sse2.psll.q"] |
| fn psllq(a: i64x2, count: i64x2) -> i64x2; |
| #[link_name = "llvm.x86.sse2.psrai.w"] |
| fn psraiw(a: i16x8, imm8: i32) -> i16x8; |
| #[link_name = "llvm.x86.sse2.psra.w"] |
| fn psraw(a: i16x8, count: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.psrai.d"] |
| fn psraid(a: i32x4, imm8: i32) -> i32x4; |
| #[link_name = "llvm.x86.sse2.psra.d"] |
| fn psrad(a: i32x4, count: i32x4) -> i32x4; |
| #[link_name = "llvm.x86.sse2.psrli.w"] |
| fn psrliw(a: i16x8, imm8: i32) -> i16x8; |
| #[link_name = "llvm.x86.sse2.psrl.w"] |
| fn psrlw(a: i16x8, count: i16x8) -> i16x8; |
| #[link_name = "llvm.x86.sse2.psrli.d"] |
| fn psrlid(a: i32x4, imm8: i32) -> i32x4; |
| #[link_name = "llvm.x86.sse2.psrl.d"] |
| fn psrld(a: i32x4, count: i32x4) -> i32x4; |
| #[link_name = "llvm.x86.sse2.psrli.q"] |
| fn psrliq(a: i64x2, imm8: i32) -> i64x2; |
| #[link_name = "llvm.x86.sse2.psrl.q"] |
| fn psrlq(a: i64x2, count: i64x2) -> i64x2; |
| #[link_name = "llvm.x86.sse2.cvtdq2ps"] |
| fn cvtdq2ps(a: i32x4) -> __m128; |
| #[link_name = "llvm.x86.sse2.cvtps2dq"] |
| fn cvtps2dq(a: __m128) -> i32x4; |
| #[link_name = "llvm.x86.sse2.maskmov.dqu"] |
| fn maskmovdqu(a: i8x16, mask: i8x16, mem_addr: *mut i8); |
| #[link_name = "llvm.x86.sse2.packsswb.128"] |
| fn packsswb(a: i16x8, b: i16x8) -> i8x16; |
| #[link_name = "llvm.x86.sse2.packssdw.128"] |
| fn packssdw(a: i32x4, b: i32x4) -> i16x8; |
| #[link_name = "llvm.x86.sse2.packuswb.128"] |
| fn packuswb(a: i16x8, b: i16x8) -> u8x16; |
| #[link_name = "llvm.x86.sse2.max.sd"] |
| fn maxsd(a: __m128d, b: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.max.pd"] |
| fn maxpd(a: __m128d, b: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.min.sd"] |
| fn minsd(a: __m128d, b: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.min.pd"] |
| fn minpd(a: __m128d, b: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.sqrt.sd"] |
| fn sqrtsd(a: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.sqrt.pd"] |
| fn sqrtpd(a: __m128d) -> __m128d; |
| #[link_name = "llvm.x86.sse2.cmp.sd"] |
| fn cmpsd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
| #[link_name = "llvm.x86.sse2.cmp.pd"] |
| fn cmppd(a: __m128d, b: __m128d, imm8: i8) -> __m128d; |
| #[link_name = "llvm.x86.sse2.comieq.sd"] |
| fn comieqsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.comilt.sd"] |
| fn comiltsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.comile.sd"] |
| fn comilesd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.comigt.sd"] |
| fn comigtsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.comige.sd"] |
| fn comigesd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.comineq.sd"] |
| fn comineqsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomieq.sd"] |
| fn ucomieqsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomilt.sd"] |
| fn ucomiltsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomile.sd"] |
| fn ucomilesd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomigt.sd"] |
| fn ucomigtsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomige.sd"] |
| fn ucomigesd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.ucomineq.sd"] |
| fn ucomineqsd(a: __m128d, b: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.movmsk.pd"] |
| fn movmskpd(a: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.cvtpd2ps"] |
| fn cvtpd2ps(a: __m128d) -> __m128; |
| #[link_name = "llvm.x86.sse2.cvtps2pd"] |
| fn cvtps2pd(a: __m128) -> __m128d; |
| #[link_name = "llvm.x86.sse2.cvtpd2dq"] |
| fn cvtpd2dq(a: __m128d) -> i32x4; |
| #[link_name = "llvm.x86.sse2.cvtsd2si"] |
| fn cvtsd2si(a: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.cvtsd2ss"] |
| fn cvtsd2ss(a: __m128, b: __m128d) -> __m128; |
| #[link_name = "llvm.x86.sse2.cvtss2sd"] |
| fn cvtss2sd(a: __m128d, b: __m128) -> __m128d; |
| #[link_name = "llvm.x86.sse2.cvttpd2dq"] |
| fn cvttpd2dq(a: __m128d) -> i32x4; |
| #[link_name = "llvm.x86.sse2.cvttsd2si"] |
| fn cvttsd2si(a: __m128d) -> i32; |
| #[link_name = "llvm.x86.sse2.cvttps2dq"] |
| fn cvttps2dq(a: __m128) -> i32x4; |
| #[link_name = "llvm.x86.sse2.storeu.dq"] |
| fn storeudq(mem_addr: *mut i8, a: __m128i); |
| #[link_name = "llvm.x86.sse2.storeu.pd"] |
| fn storeupd(mem_addr: *mut i8, a: __m128d); |
| } |
| |
| #[cfg(test)] |
| mod tests { |
| use crate::{ |
| core_arch::{simd::*, x86::*}, |
| hint::black_box, |
| }; |
| use std::{ |
| boxed, f32, |
| f64::{self, NAN}, |
| i32, |
| mem::{self, transmute}, |
| }; |
| use stdarch_test::simd_test; |
| |
| #[test] |
| fn test_mm_pause() { |
| unsafe { _mm_pause() } |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_clflush() { |
| let x = 0_u8; |
| _mm_clflush(&x as *const _); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_lfence() { |
| _mm_lfence(); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mfence() { |
| _mm_mfence(); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_epi8() { |
| let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| #[rustfmt::skip] |
| let b = _mm_setr_epi8( |
| 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| ); |
| let r = _mm_add_epi8(a, b); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_epi8_overflow() { |
| let a = _mm_set1_epi8(0x7F); |
| let b = _mm_set1_epi8(1); |
| let r = _mm_add_epi8(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(-128)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
| let r = _mm_add_epi16(a, b); |
| let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_epi32() { |
| let a = _mm_setr_epi32(0, 1, 2, 3); |
| let b = _mm_setr_epi32(4, 5, 6, 7); |
| let r = _mm_add_epi32(a, b); |
| let e = _mm_setr_epi32(4, 6, 8, 10); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_epi64() { |
| let a = _mm_setr_epi64x(0, 1); |
| let b = _mm_setr_epi64x(2, 3); |
| let r = _mm_add_epi64(a, b); |
| let e = _mm_setr_epi64x(2, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi8() { |
| let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| #[rustfmt::skip] |
| let b = _mm_setr_epi8( |
| 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| ); |
| let r = _mm_adds_epi8(a, b); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi8_saturate_positive() { |
| let a = _mm_set1_epi8(0x7F); |
| let b = _mm_set1_epi8(1); |
| let r = _mm_adds_epi8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi8_saturate_negative() { |
| let a = _mm_set1_epi8(-0x80); |
| let b = _mm_set1_epi8(-1); |
| let r = _mm_adds_epi8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
| let r = _mm_adds_epi16(a, b); |
| let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi16_saturate_positive() { |
| let a = _mm_set1_epi16(0x7FFF); |
| let b = _mm_set1_epi16(1); |
| let r = _mm_adds_epi16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epi16_saturate_negative() { |
| let a = _mm_set1_epi16(-0x8000); |
| let b = _mm_set1_epi16(-1); |
| let r = _mm_adds_epi16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epu8() { |
| let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| #[rustfmt::skip] |
| let b = _mm_setr_epi8( |
| 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| ); |
| let r = _mm_adds_epu8(a, b); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 16, 18, 20, 22, 24, 26, 28, 30, 32, 34, 36, 38, 40, 42, 44, 46, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epu8_saturate() { |
| let a = _mm_set1_epi8(!0); |
| let b = _mm_set1_epi8(1); |
| let r = _mm_adds_epu8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epu16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
| let r = _mm_adds_epu16(a, b); |
| let e = _mm_setr_epi16(8, 10, 12, 14, 16, 18, 20, 22); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_adds_epu16_saturate() { |
| let a = _mm_set1_epi16(!0); |
| let b = _mm_set1_epi16(1); |
| let r = _mm_adds_epu16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_avg_epu8() { |
| let (a, b) = (_mm_set1_epi8(3), _mm_set1_epi8(9)); |
| let r = _mm_avg_epu8(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(6)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_avg_epu16() { |
| let (a, b) = (_mm_set1_epi16(3), _mm_set1_epi16(9)); |
| let r = _mm_avg_epu16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(6)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_madd_epi16() { |
| let a = _mm_setr_epi16(1, 2, 3, 4, 5, 6, 7, 8); |
| let b = _mm_setr_epi16(9, 10, 11, 12, 13, 14, 15, 16); |
| let r = _mm_madd_epi16(a, b); |
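| // each 32-bit result lane is the sum of two adjacent 16-bit products:
| // 1*9 + 2*10 = 29, 3*11 + 4*12 = 81, 5*13 + 6*14 = 149, 7*15 + 8*16 = 233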
| let e = _mm_setr_epi32(29, 81, 149, 233); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_max_epi16() { |
| let a = _mm_set1_epi16(1); |
| let b = _mm_set1_epi16(-1); |
| let r = _mm_max_epi16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_max_epu8() { |
| let a = _mm_set1_epi8(1); |
| let b = _mm_set1_epi8(!0); |
| let r = _mm_max_epu8(a, b); |
| assert_eq_m128i(r, b); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_min_epi16() { |
| let a = _mm_set1_epi16(1); |
| let b = _mm_set1_epi16(-1); |
| let r = _mm_min_epi16(a, b); |
| assert_eq_m128i(r, b); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_min_epu8() { |
| let a = _mm_set1_epi8(1); |
| let b = _mm_set1_epi8(!0); |
| let r = _mm_min_epu8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mulhi_epi16() { |
| let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
| let r = _mm_mulhi_epi16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(-16)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mulhi_epu16() { |
| let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(1001)); |
| let r = _mm_mulhi_epu16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(15)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mullo_epi16() { |
| let (a, b) = (_mm_set1_epi16(1000), _mm_set1_epi16(-1001)); |
| let r = _mm_mullo_epi16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(-17960)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mul_epu32() { |
| let a = _mm_setr_epi64x(1_000_000_000, 1 << 34); |
| let b = _mm_setr_epi64x(1_000_000_000, 1 << 35); |
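| // only the low 32 bits of each 64-bit lane are multiplied; both 1 << 34
| // and 1 << 35 have all-zero low halves, so the second result lane is 0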
| let r = _mm_mul_epu32(a, b); |
| let e = _mm_setr_epi64x(1_000_000_000 * 1_000_000_000, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sad_epu8() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 255u8 as i8, 254u8 as i8, 253u8 as i8, 252u8 as i8, |
| 1, 2, 3, 4, |
| 155u8 as i8, 154u8 as i8, 153u8 as i8, 152u8 as i8, |
| 1, 2, 3, 4, |
| ); |
| let b = _mm_setr_epi8(0, 0, 0, 0, 2, 1, 2, 1, 1, 1, 1, 1, 1, 2, 1, 2); |
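| // each 64-bit result lane is the sum of absolute byte differences over
| // one 8-byte half: 255+254+253+252+1+1+1+3 = 1020 and
| // 154+153+152+151+0+0+2+2 = 614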
| let r = _mm_sad_epu8(a, b); |
| let e = _mm_setr_epi64x(1020, 614); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_epi8() { |
| let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(6)); |
| let r = _mm_sub_epi8(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_epi16() { |
| let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(6)); |
| let r = _mm_sub_epi16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_epi32() { |
| let (a, b) = (_mm_set1_epi32(5), _mm_set1_epi32(6)); |
| let r = _mm_sub_epi32(a, b); |
| assert_eq_m128i(r, _mm_set1_epi32(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_epi64() { |
| let (a, b) = (_mm_set1_epi64x(5), _mm_set1_epi64x(6)); |
| let r = _mm_sub_epi64(a, b); |
| assert_eq_m128i(r, _mm_set1_epi64x(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi8() { |
| let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
| let r = _mm_subs_epi8(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(3)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi8_saturate_positive() { |
| let a = _mm_set1_epi8(0x7F); |
| let b = _mm_set1_epi8(-1); |
| let r = _mm_subs_epi8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi8_saturate_negative() { |
| let a = _mm_set1_epi8(-0x80); |
| let b = _mm_set1_epi8(1); |
| let r = _mm_subs_epi8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi16() { |
| let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
| let r = _mm_subs_epi16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(3)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi16_saturate_positive() { |
| let a = _mm_set1_epi16(0x7FFF); |
| let b = _mm_set1_epi16(-1); |
| let r = _mm_subs_epi16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epi16_saturate_negative() { |
| let a = _mm_set1_epi16(-0x8000); |
| let b = _mm_set1_epi16(1); |
| let r = _mm_subs_epi16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epu8() { |
| let (a, b) = (_mm_set1_epi8(5), _mm_set1_epi8(2)); |
| let r = _mm_subs_epu8(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(3)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epu8_saturate() { |
| let a = _mm_set1_epi8(0); |
| let b = _mm_set1_epi8(1); |
| let r = _mm_subs_epu8(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epu16() { |
| let (a, b) = (_mm_set1_epi16(5), _mm_set1_epi16(2)); |
| let r = _mm_subs_epu16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(3)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_subs_epu16_saturate() { |
| let a = _mm_set1_epi16(0); |
| let b = _mm_set1_epi16(1); |
| let r = _mm_subs_epu16(a, b); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_slli_si128() { |
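| // `_mm_slli_si128` shifts the whole vector left by *bytes*; a count of 16
| // or more produces all zeros (the `<16>` case at the end)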
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_slli_si128::<1>(a); |
| let e = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| assert_eq_m128i(r, e); |
| |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_slli_si128::<15>(a); |
| let e = _mm_setr_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1); |
| assert_eq_m128i(r, e); |
| |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_slli_si128::<16>(a); |
| assert_eq_m128i(r, _mm_set1_epi8(0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_slli_epi16() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi16( |
| 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, |
| ); |
| let r = _mm_slli_epi16::<4>(a); |
| |
| #[rustfmt::skip] |
| let e = _mm_setr_epi16( |
| 0xFFF0 as u16 as i16, 0xFFF0 as u16 as i16, 0x0FF0, 0x00F0, |
| 0, 0, 0, 0, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sll_epi16() { |
| let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); |
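| // the shift count is taken from the low 64 bits of the count operand, so
| // a count stored in the upper half (second case) leaves `a` unchanged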
| let r = _mm_sll_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); |
| assert_eq_m128i(r, _mm_setr_epi16(0xFF0, 0, 0, 0, 0, 0, 0, 0)); |
| let r = _mm_sll_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); |
| assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_slli_epi32() { |
| let r = _mm_slli_epi32::<4>(_mm_set1_epi32(0xFFFF)); |
| assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sll_epi32() { |
| let a = _mm_set1_epi32(0xFFFF); |
| let b = _mm_setr_epi32(4, 0, 0, 0); |
| let r = _mm_sll_epi32(a, b); |
| assert_eq_m128i(r, _mm_set1_epi32(0xFFFF0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_slli_epi64() { |
| let r = _mm_slli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF)); |
| assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sll_epi64() { |
| let a = _mm_set1_epi64x(0xFFFFFFFF); |
| let b = _mm_setr_epi64x(4, 0); |
| let r = _mm_sll_epi64(a, b); |
| assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFFF0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srai_epi16() { |
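| // arithmetic right shifts replicate the sign bit, so -1 stays -1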
| let r = _mm_srai_epi16::<1>(_mm_set1_epi16(-1)); |
| assert_eq_m128i(r, _mm_set1_epi16(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sra_epi16() { |
| let a = _mm_set1_epi16(-1); |
| let b = _mm_setr_epi16(1, 0, 0, 0, 0, 0, 0, 0); |
| let r = _mm_sra_epi16(a, b); |
| assert_eq_m128i(r, _mm_set1_epi16(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srai_epi32() { |
| let r = _mm_srai_epi32::<1>(_mm_set1_epi32(-1)); |
| assert_eq_m128i(r, _mm_set1_epi32(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sra_epi32() { |
| let a = _mm_set1_epi32(-1); |
| let b = _mm_setr_epi32(1, 0, 0, 0); |
| let r = _mm_sra_epi32(a, b); |
| assert_eq_m128i(r, _mm_set1_epi32(-1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srli_si128() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_srli_si128::<1>(a); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 0, |
| ); |
| assert_eq_m128i(r, e); |
| |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_srli_si128::<15>(a); |
| let e = _mm_setr_epi8(16, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, |
| ); |
| let r = _mm_srli_si128::<16>(a); |
| assert_eq_m128i(r, _mm_set1_epi8(0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srli_epi16() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi16( |
| 0xFFFF as u16 as i16, 0x0FFF, 0x00FF, 0x000F, 0, 0, 0, 0, |
| ); |
| let r = _mm_srli_epi16::<4>(a); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi16( |
| 0xFFF as u16 as i16, 0xFF as u16 as i16, 0xF, 0, 0, 0, 0, 0, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srl_epi16() { |
| let a = _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0); |
| let r = _mm_srl_epi16(a, _mm_setr_epi16(4, 0, 0, 0, 0, 0, 0, 0)); |
| assert_eq_m128i(r, _mm_setr_epi16(0xF, 0, 0, 0, 0, 0, 0, 0)); |
| let r = _mm_srl_epi16(a, _mm_setr_epi16(0, 0, 0, 0, 4, 0, 0, 0)); |
| assert_eq_m128i(r, _mm_setr_epi16(0xFF, 0, 0, 0, 0, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srli_epi32() { |
| let r = _mm_srli_epi32::<4>(_mm_set1_epi32(0xFFFF)); |
| assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srl_epi32() { |
| let a = _mm_set1_epi32(0xFFFF); |
| let b = _mm_setr_epi32(4, 0, 0, 0); |
| let r = _mm_srl_epi32(a, b); |
| assert_eq_m128i(r, _mm_set1_epi32(0xFFF)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srli_epi64() { |
| let r = _mm_srli_epi64::<4>(_mm_set1_epi64x(0xFFFFFFFF)); |
| assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_srl_epi64() { |
| let a = _mm_set1_epi64x(0xFFFFFFFF); |
| let b = _mm_setr_epi64x(4, 0); |
| let r = _mm_srl_epi64(a, b); |
| assert_eq_m128i(r, _mm_set1_epi64x(0xFFFFFFF)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_and_si128() { |
| let a = _mm_set1_epi8(5); |
| let b = _mm_set1_epi8(3); |
| let r = _mm_and_si128(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_andnot_si128() { |
| let a = _mm_set1_epi8(5); |
| let b = _mm_set1_epi8(3); |
| let r = _mm_andnot_si128(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(2)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_or_si128() { |
| let a = _mm_set1_epi8(5); |
| let b = _mm_set1_epi8(3); |
| let r = _mm_or_si128(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(7)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_xor_si128() { |
| let a = _mm_set1_epi8(5); |
| let b = _mm_set1_epi8(3); |
| let r = _mm_xor_si128(a, b); |
| assert_eq_m128i(r, _mm_set1_epi8(6)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpeq_epi8() { |
| let a = _mm_setr_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); |
| let b = _mm_setr_epi8(15, 14, 2, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); |
| let r = _mm_cmpeq_epi8(a, b); |
| #[rustfmt::skip] |
| assert_eq_m128i( |
| r, |
| _mm_setr_epi8( |
| 0, 0, 0xFFu8 as i8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 |
| ) |
| ); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpeq_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(7, 6, 2, 4, 3, 2, 1, 0); |
| let r = _mm_cmpeq_epi16(a, b); |
| assert_eq_m128i(r, _mm_setr_epi16(0, 0, !0, 0, 0, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpeq_epi32() { |
| let a = _mm_setr_epi32(0, 1, 2, 3); |
| let b = _mm_setr_epi32(3, 2, 2, 0); |
| let r = _mm_cmpeq_epi32(a, b); |
| assert_eq_m128i(r, _mm_setr_epi32(0, 0, !0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpgt_epi8() { |
| let a = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| let b = _mm_set1_epi8(0); |
| let r = _mm_cmpgt_epi8(a, b); |
| let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpgt_epi16() { |
| let a = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
| let b = _mm_set1_epi16(0); |
| let r = _mm_cmpgt_epi16(a, b); |
| let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpgt_epi32() { |
| let a = _mm_set_epi32(5, 0, 0, 0); |
| let b = _mm_set1_epi32(0); |
| let r = _mm_cmpgt_epi32(a, b); |
| assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmplt_epi8() { |
| let a = _mm_set1_epi8(0); |
| let b = _mm_set_epi8(5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| let r = _mm_cmplt_epi8(a, b); |
| let e = _mm_set_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmplt_epi16() { |
| let a = _mm_set1_epi16(0); |
| let b = _mm_set_epi16(5, 0, 0, 0, 0, 0, 0, 0); |
| let r = _mm_cmplt_epi16(a, b); |
| let e = _mm_set_epi16(!0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmplt_epi32() { |
| let a = _mm_set1_epi32(0); |
| let b = _mm_set_epi32(5, 0, 0, 0); |
| let r = _mm_cmplt_epi32(a, b); |
| assert_eq_m128i(r, _mm_set_epi32(!0, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtepi32_pd() { |
| let a = _mm_set_epi32(35, 25, 15, 5); |
| let r = _mm_cvtepi32_pd(a); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 15.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsi32_sd() { |
| let a = _mm_set1_pd(3.5); |
| let r = _mm_cvtsi32_sd(a, 5); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 3.5)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtepi32_ps() { |
| let a = _mm_setr_epi32(1, 2, 3, 4); |
| let r = _mm_cvtepi32_ps(a); |
| assert_eq_m128(r, _mm_setr_ps(1.0, 2.0, 3.0, 4.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtps_epi32() { |
| let a = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
| let r = _mm_cvtps_epi32(a); |
| assert_eq_m128i(r, _mm_setr_epi32(1, 2, 3, 4)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsi32_si128() { |
| let r = _mm_cvtsi32_si128(5); |
| assert_eq_m128i(r, _mm_setr_epi32(5, 0, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsi128_si32() { |
| let r = _mm_cvtsi128_si32(_mm_setr_epi32(5, 0, 0, 0)); |
| assert_eq!(r, 5); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_epi64x() { |
| let r = _mm_set_epi64x(0, 1); |
| assert_eq_m128i(r, _mm_setr_epi64x(1, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_epi32() { |
| let r = _mm_set_epi32(0, 1, 2, 3); |
| assert_eq_m128i(r, _mm_setr_epi32(3, 2, 1, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_epi16() { |
| let r = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| assert_eq_m128i(r, _mm_setr_epi16(7, 6, 5, 4, 3, 2, 1, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_epi8() { |
| #[rustfmt::skip] |
| let r = _mm_set_epi8( |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| ); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 15, 14, 13, 12, 11, 10, 9, 8, |
| 7, 6, 5, 4, 3, 2, 1, 0, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set1_epi64x() { |
| let r = _mm_set1_epi64x(1); |
| assert_eq_m128i(r, _mm_set1_epi64x(1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set1_epi32() { |
| let r = _mm_set1_epi32(1); |
| assert_eq_m128i(r, _mm_set1_epi32(1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set1_epi16() { |
| let r = _mm_set1_epi16(1); |
| assert_eq_m128i(r, _mm_set1_epi16(1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set1_epi8() { |
| let r = _mm_set1_epi8(1); |
| assert_eq_m128i(r, _mm_set1_epi8(1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setr_epi32() { |
| let r = _mm_setr_epi32(0, 1, 2, 3); |
| assert_eq_m128i(r, _mm_setr_epi32(0, 1, 2, 3)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setr_epi16() { |
| let r = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| assert_eq_m128i(r, _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setr_epi8() { |
| #[rustfmt::skip] |
| let r = _mm_setr_epi8( |
| 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, |
| ); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 0, 1, 2, 3, 4, 5, 6, 7, |
| 8, 9, 10, 11, 12, 13, 14, 15, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setzero_si128() { |
| let r = _mm_setzero_si128(); |
| assert_eq_m128i(r, _mm_set1_epi64x(0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadl_epi64() { |
| let a = _mm_setr_epi64x(6, 5); |
| let r = _mm_loadl_epi64(&a as *const _); |
| assert_eq_m128i(r, _mm_setr_epi64x(6, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_load_si128() { |
| let a = _mm_set_epi64x(5, 6); |
| let r = _mm_load_si128(&a as *const _ as *const _); |
| assert_eq_m128i(a, r); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadu_si128() { |
| let a = _mm_set_epi64x(5, 6); |
| let r = _mm_loadu_si128(&a as *const _ as *const _); |
| assert_eq_m128i(a, r); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_maskmoveu_si128() { |
| let a = _mm_set1_epi8(9); |
| #[rustfmt::skip] |
| let mask = _mm_set_epi8( |
| 0, 0, 0x80u8 as i8, 0, 0, 0, 0, 0, |
| 0, 0, 0, 0, 0, 0, 0, 0, |
| ); |
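| // only the bytes whose corresponding mask byte has its most significant
| // bit set are written to memory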
| let mut r = _mm_set1_epi8(0); |
| _mm_maskmoveu_si128(a, mask, &mut r as *mut _ as *mut i8); |
| let e = _mm_set_epi8(0, 0, 9, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_store_si128() { |
| let a = _mm_set1_epi8(9); |
| let mut r = _mm_set1_epi8(0); |
| _mm_store_si128(&mut r as *mut _ as *mut __m128i, a); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storeu_si128() { |
| let a = _mm_set1_epi8(9); |
| let mut r = _mm_set1_epi8(0); |
| _mm_storeu_si128(&mut r as *mut _ as *mut __m128i, a); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storel_epi64() { |
| let a = _mm_setr_epi64x(2, 9); |
| let mut r = _mm_set1_epi8(0); |
| _mm_storel_epi64(&mut r as *mut _ as *mut __m128i, a); |
| assert_eq_m128i(r, _mm_setr_epi64x(2, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_stream_si128() { |
| let a = _mm_setr_epi32(1, 2, 3, 4); |
| let mut r = _mm_undefined_si128(); |
| _mm_stream_si128(&mut r as *mut _, a); |
| assert_eq_m128i(r, a); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_stream_si32() { |
| let a: i32 = 7; |
| let mut mem = boxed::Box::<i32>::new(-1); |
| _mm_stream_si32(&mut *mem as *mut i32, a); |
| assert_eq!(a, *mem); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_move_epi64() { |
| let a = _mm_setr_epi64x(5, 6); |
| let r = _mm_move_epi64(a); |
| assert_eq_m128i(r, _mm_setr_epi64x(5, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_packs_epi16() { |
| let a = _mm_setr_epi16(0x80, -0x81, 0, 0, 0, 0, 0, 0); |
| let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -0x81, 0x80); |
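| // the pack saturates each 16-bit value to the i8 range:
| // 0x80 -> 0x7F and -0x81 -> -0x80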
| let r = _mm_packs_epi16(a, b); |
| #[rustfmt::skip] |
| assert_eq_m128i( |
| r, |
| _mm_setr_epi8( |
| 0x7F, -0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -0x80, 0x7F |
| ) |
| ); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_packs_epi32() { |
| let a = _mm_setr_epi32(0x8000, -0x8001, 0, 0); |
| let b = _mm_setr_epi32(0, 0, -0x8001, 0x8000); |
| let r = _mm_packs_epi32(a, b); |
| assert_eq_m128i( |
| r, |
| _mm_setr_epi16(0x7FFF, -0x8000, 0, 0, 0, 0, -0x8000, 0x7FFF), |
| ); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_packus_epi16() { |
| let a = _mm_setr_epi16(0x100, -1, 0, 0, 0, 0, 0, 0); |
| let b = _mm_setr_epi16(0, 0, 0, 0, 0, 0, -1, 0x100); |
| let r = _mm_packus_epi16(a, b); |
| assert_eq_m128i( |
| r, |
| _mm_setr_epi8(!0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, !0), |
| ); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_extract_epi16() { |
| let a = _mm_setr_epi16(-1, 1, 2, 3, 4, 5, 6, 7); |
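| // the extracted word is zero-extended into the i32 result, so lane 0
| // (-1 as i16) reads back as 0xFFFF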
| let r1 = _mm_extract_epi16::<0>(a); |
| let r2 = _mm_extract_epi16::<3>(a); |
| assert_eq!(r1, 0xFFFF); |
| assert_eq!(r2, 3); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_insert_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let r = _mm_insert_epi16::<0>(a, 9); |
| let e = _mm_setr_epi16(9, 1, 2, 3, 4, 5, 6, 7); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_movemask_epi8() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, 0b01, |
| 0b0101, 0b1111_0000u8 as i8, 0, 0, |
| 0, 0b1011_0101u8 as i8, 0b1111_0000u8 as i8, 0b0101, |
| 0b01, 0b1000_0000u8 as i8, 0b0, 0b1000_0000u8 as i8, |
| ); |
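| // each mask bit is the sign bit (bit 7) of one byte, with lane 0 in the
| // least significant position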
| let r = _mm_movemask_epi8(a); |
| assert_eq!(r, 0b10100110_00100101); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_shuffle_epi32() { |
| let a = _mm_setr_epi32(5, 10, 15, 20); |
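| // each 2-bit field of the immediate picks a source lane, lowest field
| // first: 0b00_01_01_11 selects lanes [3, 1, 1, 0]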
| let r = _mm_shuffle_epi32::<0b00_01_01_11>(a); |
| let e = _mm_setr_epi32(20, 10, 10, 5); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_shufflehi_epi16() { |
| let a = _mm_setr_epi16(1, 2, 3, 4, 5, 10, 15, 20); |
| let r = _mm_shufflehi_epi16::<0b00_01_01_11>(a); |
| let e = _mm_setr_epi16(1, 2, 3, 4, 20, 10, 10, 5); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_shufflelo_epi16() { |
| let a = _mm_setr_epi16(5, 10, 15, 20, 1, 2, 3, 4); |
| let r = _mm_shufflelo_epi16::<0b00_01_01_11>(a); |
| let e = _mm_setr_epi16(20, 10, 10, 5, 1, 2, 3, 4); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpackhi_epi8() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 0, 1, 2, 3, 4, 5, 6, 7, |
| 8, 9, 10, 11, 12, 13, 14, 15, |
| ); |
| #[rustfmt::skip] |
| let b = _mm_setr_epi8( |
| 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| ); |
| let r = _mm_unpackhi_epi8(a, b); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpackhi_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
| let r = _mm_unpackhi_epi16(a, b); |
| let e = _mm_setr_epi16(4, 12, 5, 13, 6, 14, 7, 15); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpackhi_epi32() { |
| let a = _mm_setr_epi32(0, 1, 2, 3); |
| let b = _mm_setr_epi32(4, 5, 6, 7); |
| let r = _mm_unpackhi_epi32(a, b); |
| let e = _mm_setr_epi32(2, 6, 3, 7); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpackhi_epi64() { |
| let a = _mm_setr_epi64x(0, 1); |
| let b = _mm_setr_epi64x(2, 3); |
| let r = _mm_unpackhi_epi64(a, b); |
| let e = _mm_setr_epi64x(1, 3); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpacklo_epi8() { |
| #[rustfmt::skip] |
| let a = _mm_setr_epi8( |
| 0, 1, 2, 3, 4, 5, 6, 7, |
| 8, 9, 10, 11, 12, 13, 14, 15, |
| ); |
| #[rustfmt::skip] |
| let b = _mm_setr_epi8( |
| 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, |
| ); |
| let r = _mm_unpacklo_epi8(a, b); |
| #[rustfmt::skip] |
| let e = _mm_setr_epi8( |
| 0, 16, 1, 17, 2, 18, 3, 19, |
| 4, 20, 5, 21, 6, 22, 7, 23, |
| ); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpacklo_epi16() { |
| let a = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7); |
| let b = _mm_setr_epi16(8, 9, 10, 11, 12, 13, 14, 15); |
| let r = _mm_unpacklo_epi16(a, b); |
| let e = _mm_setr_epi16(0, 8, 1, 9, 2, 10, 3, 11); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpacklo_epi32() { |
| let a = _mm_setr_epi32(0, 1, 2, 3); |
| let b = _mm_setr_epi32(4, 5, 6, 7); |
| let r = _mm_unpacklo_epi32(a, b); |
| let e = _mm_setr_epi32(0, 4, 1, 5); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpacklo_epi64() { |
| let a = _mm_setr_epi64x(0, 1); |
| let b = _mm_setr_epi64x(2, 3); |
| let r = _mm_unpacklo_epi64(a, b); |
| let e = _mm_setr_epi64x(0, 2); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_add_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(6.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_add_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_add_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(6.0, 12.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_div_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_div_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(0.2, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_div_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_div_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(0.2, 0.2)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_max_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_max_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_max_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_max_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 10.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_min_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_min_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_min_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_min_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mul_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_mul_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_mul_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_mul_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(5.0, 20.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sqrt_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_sqrt_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(5.0f64.sqrt(), 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sqrt_pd() { |
| let r = _mm_sqrt_pd(_mm_setr_pd(1.0, 2.0)); |
| assert_eq_m128d(r, _mm_setr_pd(1.0f64.sqrt(), 2.0f64.sqrt())); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_sd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_sub_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(-4.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_sub_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(5.0, 10.0); |
| let r = _mm_sub_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(-4.0, -8.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_and_pd() { |
| let a = transmute(u64x2::splat(5)); |
| let b = transmute(u64x2::splat(3)); |
| let r = _mm_and_pd(a, b); |
| let e = transmute(u64x2::splat(1)); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_andnot_pd() { |
| let a = transmute(u64x2::splat(5)); |
| let b = transmute(u64x2::splat(3)); |
| let r = _mm_andnot_pd(a, b); |
| let e = transmute(u64x2::splat(2)); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_or_pd() { |
| let a = transmute(u64x2::splat(5)); |
| let b = transmute(u64x2::splat(3)); |
| let r = _mm_or_pd(a, b); |
| let e = transmute(u64x2::splat(7)); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_xor_pd() { |
| let a = transmute(u64x2::splat(5)); |
| let b = transmute(u64x2::splat(3)); |
| let r = _mm_xor_pd(a, b); |
| let e = transmute(u64x2::splat(6)); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpeq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpeq_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmplt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmplt_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmple_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmple_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpgt_sd() { |
| let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpgt_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpge_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpge_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpord_sd() { |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
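| // an "ordered" comparison is false when either operand is NaN, so the
| // low lane of the result mask is all zeros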
| let e = _mm_setr_epi64x(0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpord_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpunord_sd() { |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpunord_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpneq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpneq_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnlt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpnlt_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnle_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpnle_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpngt_sd() { |
| let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpngt_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnge_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, transmute(2.0f64)); |
| let r = transmute::<_, __m128i>(_mm_cmpnge_sd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpeq_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpeq_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmplt_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmplt_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmple_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmple_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpgt_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpgt_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpge_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpge_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpord_pd() { |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmpord_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpunord_pd() { |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpunord_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpneq_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(!0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmpneq_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnlt_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(5.0, 3.0)); |
| let e = _mm_setr_epi64x(0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpnlt_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnle_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, 0); |
| let r = transmute::<_, __m128i>(_mm_cmpnle_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpngt_pd() { |
| let (a, b) = (_mm_setr_pd(5.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmpngt_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cmpnge_pd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| let e = _mm_setr_epi64x(0, !0); |
| let r = transmute::<_, __m128i>(_mm_cmpnge_pd(a, b)); |
| assert_eq_m128i(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comieq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comieq_sd(a, b) != 0); |
| |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comieq_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comilt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comilt_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comile_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comile_sd(a, b) != 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comigt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comigt_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comige_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comige_sd(a, b) != 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_comineq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_comineq_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomieq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomieq_sd(a, b) != 0); |
| |
| let (a, b) = (_mm_setr_pd(NAN, 2.0), _mm_setr_pd(NAN, 3.0)); |
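| // unlike `_mm_comieq_sd`, the unordered variant raises no
| // invalid-operation exception for quiet NaNs; a NaN operand simply
| // compares as "not equal"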
| assert!(_mm_ucomieq_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomilt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomilt_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomile_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomile_sd(a, b) != 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomigt_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomigt_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomige_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomige_sd(a, b) != 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_ucomineq_sd() { |
| let (a, b) = (_mm_setr_pd(1.0, 2.0), _mm_setr_pd(1.0, 3.0)); |
| assert!(_mm_ucomineq_sd(a, b) == 0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_movemask_pd() { |
| let r = _mm_movemask_pd(_mm_setr_pd(-1.0, 5.0)); |
| assert_eq!(r, 0b01); |
| |
| let r = _mm_movemask_pd(_mm_setr_pd(-1.0, -5.0)); |
| assert_eq!(r, 0b11); |
| } |
| |
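| // `Memory` is 16-byte aligned so its data satisfies the alignment contract
| // of the aligned load/store intrinsics (`_mm_load_pd`, `_mm_store_pd`, ...)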
| #[repr(align(16))] |
| struct Memory { |
| data: [f64; 4], |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_load_pd() { |
| let mem = Memory { |
| data: [1.0f64, 2.0, 3.0, 4.0], |
| }; |
| let vals = &mem.data; |
| let d = vals.as_ptr(); |
| |
| let r = _mm_load_pd(d); |
| assert_eq_m128d(r, _mm_setr_pd(1.0, 2.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_load_sd() { |
| let a = 1.; |
| let expected = _mm_setr_pd(a, 0.); |
| let r = _mm_load_sd(&a); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadh_pd() { |
| let a = _mm_setr_pd(1., 2.); |
| let b = 3.; |
| let expected = _mm_setr_pd(_mm_cvtsd_f64(a), 3.); |
| let r = _mm_loadh_pd(a, &b); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadl_pd() { |
| let a = _mm_setr_pd(1., 2.); |
| let b = 3.; |
| let expected = _mm_setr_pd(3., get_m128d(a, 1)); |
| let r = _mm_loadl_pd(a, &b); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_stream_pd() { |
| #[repr(align(128))] |
| struct Memory { |
| pub data: [f64; 2], |
| } |
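| // non-temporal stores bypass the cache hierarchy but stay coherent, so
| // the stored values can be read back normally below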
| let a = _mm_set1_pd(7.0); |
| let mut mem = Memory { data: [-1.0; 2] }; |
| |
| _mm_stream_pd(&mut mem.data[0] as *mut f64, a); |
| for i in 0..2 { |
| assert_eq!(mem.data[i], get_m128d(a, i)); |
| } |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_store_sd() { |
| let mut dest = 0.; |
| let a = _mm_setr_pd(1., 2.); |
| _mm_store_sd(&mut dest, a); |
| assert_eq!(dest, _mm_cvtsd_f64(a)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_store_pd() { |
| let mut mem = Memory { data: [0.0f64; 4] }; |
| let vals = &mut mem.data; |
| let a = _mm_setr_pd(1.0, 2.0); |
| let d = vals.as_mut_ptr(); |
| |
| _mm_store_pd(d, *black_box(&a)); |
| assert_eq!(vals[0], 1.0); |
| assert_eq!(vals[1], 2.0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storeu_pd() { |
| let mut mem = Memory { data: [0.0f64; 4] }; |
| let vals = &mut mem.data; |
| let a = _mm_setr_pd(1.0, 2.0); |
| |
| let mut ofs = 0; |
| let mut p = vals.as_mut_ptr(); |
| |
| // Make sure `p` is *not* aligned to a 16-byte boundary
| if (p as usize) & 0xf == 0 { |
| ofs = 1; |
| p = p.add(1); |
| } |
| |
| _mm_storeu_pd(p, *black_box(&a)); |
| |
| if ofs > 0 { |
| assert_eq!(vals[ofs - 1], 0.0); |
| } |
| assert_eq!(vals[ofs + 0], 1.0); |
| assert_eq!(vals[ofs + 1], 2.0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_store1_pd() { |
| let mut mem = Memory { data: [0.0f64; 4] }; |
| let vals = &mut mem.data; |
| let a = _mm_setr_pd(1.0, 2.0); |
| let d = vals.as_mut_ptr(); |
| |
| _mm_store1_pd(d, *black_box(&a)); |
| assert_eq!(vals[0], 1.0); |
| assert_eq!(vals[1], 1.0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_store_pd1() { |
| let mut mem = Memory { data: [0.0f64; 4] }; |
| let vals = &mut mem.data; |
| let a = _mm_setr_pd(1.0, 2.0); |
| let d = vals.as_mut_ptr(); |
| |
| _mm_store_pd1(d, *black_box(&a)); |
| assert_eq!(vals[0], 1.0); |
| assert_eq!(vals[1], 1.0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storer_pd() { |
| let mut mem = Memory { data: [0.0f64; 4] }; |
| let vals = &mut mem.data; |
| let a = _mm_setr_pd(1.0, 2.0); |
| let d = vals.as_mut_ptr(); |
| |
| _mm_storer_pd(d, *black_box(&a)); |
| assert_eq!(vals[0], 2.0); |
| assert_eq!(vals[1], 1.0); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storeh_pd() { |
| let mut dest = 0.; |
| let a = _mm_setr_pd(1., 2.); |
| _mm_storeh_pd(&mut dest, a); |
| assert_eq!(dest, get_m128d(a, 1)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_storel_pd() { |
| let mut dest = 0.; |
| let a = _mm_setr_pd(1., 2.); |
| _mm_storel_pd(&mut dest, a); |
| assert_eq!(dest, _mm_cvtsd_f64(a)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadr_pd() { |
| let mut mem = Memory { |
| data: [1.0f64, 2.0, 3.0, 4.0], |
| }; |
| let vals = &mut mem.data; |
| let d = vals.as_ptr(); |
| |
| let r = _mm_loadr_pd(d); |
| assert_eq_m128d(r, _mm_setr_pd(2.0, 1.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_loadu_pd() { |
| let mut mem = Memory { |
| data: [1.0f64, 2.0, 3.0, 4.0], |
| }; |
| let vals = &mut mem.data; |
| let mut d = vals.as_ptr(); |
| |
| // make sure d is not aligned to 16-byte boundary |
| let mut offset = 0; |
| if (d as usize) & 0xf == 0 { |
| offset = 1; |
| d = d.add(offset); |
| } |
| |
| let r = _mm_loadu_pd(d); |
| let e = _mm_add_pd(_mm_setr_pd(1.0, 2.0), _mm_set1_pd(offset as f64)); |
| assert_eq_m128d(r, e); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtpd_ps() { |
| let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, 5.0)); |
| assert_eq_m128(r, _mm_setr_ps(-1.0, 5.0, 0.0, 0.0)); |
| |
| let r = _mm_cvtpd_ps(_mm_setr_pd(-1.0, -5.0)); |
| assert_eq_m128(r, _mm_setr_ps(-1.0, -5.0, 0.0, 0.0)); |
| |
| let r = _mm_cvtpd_ps(_mm_setr_pd(f64::MAX, f64::MIN)); |
| assert_eq_m128(r, _mm_setr_ps(f32::INFINITY, f32::NEG_INFINITY, 0.0, 0.0)); |
| |
| let r = _mm_cvtpd_ps(_mm_setr_pd(f32::MAX as f64, f32::MIN as f64)); |
| assert_eq_m128(r, _mm_setr_ps(f32::MAX, f32::MIN, 0.0, 0.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtps_pd() { |
| let r = _mm_cvtps_pd(_mm_setr_ps(-1.0, 2.0, -3.0, 5.0)); |
| assert_eq_m128d(r, _mm_setr_pd(-1.0, 2.0)); |
| |
| let r = _mm_cvtps_pd(_mm_setr_ps( |
| f32::MAX, |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| f32::MIN, |
| )); |
| assert_eq_m128d(r, _mm_setr_pd(f32::MAX as f64, f64::INFINITY)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtpd_epi32() { |
| let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, 5.0)); |
| assert_eq_m128i(r, _mm_setr_epi32(-1, 5, 0, 0)); |
| |
| let r = _mm_cvtpd_epi32(_mm_setr_pd(-1.0, -5.0)); |
| assert_eq_m128i(r, _mm_setr_epi32(-1, -5, 0, 0)); |
| |
| let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::MAX, f64::MIN)); |
| assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
| |
| let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::INFINITY, f64::NEG_INFINITY)); |
| assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
| |
| let r = _mm_cvtpd_epi32(_mm_setr_pd(f64::NAN, f64::NAN)); |
| assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsd_si32() { |
| let r = _mm_cvtsd_si32(_mm_setr_pd(-2.0, 5.0)); |
| assert_eq!(r, -2); |
| |
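// out-of-range and NaN inputs likewise return the indefinite value i32::MIN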
| let r = _mm_cvtsd_si32(_mm_setr_pd(f64::MAX, f64::MIN)); |
| assert_eq!(r, i32::MIN); |
| |
| let r = _mm_cvtsd_si32(_mm_setr_pd(f64::NAN, f64::NAN)); |
| assert_eq!(r, i32::MIN); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsd_ss() { |
| let a = _mm_setr_ps(-1.1, -2.2, 3.3, 4.4); |
| let b = _mm_setr_pd(2.0, -5.0); |
| |
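// the low f64 of b is rounded to f32 and written to the low lane of a;
// the three upper lanes of a pass through unchanged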
| let r = _mm_cvtsd_ss(a, b); |
| |
| assert_eq_m128(r, _mm_setr_ps(2.0, -2.2, 3.3, 4.4)); |
| |
| let a = _mm_setr_ps(-1.1, f32::NEG_INFINITY, f32::MAX, f32::NEG_INFINITY); |
| let b = _mm_setr_pd(f64::INFINITY, -5.0); |
| |
| let r = _mm_cvtsd_ss(a, b); |
| |
| assert_eq_m128( |
| r, |
| _mm_setr_ps( |
| f32::INFINITY, |
| f32::NEG_INFINITY, |
| f32::MAX, |
| f32::NEG_INFINITY, |
| ), |
| ); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtsd_f64() { |
| let r = _mm_cvtsd_f64(_mm_setr_pd(-1.1, 2.2)); |
| assert_eq!(r, -1.1); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvtss_sd() { |
| let a = _mm_setr_pd(-1.1, 2.2); |
| let b = _mm_setr_ps(1.0, 2.0, 3.0, 4.0); |
| |
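// the low f32 of b is widened (exactly) to f64 and replaces the low lane of a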
| let r = _mm_cvtss_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(1.0, 2.2)); |
| |
| let a = _mm_setr_pd(-1.1, f64::INFINITY); |
| let b = _mm_setr_ps(f32::NEG_INFINITY, 2.0, 3.0, 4.0); |
| |
| let r = _mm_cvtss_sd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(f64::NEG_INFINITY, f64::INFINITY)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvttpd_epi32() { |
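// the cvtt* variants truncate toward zero regardless of the current
// rounding mode; unrepresentable inputs still yield i32::MIN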
| let a = _mm_setr_pd(-1.1, 2.2); |
| let r = _mm_cvttpd_epi32(a); |
| assert_eq_m128i(r, _mm_setr_epi32(-1, 2, 0, 0)); |
| |
| let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
| let r = _mm_cvttpd_epi32(a); |
| assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, 0, 0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvttsd_si32() { |
| let a = _mm_setr_pd(-1.1, 2.2); |
| let r = _mm_cvttsd_si32(a); |
| assert_eq!(r, -1); |
| |
| let a = _mm_setr_pd(f64::NEG_INFINITY, f64::NAN); |
| let r = _mm_cvttsd_si32(a); |
| assert_eq!(r, i32::MIN); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_cvttps_epi32() { |
| let a = _mm_setr_ps(-1.1, 2.2, -3.3, 6.6); |
| let r = _mm_cvttps_epi32(a); |
| assert_eq_m128i(r, _mm_setr_epi32(-1, 2, -3, 6)); |
| |
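// after truncation all four inputs are outside the i32 range, so every
// lane becomes the indefinite value i32::MIN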
| let a = _mm_setr_ps(f32::NEG_INFINITY, f32::INFINITY, f32::MIN, f32::MAX); |
| let r = _mm_cvttps_epi32(a); |
| assert_eq_m128i(r, _mm_setr_epi32(i32::MIN, i32::MIN, i32::MIN, i32::MIN)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_sd() { |
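// set_sd fills the low lane and zeroes the high lane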
| let r = _mm_set_sd(-1.0_f64); |
| assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, 0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set1_pd() { |
| let r = _mm_set1_pd(-1.0_f64); |
| assert_eq_m128d(r, _mm_setr_pd(-1.0_f64, -1.0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_pd1() { |
| let r = _mm_set_pd1(-2.0_f64); |
| assert_eq_m128d(r, _mm_setr_pd(-2.0_f64, -2.0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_set_pd() { |
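// set_pd takes its arguments in (high, low) order, the reverse of setr_pd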
| let r = _mm_set_pd(1.0_f64, 5.0_f64); |
| assert_eq_m128d(r, _mm_setr_pd(5.0_f64, 1.0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setr_pd() { |
| let r = _mm_setr_pd(1.0_f64, -5.0_f64); |
| assert_eq_m128d(r, _mm_setr_pd(1.0_f64, -5.0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_setzero_pd() { |
| let r = _mm_setzero_pd(); |
| assert_eq_m128d(r, _mm_setr_pd(0_f64, 0_f64)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_load1_pd() { |
| let d = -5.0; |
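// load1_pd broadcasts one f64 into both lanes; _mm_load_pd1 (next test)
// is an alias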
| let r = _mm_load1_pd(&d); |
| assert_eq_m128d(r, _mm_setr_pd(d, d)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_load_pd1() { |
| let d = -5.0; |
| let r = _mm_load_pd1(&d); |
| assert_eq_m128d(r, _mm_setr_pd(d, d)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpackhi_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(3.0, 4.0); |
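// unpackhi interleaves the high lanes: [a[1], b[1]]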
| let r = _mm_unpackhi_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(2.0, 4.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_unpacklo_pd() { |
| let a = _mm_setr_pd(1.0, 2.0); |
| let b = _mm_setr_pd(3.0, 4.0); |
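// unpacklo interleaves the low lanes: [a[0], b[0]]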
| let r = _mm_unpacklo_pd(a, b); |
| assert_eq_m128d(r, _mm_setr_pd(1.0, 3.0)); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_shuffle_pd() { |
| let a = _mm_setr_pd(1., 2.); |
| let b = _mm_setr_pd(3., 4.); |
| let expected = _mm_setr_pd(1., 3.); |
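// mask bit 0 selects the lane taken from a, bit 1 the lane taken from b;
// a mask of 0 picks the low lane of each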
let r = _mm_shuffle_pd::<0b00>(a, b);
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_move_sd() { |
| let a = _mm_setr_pd(1., 2.); |
| let b = _mm_setr_pd(3., 4.); |
| let expected = _mm_setr_pd(3., 2.); |
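// move_sd takes the low lane from b and the high lane from a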
| let r = _mm_move_sd(a, b); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castpd_ps() { |
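// the cast intrinsics reinterpret bits without any conversion; an all-zero
// pattern is zero under every interpretation, which these tests rely on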
| let a = _mm_set1_pd(0.); |
| let expected = _mm_set1_ps(0.); |
| let r = _mm_castpd_ps(a); |
| assert_eq_m128(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castpd_si128() { |
| let a = _mm_set1_pd(0.); |
| let expected = _mm_set1_epi64x(0); |
| let r = _mm_castpd_si128(a); |
| assert_eq_m128i(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castps_pd() { |
| let a = _mm_set1_ps(0.); |
| let expected = _mm_set1_pd(0.); |
| let r = _mm_castps_pd(a); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castps_si128() { |
| let a = _mm_set1_ps(0.); |
| let expected = _mm_set1_epi32(0); |
| let r = _mm_castps_si128(a); |
| assert_eq_m128i(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castsi128_pd() { |
| let a = _mm_set1_epi64x(0); |
| let expected = _mm_set1_pd(0.); |
| let r = _mm_castsi128_pd(a); |
| assert_eq_m128d(r, expected); |
| } |
| |
| #[simd_test(enable = "sse2")] |
| unsafe fn test_mm_castsi128_ps() { |
| let a = _mm_set1_epi32(0); |
| let expected = _mm_set1_ps(0.); |
| let r = _mm_castsi128_ps(a); |
| assert_eq_m128(r, expected); |
| } |
| } |