use crate::{
core_arch::{simd::*, simd_llvm::*, x86::*},
mem::{self, transmute},
ptr,
};
#[cfg(test)]
use stdarch_test::assert_instr;
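/// Computes the absolute value of packed signed 32-bit integers in `a`.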
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_abs_epi32(a: __m512i) -> __m512i {
let a = a.as_i32x16();
    // all-0 is a properly initialized i32x16
    let zero: i32x16 = mem::zeroed();
let sub = simd_sub(zero, a);
let cmp: i32x16 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
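/// Computes the absolute value of packed signed 32-bit integers in `a`, using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).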
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_mask_abs_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, abs, src.as_i32x16()))
}
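/// Computes the absolute value of packed signed 32-bit integers in `a`, using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).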
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm512_maskz_abs_epi32(k: __mmask16, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, abs, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_mask_abs_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let abs = _mm256_abs_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, abs, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm256_maskz_abs_epi32(k: __mmask8, a: __m256i) -> __m256i {
let abs = _mm256_abs_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, abs, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_mask_abs_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let abs = _mm_abs_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, abs, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsd))]
pub unsafe fn _mm_maskz_abs_epi32(k: __mmask8, a: __m128i) -> __m128i {
let abs = _mm_abs_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, abs, zero))
}
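/// Computes the absolute value of packed signed 64-bit integers in `a`.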
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_abs_epi64(a: __m512i) -> __m512i {
let a = a.as_i64x8();
    // all-0 is a properly initialized i64x8
    let zero: i64x8 = mem::zeroed();
let sub = simd_sub(zero, a);
let cmp: i64x8 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
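/// Computes the absolute value of packed signed 64-bit integers in `a`, using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).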
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_mask_abs_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, abs, src.as_i64x8()))
}
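/// Computes the absolute value of packed signed 64-bit integers in `a`, using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).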
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm512_maskz_abs_epi64(k: __mmask8, a: __m512i) -> __m512i {
let abs = _mm512_abs_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, abs, zero))
}
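/// Computes the absolute value of packed signed 64-bit integers in `a`.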
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_abs_epi64(a: __m256i) -> __m256i {
let a = a.as_i64x4();
    // all-0 is a properly initialized i64x4
    let zero: i64x4 = mem::zeroed();
let sub = simd_sub(zero, a);
let cmp: i64x4 = simd_gt(a, zero);
transmute(simd_select(cmp, a, sub))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_mask_abs_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let abs = _mm256_abs_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, abs, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpabsq))]
pub unsafe fn _mm256_maskz_abs_epi64(k: __mmask8, a: __m256i) -> __m256i {
let abs = _mm256_abs_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, abs, zero))
}
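/// Computes the absolute value of packed single-precision (32-bit) floating-point elements in
/// `v2`, by clearing the sign bit of each element.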
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_ps(v2: __m512) -> __m512 {
let a = _mm512_set1_epi32(0x7FFFFFFF);
let b = transmute::<f32x16, __m512i>(v2.as_f32x16());
let abs = _mm512_and_epi32(a, b);
transmute(abs)
}
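/// Computes the absolute value of packed single-precision (32-bit) floating-point elements in
/// `v2`, using writemask `k` (elements are copied from `src` when the corresponding mask bit is
/// not set).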
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_abs_ps(src: __m512, k: __mmask16, v2: __m512) -> __m512 {
let abs = _mm512_abs_ps(v2).as_f32x16();
transmute(simd_select_bitmask(k, abs, src.as_f32x16()))
}
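/// Computes the absolute value of packed double-precision (64-bit) floating-point elements in
/// `v2`, by clearing the sign bit of each element.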
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_abs_pd(v2: __m512d) -> __m512d {
let a = _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF);
let b = transmute::<f64x8, __m512i>(v2.as_f64x8());
let abs = _mm512_and_epi64(a, b);
transmute(abs)
}
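/// Computes the absolute value of packed double-precision (64-bit) floating-point elements in
/// `v2`, using writemask `k` (elements are copied from `src` when the corresponding mask bit is
/// not set).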
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_abs_pd(src: __m512d, k: __mmask8, v2: __m512d) -> __m512d {
let abs = _mm512_abs_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, abs, src.as_f64x8()))
}
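/// Moves packed 32-bit integers from `a` into the result using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).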
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_mask_mov_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
let mov = a.as_i32x16();
transmute(simd_select_bitmask(k, mov, src.as_i32x16()))
}
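/// Moves packed 32-bit integers from `a` into the result using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).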
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm512_maskz_mov_epi32(k: __mmask16, a: __m512i) -> __m512i {
let mov = a.as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_mask_mov_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let mov = a.as_i32x8();
transmute(simd_select_bitmask(k, mov, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm256_maskz_mov_epi32(k: __mmask8, a: __m256i) -> __m256i {
let mov = a.as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_mask_mov_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let mov = a.as_i32x4();
transmute(simd_select_bitmask(k, mov, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))]
pub unsafe fn _mm_maskz_mov_epi32(k: __mmask8, a: __m128i) -> __m128i {
let mov = a.as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, mov, zero))
}
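/// Moves packed 64-bit integers from `a` into the result using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).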
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_mask_mov_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
let mov = a.as_i64x8();
transmute(simd_select_bitmask(k, mov, src.as_i64x8()))
}
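/// Moves packed 64-bit integers from `a` into the result using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).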
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm512_maskz_mov_epi64(k: __mmask8, a: __m512i) -> __m512i {
let mov = a.as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_mask_mov_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
let mov = a.as_i64x4();
transmute(simd_select_bitmask(k, mov, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm256_maskz_mov_epi64(k: __mmask8, a: __m256i) -> __m256i {
let mov = a.as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_mask_mov_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let mov = a.as_i64x2();
transmute(simd_select_bitmask(k, mov, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))]
pub unsafe fn _mm_maskz_mov_epi64(k: __mmask8, a: __m128i) -> __m128i {
let mov = a.as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, mov, zero))
}
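/// Moves packed single-precision (32-bit) floating-point elements from `a` into the result using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).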
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_mask_mov_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov = a.as_f32x16();
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
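/// Moves packed single-precision (32-bit) floating-point elements from `a` into the result using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).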
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_maskz_mov_ps(k: __mmask16, a: __m512) -> __m512 {
let mov = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_mask_mov_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
let mov = a.as_f32x8();
transmute(simd_select_bitmask(k, mov, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_maskz_mov_ps(k: __mmask8, a: __m256) -> __m256 {
let mov = a.as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_mask_mov_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
let mov = a.as_f32x4();
transmute(simd_select_bitmask(k, mov, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_maskz_mov_ps(k: __mmask8, a: __m128) -> __m128 {
let mov = a.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, mov, zero))
}
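/// Moves packed double-precision (64-bit) floating-point elements from `a` into the result using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).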
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_mask_mov_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let mov = a.as_f64x8();
transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}
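/// Moves packed double-precision (64-bit) floating-point elements from `a` into the result using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).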
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm512_maskz_mov_pd(k: __mmask8, a: __m512d) -> __m512d {
let mov = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_mask_mov_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
let mov = a.as_f64x4();
transmute(simd_select_bitmask(k, mov, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm256_maskz_mov_pd(k: __mmask8, a: __m256d) -> __m256d {
let mov = a.as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_mask_mov_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
let mov = a.as_f64x2();
transmute(simd_select_bitmask(k, mov, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))]
pub unsafe fn _mm_maskz_mov_pd(k: __mmask8, a: __m128d) -> __m128d {
let mov = a.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, mov, zero))
}
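/// Adds packed 32-bit integers in `a` and `b`.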
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_add_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_add(a.as_i32x16(), b.as_i32x16()))
}
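/// Adds packed 32-bit integers in `a` and `b`, using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).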
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_mask_add_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, add, src.as_i32x16()))
}
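/// Adds packed 32-bit integers in `a` and `b`, using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).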
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm512_maskz_add_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_mask_add_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let add = _mm256_add_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, add, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm256_maskz_add_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let add = _mm256_add_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_mask_add_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let add = _mm_add_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, add, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddd))]
pub unsafe fn _mm_maskz_add_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let add = _mm_add_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, add, zero))
}
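/// Adds packed 64-bit integers in `a` and `b`.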
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_add_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_add(a.as_i64x8(), b.as_i64x8()))
}
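/// Adds packed 64-bit integers in `a` and `b`, using writemask `k`
/// (elements are copied from `src` when the corresponding mask bit is not set).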
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_mask_add_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, add, src.as_i64x8()))
}
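/// Adds packed 64-bit integers in `a` and `b`, using zeromask `k`
/// (elements are zeroed out when the corresponding mask bit is not set).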
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm512_maskz_add_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let add = _mm512_add_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_mask_add_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let add = _mm256_add_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, add, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm256_maskz_add_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let add = _mm256_add_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_mask_add_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let add = _mm_add_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, add, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpaddq))]
pub unsafe fn _mm_maskz_add_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let add = _mm_add_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, add, zero))
}
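/// Adds packed single-precision (32-bit) floating-point elements in `a` and `b`.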
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_add_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_add(a.as_f32x16(), b.as_f32x16()))
}
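/// Adds packed single-precision (32-bit) floating-point elements in `a` and `b`, using writemask
/// `k` (elements are copied from `src` when the corresponding mask bit is not set).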
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_mask_add_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let add = _mm512_add_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, add, src.as_f32x16()))
}
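/// Adds packed single-precision (32-bit) floating-point elements in `a` and `b`, using zeromask
/// `k` (elements are zeroed out when the corresponding mask bit is not set).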
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm512_maskz_add_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let add = _mm512_add_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_mask_add_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let add = _mm256_add_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, add, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm256_maskz_add_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let add = _mm256_add_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_mask_add_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let add = _mm_add_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, add, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddps))]
pub unsafe fn _mm_maskz_add_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let add = _mm_add_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, add, zero))
}
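/// Adds packed double-precision (64-bit) floating-point elements in `a` and `b`.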
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_add_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_add(a.as_f64x8(), b.as_f64x8()))
}
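/// Adds packed double-precision (64-bit) floating-point elements in `a` and `b`, using writemask
/// `k` (elements are copied from `src` when the corresponding mask bit is not set).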
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_mask_add_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let add = _mm512_add_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, add, src.as_f64x8()))
}
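/// Adds packed double-precision (64-bit) floating-point elements in `a` and `b`, using zeromask
/// `k` (elements are zeroed out when the corresponding mask bit is not set).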
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm512_maskz_add_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let add = _mm512_add_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_mask_add_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let add = _mm256_add_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, add, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm256_maskz_add_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let add = _mm256_add_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, add, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_mask_add_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let add = _mm_add_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, add, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vaddpd))]
pub unsafe fn _mm_maskz_add_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let add = _mm_add_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, add, zero))
}
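/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`.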
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_sub_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_sub(a.as_i32x16(), b.as_i32x16()))
}
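/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`, using writemask
/// `k` (elements are copied from `src` when the corresponding mask bit is not set).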
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_mask_sub_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, sub, src.as_i32x16()))
}
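/// Subtracts packed 32-bit integers in `b` from packed 32-bit integers in `a`, using zeromask
/// `k` (elements are zeroed out when the corresponding mask bit is not set).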
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm512_maskz_sub_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm256_mask_sub_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let sub = _mm256_sub_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, sub, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm256_maskz_sub_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let sub = _mm256_sub_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm_mask_sub_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let sub = _mm_sub_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, sub, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubd))]
pub unsafe fn _mm_maskz_sub_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let sub = _mm_sub_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, sub, zero))
}
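/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`.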
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_sub_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_sub(a.as_i64x8(), b.as_i64x8()))
}
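/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`, using writemask
/// `k` (elements are copied from `src` when the corresponding mask bit is not set).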
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_mask_sub_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, sub, src.as_i64x8()))
}
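/// Subtracts packed 64-bit integers in `b` from packed 64-bit integers in `a`, using zeromask
/// `k` (elements are zeroed out when the corresponding mask bit is not set).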
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm512_maskz_sub_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let sub = _mm512_sub_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm256_mask_sub_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let sub = _mm256_sub_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, sub, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm256_maskz_sub_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let sub = _mm256_sub_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm_mask_sub_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let sub = _mm_sub_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, sub, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsubq))]
pub unsafe fn _mm_maskz_sub_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let sub = _mm_sub_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, sub, zero))
}
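/// Subtracts packed single-precision (32-bit) floating-point elements in `b` from those in `a`.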
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_sub_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_sub(a.as_f32x16(), b.as_f32x16()))
}
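/// Subtracts packed single-precision (32-bit) floating-point elements in `b` from those in `a`,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).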
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_mask_sub_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let sub = _mm512_sub_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, sub, src.as_f32x16()))
}
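/// Subtracts packed single-precision (32-bit) floating-point elements in `b` from those in `a`,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).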
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm512_maskz_sub_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let sub = _mm512_sub_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm256_mask_sub_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let sub = _mm256_sub_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, sub, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm256_maskz_sub_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let sub = _mm256_sub_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm_mask_sub_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let sub = _mm_sub_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, sub, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubps))]
pub unsafe fn _mm_maskz_sub_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let sub = _mm_sub_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, sub, zero))
}
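/// Subtracts packed double-precision (64-bit) floating-point elements in `b` from those in `a`.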
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_sub_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_sub(a.as_f64x8(), b.as_f64x8()))
}
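/// Subtracts packed double-precision (64-bit) floating-point elements in `b` from those in `a`,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).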
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_mask_sub_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let sub = _mm512_sub_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, sub, src.as_f64x8()))
}
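/// Subtracts packed double-precision (64-bit) floating-point elements in `b` from those in `a`,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).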
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm512_maskz_sub_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let sub = _mm512_sub_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm256_mask_sub_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let sub = _mm256_sub_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, sub, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm256_maskz_sub_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let sub = _mm256_sub_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, sub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm_mask_sub_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let sub = _mm_sub_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, sub, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsubpd))]
pub unsafe fn _mm_maskz_sub_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let sub = _mm_sub_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, sub, zero))
}
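/// Multiplies the low signed 32-bit integers from each packed 64-bit element in `a` and `b`,
/// producing packed signed 64-bit results.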
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mul_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmuldq(a.as_i32x16(), b.as_i32x16()))
}
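/// Multiplies the low signed 32-bit integers from each packed 64-bit element in `a` and `b`,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).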
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_mask_mul_epi32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epi32(a, b).as_i64x8();
transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
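/// Multiplies the low signed 32-bit integers from each packed 64-bit element in `a` and `b`,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).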
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm512_maskz_mul_epi32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epi32(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm256_mask_mul_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let mul = _mm256_mul_epi32(a, b).as_i64x4();
transmute(simd_select_bitmask(k, mul, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm256_maskz_mul_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let mul = _mm256_mul_epi32(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm_mask_mul_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mul_epi32(a, b).as_i64x2();
transmute(simd_select_bitmask(k, mul, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuldq))]
pub unsafe fn _mm_maskz_mul_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mul_epi32(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, mul, zero))
}
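/// Multiplies packed 32-bit integers in `a` and `b`, producing intermediate 64-bit integers, and
/// keeps the low 32 bits of each intermediate result.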
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mullo_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_mul(a.as_i32x16(), b.as_i32x16()))
}
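/// Multiplies packed 32-bit integers in `a` and `b`, keeping the low 32 bits of each product,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).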
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_mask_mullo_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let mul = _mm512_mullo_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, mul, src.as_i32x16()))
}
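/// Multiplies packed 32-bit integers in `a` and `b`, keeping the low 32 bits of each product,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).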
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm512_maskz_mullo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mullo_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm256_mask_mullo_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let mul = _mm256_mullo_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, mul, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm256_maskz_mullo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let mul = _mm256_mullo_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm_mask_mullo_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mullo_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, mul, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmulld))]
pub unsafe fn _mm_maskz_mullo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mullo_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, mul, zero))
}
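/// Multiplies packed 64-bit integers in `a` and `b`, producing intermediate 128-bit integers, and
/// keeps the low 64 bits of each intermediate result. Note that, unlike the other intrinsics in
/// this module, this one carries no `assert_instr`: it may lower to an instruction sequence
/// rather than a single instruction.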
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mullox_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_mul(a.as_i64x8(), b.as_i64x8()))
}
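/// Multiplies packed 64-bit integers in `a` and `b`, keeping the low 64 bits of each product,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).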
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_mullox_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let mul = _mm512_mullox_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, mul, src.as_i64x8()))
}
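/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element in `a` and `b`,
/// producing packed unsigned 64-bit results.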
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mul_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmuludq(a.as_u32x16(), b.as_u32x16()))
}
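/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element in `a` and `b`,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).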
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_mask_mul_epu32(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epu32(a, b).as_u64x8();
transmute(simd_select_bitmask(k, mul, src.as_u64x8()))
}
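/// Multiplies the low unsigned 32-bit integers from each packed 64-bit element in `a` and `b`,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).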
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm512_maskz_mul_epu32(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let mul = _mm512_mul_epu32(a, b).as_u64x8();
let zero = _mm512_setzero_si512().as_u64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm256_mask_mul_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let mul = _mm256_mul_epu32(a, b).as_u64x4();
transmute(simd_select_bitmask(k, mul, src.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm256_maskz_mul_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let mul = _mm256_mul_epu32(a, b).as_u64x4();
let zero = _mm256_setzero_si256().as_u64x4();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm_mask_mul_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mul_epu32(a, b).as_u64x2();
transmute(simd_select_bitmask(k, mul, src.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmuludq))]
pub unsafe fn _mm_maskz_mul_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let mul = _mm_mul_epu32(a, b).as_u64x2();
let zero = _mm_setzero_si128().as_u64x2();
transmute(simd_select_bitmask(k, mul, zero))
}
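/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and `b`.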
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mul_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_mul(a.as_f32x16(), b.as_f32x16()))
}
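/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and `b`, using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).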
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_mask_mul_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let mul = _mm512_mul_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, mul, src.as_f32x16()))
}
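/// Multiplies packed single-precision (32-bit) floating-point elements in `a` and `b`, using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).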
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm512_maskz_mul_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let mul = _mm512_mul_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm256_mask_mul_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let mul = _mm256_mul_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, mul, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm256_maskz_mul_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let mul = _mm256_mul_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm_mask_mul_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mul = _mm_mul_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, mul, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulps))]
pub unsafe fn _mm_maskz_mul_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mul = _mm_mul_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, mul, zero))
}
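/// Multiplies packed double-precision (64-bit) floating-point elements in `a` and `b`.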
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mul_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_mul(a.as_f64x8(), b.as_f64x8()))
}
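/// Multiplies packed double-precision (64-bit) floating-point elements in `a` and `b`, using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).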
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_mask_mul_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let mul = _mm512_mul_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, mul, src.as_f64x8()))
}
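/// Multiplies packed double-precision (64-bit) floating-point elements in `a` and `b`, using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).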
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm512_maskz_mul_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let mul = _mm512_mul_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm256_mask_mul_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let mul = _mm256_mul_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, mul, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm256_maskz_mul_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let mul = _mm256_mul_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, mul, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm_mask_mul_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mul = _mm_mul_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, mul, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmulpd))]
pub unsafe fn _mm_maskz_mul_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mul = _mm_mul_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, mul, zero))
}
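/// Divides packed single-precision (32-bit) floating-point elements in `a` by the corresponding
/// elements in `b`.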
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_div_ps(a: __m512, b: __m512) -> __m512 {
transmute(simd_div(a.as_f32x16(), b.as_f32x16()))
}
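/// Divides packed single-precision (32-bit) floating-point elements in `a` by those in `b`, using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).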
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_mask_div_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let div = _mm512_div_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, div, src.as_f32x16()))
}
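/// Divides packed single-precision (32-bit) floating-point elements in `a` by those in `b`, using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).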
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm512_maskz_div_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let div = _mm512_div_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm256_mask_div_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let div = _mm256_div_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, div, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm256_maskz_div_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let div = _mm256_div_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm_mask_div_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let div = _mm_div_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, div, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivps))]
pub unsafe fn _mm_maskz_div_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let div = _mm_div_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, div, zero))
}
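/// Divides packed double-precision (64-bit) floating-point elements in `a` by the corresponding
/// elements in `b`.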
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_div_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(simd_div(a.as_f64x8(), b.as_f64x8()))
}
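/// Divides packed double-precision (64-bit) floating-point elements in `a` by those in `b`, using
/// writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).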
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_mask_div_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let div = _mm512_div_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, div, src.as_f64x8()))
}
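/// Divides packed double-precision (64-bit) floating-point elements in `a` by those in `b`, using
/// zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).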
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm512_maskz_div_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let div = _mm512_div_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm256_mask_div_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let div = _mm256_div_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, div, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm256_maskz_div_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let div = _mm256_div_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, div, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm_mask_div_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let div = _mm_div_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, div, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vdivpd))]
pub unsafe fn _mm_maskz_div_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let div = _mm_div_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, div, zero))
}
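/// Compares packed signed 32-bit integers in `a` and `b`, and returns the packed maximum values.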
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_max_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxsd(a.as_i32x16(), b.as_i32x16()))
}
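/// Compares packed signed 32-bit integers in `a` and `b`, returning the packed maximum values,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).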
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_mask_max_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, max, src.as_i32x16()))
}
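/// Compares packed signed 32-bit integers in `a` and `b`, returning the packed maximum values,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).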
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm512_maskz_max_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm256_mask_max_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, max, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm256_maskz_max_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm_mask_max_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, max, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsd))]
pub unsafe fn _mm_maskz_max_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, max, zero))
}
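/// Compares packed signed 64-bit integers in `a` and `b`, and returns the packed maximum values.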
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_max_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxsq(a.as_i64x8(), b.as_i64x8()))
}
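/// Compares packed signed 64-bit integers in `a` and `b`, returning the packed maximum values,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).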
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_mask_max_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, max, src.as_i64x8()))
}
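/// Compares packed signed 64-bit integers in `a` and `b`, returning the packed maximum values,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).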
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm512_maskz_max_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, max, zero))
}
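/// Compares packed signed 64-bit integers in `a` and `b`, and returns the packed maximum values.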
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_max_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(vpmaxsq256(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_mask_max_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, max, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm256_maskz_max_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, max, zero))
}
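/// Compares packed signed 64-bit integers in `a` and `b`, and returns the packed maximum values.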
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_max_epi64(a: __m128i, b: __m128i) -> __m128i {
transmute(vpmaxsq128(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_mask_max_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, max, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxsq))]
pub unsafe fn _mm_maskz_max_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, max, zero))
}
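/// Compares packed single-precision (32-bit) floating-point elements in `a` and `b`, and returns
/// the packed maximum values.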
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_max_ps(a: __m512, b: __m512) -> __m512 {
transmute(vmaxps(
a.as_f32x16(),
b.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
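/// Compares packed single-precision (32-bit) floating-point elements in `a` and `b`, returning
/// the packed maximum values, using writemask `k` (elements are copied from `src` when the
/// corresponding mask bit is not set).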
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_mask_max_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let max = _mm512_max_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}
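/// Compares packed single-precision (32-bit) floating-point elements in `a` and `b`, returning
/// the packed maximum values, using zeromask `k` (elements are zeroed out when the corresponding
/// mask bit is not set).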
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm512_maskz_max_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let max = _mm512_max_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm256_mask_max_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let max = _mm256_max_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, max, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm256_maskz_max_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let max = _mm256_max_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm_mask_max_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let max = _mm_max_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, max, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxps))]
pub unsafe fn _mm_maskz_max_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let max = _mm_max_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, max, zero))
}
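/// Compares packed double-precision (64-bit) floating-point elements in `a` and `b`, and returns
/// the packed maximum values.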
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_max_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vmaxpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
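/// Compares packed double-precision (64-bit) floating-point elements in `a` and `b`, returning
/// the packed maximum values, using writemask `k` (elements are copied from `src` when the
/// corresponding mask bit is not set).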
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_mask_max_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let max = _mm512_max_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}
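/// Compares packed double-precision (64-bit) floating-point elements in `a` and `b`, returning
/// the packed maximum values, using zeromask `k` (elements are zeroed out when the corresponding
/// mask bit is not set).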
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm512_maskz_max_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let max = _mm512_max_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm256_mask_max_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let max = _mm256_max_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, max, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm256_maskz_max_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let max = _mm256_max_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm_mask_max_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let max = _mm_max_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, max, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmaxpd))]
pub unsafe fn _mm_maskz_max_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let max = _mm_max_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, max, zero))
}
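/// Compares packed unsigned 32-bit integers in `a` and `b`, and returns the packed maximum values.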
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_max_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxud(a.as_u32x16(), b.as_u32x16()))
}
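/// Compares packed unsigned 32-bit integers in `a` and `b`, returning the packed maximum values,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).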
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_mask_max_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu32(a, b).as_u32x16();
transmute(simd_select_bitmask(k, max, src.as_u32x16()))
}
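/// Compares packed unsigned 32-bit integers in `a` and `b`, returning the packed maximum values,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).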
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm512_maskz_max_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu32(a, b).as_u32x16();
let zero = _mm512_setzero_si512().as_u32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm256_mask_max_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epu32(a, b).as_u32x8();
transmute(simd_select_bitmask(k, max, src.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm256_maskz_max_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epu32(a, b).as_u32x8();
let zero = _mm256_setzero_si256().as_u32x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm_mask_max_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epu32(a, b).as_u32x4();
transmute(simd_select_bitmask(k, max, src.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxud))]
pub unsafe fn _mm_maskz_max_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epu32(a, b).as_u32x4();
let zero = _mm_setzero_si128().as_u32x4();
transmute(simd_select_bitmask(k, max, zero))
}
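/// Compares packed unsigned 64-bit integers in `a` and `b`, and returns the packed maximum values.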
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_max_epu64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpmaxuq(a.as_u64x8(), b.as_u64x8()))
}
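/// Compares packed unsigned 64-bit integers in `a` and `b`, returning the packed maximum values,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).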
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_mask_max_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu64(a, b).as_u64x8();
transmute(simd_select_bitmask(k, max, src.as_u64x8()))
}
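/// Compares packed unsigned 64-bit integers in `a` and `b`, returning the packed maximum values,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).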
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm512_maskz_max_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let max = _mm512_max_epu64(a, b).as_u64x8();
let zero = _mm512_setzero_si512().as_u64x8();
transmute(simd_select_bitmask(k, max, zero))
}
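/// Compares packed unsigned 64-bit integers in `a` and `b`, and returns the packed maximum values.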
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_max_epu64(a: __m256i, b: __m256i) -> __m256i {
transmute(vpmaxuq256(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_mask_max_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epu64(a, b).as_u64x4();
transmute(simd_select_bitmask(k, max, src.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm256_maskz_max_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let max = _mm256_max_epu64(a, b).as_u64x4();
let zero = _mm256_setzero_si256().as_u64x4();
transmute(simd_select_bitmask(k, max, zero))
}
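/// Compares packed unsigned 64-bit integers in `a` and `b`, and returns the packed maximum values.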
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_max_epu64(a: __m128i, b: __m128i) -> __m128i {
transmute(vpmaxuq128(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_mask_max_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epu64(a, b).as_u64x2();
transmute(simd_select_bitmask(k, max, src.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmaxuq))]
pub unsafe fn _mm_maskz_max_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let max = _mm_max_epu64(a, b).as_u64x2();
let zero = _mm_setzero_si128().as_u64x2();
transmute(simd_select_bitmask(k, max, zero))
}
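/// Compares packed signed 32-bit integers in `a` and `b`, and returns the packed minimum values.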
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_min_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminsd(a.as_i32x16(), b.as_i32x16()))
}
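/// Compares packed signed 32-bit integers in `a` and `b`, returning the packed minimum values,
/// using writemask `k` (elements are copied from `src` when the corresponding mask bit is not set).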
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_mask_min_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, min, src.as_i32x16()))
}
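/// Compares packed signed 32-bit integers in `a` and `b`, returning the packed minimum values,
/// using zeromask `k` (elements are zeroed out when the corresponding mask bit is not set).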
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm512_maskz_min_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm256_mask_min_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, min, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm256_maskz_min_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm_mask_min_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, min, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsd))]
pub unsafe fn _mm_maskz_min_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, min, zero))
}
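/// Compare packed signed 64-bit integers in `a` and `b`, and return the packed
/// minimum values. There is no AVX2 equivalent at the narrower widths, so
/// `_mm256_min_epi64` is defined here behind `avx512f,avx512vl`.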
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_min_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminsq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_mask_min_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, min, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm512_maskz_min_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_min_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(vpminsq256(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_mask_min_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, min, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminsq))]
pub unsafe fn _mm256_maskz_min_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, min, zero))
}
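/// Compare packed single-precision (32-bit) floating-point elements in `a` and
/// `b`, and return the packed minimum values. `_MM_FROUND_CUR_DIRECTION`
/// requests the default exception behavior (no suppress-all-exceptions
/// override).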
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_min_ps(a: __m512, b: __m512) -> __m512 {
transmute(vminps(
a.as_f32x16(),
b.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_mask_min_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let min = _mm512_min_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm512_maskz_min_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let min = _mm512_min_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm256_mask_min_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let min = _mm256_min_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, min, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm256_maskz_min_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let min = _mm256_min_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm_mask_min_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let min = _mm_min_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, min, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminps))]
pub unsafe fn _mm_maskz_min_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let min = _mm_min_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, min, zero))
}
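/// Compare packed double-precision (64-bit) floating-point elements in `a` and
/// `b`, and return the packed minimum values.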
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_min_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vminpd(a.as_f64x8(), b.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_mask_min_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let min = _mm512_min_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm512_maskz_min_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let min = _mm512_min_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm256_mask_min_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let min = _mm256_min_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, min, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm256_maskz_min_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let min = _mm256_min_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm_mask_min_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let min = _mm_min_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, min, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vminpd))]
pub unsafe fn _mm_maskz_min_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let min = _mm_min_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, min, zero))
}
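/// Compare packed unsigned 32-bit integers in `a` and `b`, and return the
/// packed minimum values.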
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_min_epu32(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminud(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_mask_min_epu32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epu32(a, b).as_u32x16();
transmute(simd_select_bitmask(k, min, src.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm512_maskz_min_epu32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epu32(a, b).as_u32x16();
let zero = _mm512_setzero_si512().as_u32x16();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm256_mask_min_epu32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epu32(a, b).as_u32x8();
transmute(simd_select_bitmask(k, min, src.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm256_maskz_min_epu32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epu32(a, b).as_u32x8();
let zero = _mm256_setzero_si256().as_u32x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm_mask_min_epu32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epu32(a, b).as_u32x4();
transmute(simd_select_bitmask(k, min, src.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminud))]
pub unsafe fn _mm_maskz_min_epu32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epu32(a, b).as_u32x4();
let zero = _mm_setzero_si128().as_u32x4();
transmute(simd_select_bitmask(k, min, zero))
}
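/// Compare packed unsigned 64-bit integers in `a` and `b`, and return the
/// packed minimum values.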
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_min_epu64(a: __m512i, b: __m512i) -> __m512i {
transmute(vpminuq(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_mask_min_epu64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epu64(a, b).as_u64x8();
transmute(simd_select_bitmask(k, min, src.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm512_maskz_min_epu64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let min = _mm512_min_epu64(a, b).as_u64x8();
let zero = _mm512_setzero_si512().as_u64x8();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_min_epu64(a: __m256i, b: __m256i) -> __m256i {
transmute(vpminuq256(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_mask_min_epu64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epu64(a, b).as_u64x4();
transmute(simd_select_bitmask(k, min, src.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm256_maskz_min_epu64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let min = _mm256_min_epu64(a, b).as_u64x4();
let zero = _mm256_setzero_si256().as_u64x4();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_min_epu64(a: __m128i, b: __m128i) -> __m128i {
transmute(vpminuq128(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_mask_min_epu64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epu64(a, b).as_u64x2();
transmute(simd_select_bitmask(k, min, src.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpminuq))]
pub unsafe fn _mm_maskz_min_epu64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let min = _mm_min_epu64(a, b).as_u64x2();
let zero = _mm_setzero_si128().as_u64x2();
transmute(simd_select_bitmask(k, min, zero))
}
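/// Compute the square root of packed single-precision (32-bit) floating-point
/// elements in `a`.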
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_sqrt_ps(a: __m512) -> __m512 {
transmute(vsqrtps(a.as_f32x16(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_mask_sqrt_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, sqrt, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm512_maskz_sqrt_ps(k: __mmask16, a: __m512) -> __m512 {
let sqrt = _mm512_sqrt_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sqrt, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_mask_sqrt_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
let sqrt = _mm256_sqrt_ps(a).as_f32x8();
transmute(simd_select_bitmask(k, sqrt, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm256_maskz_sqrt_ps(k: __mmask8, a: __m256) -> __m256 {
let sqrt = _mm256_sqrt_ps(a).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, sqrt, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_mask_sqrt_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
let sqrt = _mm_sqrt_ps(a).as_f32x4();
transmute(simd_select_bitmask(k, sqrt, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtps))]
pub unsafe fn _mm_maskz_sqrt_ps(k: __mmask8, a: __m128) -> __m128 {
let sqrt = _mm_sqrt_ps(a).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, sqrt, zero))
}
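/// Compute the square root of packed double-precision (64-bit) floating-point
/// elements in `a`.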
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_sqrt_pd(a: __m512d) -> __m512d {
transmute(vsqrtpd(a.as_f64x8(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_mask_sqrt_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, sqrt, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm512_maskz_sqrt_pd(k: __mmask8, a: __m512d) -> __m512d {
let sqrt = _mm512_sqrt_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sqrt, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_mask_sqrt_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
let sqrt = _mm256_sqrt_pd(a).as_f64x4();
transmute(simd_select_bitmask(k, sqrt, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm256_maskz_sqrt_pd(k: __mmask8, a: __m256d) -> __m256d {
let sqrt = _mm256_sqrt_pd(a).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, sqrt, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_mask_sqrt_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
let sqrt = _mm_sqrt_pd(a).as_f64x2();
transmute(simd_select_bitmask(k, sqrt, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vsqrtpd))]
pub unsafe fn _mm_maskz_sqrt_pd(k: __mmask8, a: __m128d) -> __m128d {
let sqrt = _mm_sqrt_pd(a).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, sqrt, zero))
}
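/// Multiply packed single-precision elements in `a` and `b`, add the
/// intermediate result to `c`, and return `(a * b) + c` with a single
/// rounding. The masked variants differ only in their fallback lanes:
/// `_mask_` copies from `a`, `_maskz_` zeroes, and `_mask3_` copies from `c`.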
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmadd = _mm512_fmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_mask_fmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmadd, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_maskz_fmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_mask3_fmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fmadd = _mm256_fmadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmadd, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask_fmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmadd, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_maskz_fmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask3_fmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fmadd = _mm_fmadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmadd, c.as_f32x4()))
}
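/// Multiply packed double-precision elements in `a` and `b`, add the
/// intermediate result to `c`, and return `(a * b) + c` with a single
/// rounding.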
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmadd = _mm512_fmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_mask_fmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmadd, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_maskz_fmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm256_mask3_fmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fmadd = _mm256_fmadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmadd, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask_fmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmadd, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_maskz_fmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm_mask3_fmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fmadd = _mm_fmadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmadd, c.as_f64x2()))
}
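/// Multiply packed single-precision elements in `a` and `b`, subtract `c`
/// from the intermediate result, and return `(a * b) - c`. The 512-bit forms
/// are lowered as `vfmadd` applied to a negated `c`, which is why their
/// instruction assertions check `vfmadd` rather than `vfmsub`.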
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
transmute(vfmadd132ps(a.as_f32x16(), b.as_f32x16(), sub))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsub = _mm512_fmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_mask_fmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmsub, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_maskz_fmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_mask3_fmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fmsub = _mm256_fmsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmsub, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask_fmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmsub, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_maskz_fmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask3_fmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fmsub = _mm_fmsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmsub, c.as_f32x4()))
}
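/// Multiply packed double-precision elements in `a` and `b`, subtract `c`
/// from the intermediate result, and return `(a * b) - c`; lowered like the
/// single-precision forms via a negated `c`.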
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
transmute(vfmadd132pd(a.as_f64x8(), b.as_f64x8(), sub))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsub = _mm512_fmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_mask_fmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmsub, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_maskz_fmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm256_mask3_fmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fmsub = _mm256_fmsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmsub, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask_fmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmsub, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_maskz_fmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsub))]
pub unsafe fn _mm_mask3_fmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fmsub = _mm_fmsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmsub, c.as_f64x2()))
}
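/// Multiply packed single-precision elements in `a` and `b`, then alternately
/// subtract and add `c`: even-indexed lanes compute `(a * b) - c` and
/// odd-indexed lanes compute `(a * b) + c`.
///
/// Illustrative sketch (hypothetical values):
///
/// ```ignore
/// // with every lane of a and b set to 1.0 and every lane of c set to 0.5,
/// // even lanes hold 0.5 and odd lanes hold 1.5
/// let r = _mm512_fmaddsub_ps(a, b, c);
/// ```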
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmaddsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
transmute(vfmaddsub213ps(
a.as_f32x16(),
b.as_f32x16(),
c.as_f32x16(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmaddsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmaddsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmaddsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmaddsub = _mm512_fmaddsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_mask_fmaddsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_maskz_fmaddsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_mask3_fmaddsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fmaddsub = _mm256_fmaddsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_mask_fmaddsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_maskz_fmaddsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_mask3_fmaddsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fmaddsub = _mm_fmaddsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f32x4()))
}
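/// Multiply packed double-precision elements in `a` and `b`, then alternately
/// subtract and add `c`, with the same even/odd lane pattern as the
/// single-precision forms.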
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
transmute(vfmaddsub213pd(
a.as_f64x8(),
b.as_f64x8(),
c.as_f64x8(),
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmaddsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmaddsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmaddsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmaddsub = _mm512_fmaddsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_mask_fmaddsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_maskz_fmaddsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm256_mask3_fmaddsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fmaddsub = _mm256_fmaddsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_mask_fmaddsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmaddsub, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_maskz_fmaddsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm_mask3_fmaddsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fmaddsub = _mm_fmaddsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmaddsub, c.as_f64x2()))
}
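/// The mirror image of `fmaddsub`: even-indexed lanes compute `(a * b) + c`
/// and odd-indexed lanes compute `(a * b) - c`, implemented by negating `c`
/// and reusing the `vfmaddsub` lowering.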
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmsubadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
transmute(vfmaddsub213ps(
a.as_f32x16(),
b.as_f32x16(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmsubadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmsubadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmsubadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fmsubadd = _mm512_fmsubadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_mask_fmsubadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_maskz_fmsubadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_mask3_fmsubadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fmsubadd = _mm256_fmsubadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_mask_fmsubadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_maskz_fmsubadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_mask3_fmsubadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fmsubadd = _mm_fmsubadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f32x4()))
}
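/// Double-precision counterpart of `_mm512_fmsubadd_ps`: even-indexed lanes
/// compute `(a * b) + c` and odd-indexed lanes compute `(a * b) - c`.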
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
transmute(vfmaddsub213pd(
a.as_f64x8(),
b.as_f64x8(),
sub,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask_fmsubadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_maskz_fmsubadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub))]
pub unsafe fn _mm512_mask3_fmsubadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fmsubadd = _mm512_fmsubadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_mask_fmsubadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_maskz_fmsubadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm256_mask3_fmsubadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fmsubadd = _mm256_fmsubadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_mask_fmsubadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmsubadd, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_maskz_fmsubadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfmsubadd))]
pub unsafe fn _mm_mask3_fmsubadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fmsubadd = _mm_fmsubadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fmsubadd, c.as_f64x2()))
}
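/// Multiply packed single-precision elements in `a` and `b`, negate the
/// intermediate result, and add `c`, returning `-(a * b) + c`; implemented by
/// negating `a` before the fused multiply-add.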
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmadd_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
transmute(vfmadd132ps(sub, b.as_f32x16(), c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmadd_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmadd_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmadd_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmadd = _mm512_fnmadd_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_mask_fnmadd_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_maskz_fnmadd_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_mask3_fnmadd_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fnmadd = _mm256_fnmadd_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask_fnmadd_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fnmadd, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_maskz_fnmadd_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask3_fnmadd_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fnmadd = _mm_fnmadd_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fnmadd, c.as_f32x4()))
}
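/// Multiply packed double-precision elements in `a` and `b`, negate the
/// intermediate result, and add `c`, returning `-(a * b) + c`.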
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
transmute(vfmadd132pd(sub, b.as_f64x8(), c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmadd_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmadd_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmadd_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmadd = _mm512_fnmadd_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_mask_fnmadd_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_maskz_fnmadd_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm256_mask3_fnmadd_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fnmadd = _mm256_fnmadd_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask_fnmadd_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fnmadd, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_maskz_fnmadd_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmadd))]
pub unsafe fn _mm_mask3_fnmadd_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fnmadd = _mm_fnmadd_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fnmadd, c.as_f64x2()))
}
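/// Multiply packed single-precision elements in `a` and `b`, negate the
/// intermediate result, and subtract `c`, returning `-(a * b) - c`;
/// implemented by negating both `a` and `c` before the fused multiply-add.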
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmsub_ps(a: __m512, b: __m512, c: __m512) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
transmute(vfmadd132ps(suba, b.as_f32x16(), subc))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmsub_ps(a: __m512, k: __mmask16, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmsub_ps(k: __mmask16, a: __m512, b: __m512, c: __m512) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmsub_ps(a: __m512, b: __m512, c: __m512, k: __mmask16) -> __m512 {
let fnmsub = _mm512_fnmsub_ps(a, b, c).as_f32x16();
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_mask_fnmsub_ps(a: __m256, k: __mmask8, b: __m256, c: __m256) -> __m256 {
let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_maskz_fnmsub_ps(k: __mmask8, a: __m256, b: __m256, c: __m256) -> __m256 {
let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_mask3_fnmsub_ps(a: __m256, b: __m256, c: __m256, k: __mmask8) -> __m256 {
let fnmsub = _mm256_fnmsub_ps(a, b, c).as_f32x8();
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask_fnmsub_ps(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fnmsub, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_maskz_fnmsub_ps(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask3_fnmsub_ps(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let fnmsub = _mm_fnmsub_ps(a, b, c).as_f32x4();
transmute(simd_select_bitmask(k, fnmsub, c.as_f32x4()))
}
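/// Multiply packed double-precision elements in `a` and `b`, negate the
/// intermediate result, and subtract `c`, returning `-(a * b) - c`.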
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
transmute(vfmadd132pd(suba, b.as_f64x8(), subc))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask_fnmsub_pd(a: __m512d, k: __mmask8, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_maskz_fnmsub_pd(k: __mmask8, a: __m512d, b: __m512d, c: __m512d) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd))]
pub unsafe fn _mm512_mask3_fnmsub_pd(a: __m512d, b: __m512d, c: __m512d, k: __mmask8) -> __m512d {
let fnmsub = _mm512_fnmsub_pd(a, b, c).as_f64x8();
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_mask_fnmsub_pd(a: __m256d, k: __mmask8, b: __m256d, c: __m256d) -> __m256d {
let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_maskz_fnmsub_pd(k: __mmask8, a: __m256d, b: __m256d, c: __m256d) -> __m256d {
let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm256_mask3_fnmsub_pd(a: __m256d, b: __m256d, c: __m256d, k: __mmask8) -> __m256d {
let fnmsub = _mm256_fnmsub_pd(a, b, c).as_f64x4();
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask_fnmsub_pd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fnmsub, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_maskz_fnmsub_pd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfnmsub))]
pub unsafe fn _mm_mask3_fnmsub_pd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let fnmsub = _mm_fnmsub_pd(a, b, c).as_f64x2();
transmute(simd_select_bitmask(k, fnmsub, c.as_f64x2()))
}
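/// Compute an approximate reciprocal of packed single-precision elements in
/// `a`, with a maximum relative error of 2^-14. The all-ones mask literal in
/// the unmasked form selects every lane of the 16-element vector.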
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_rcp14_ps(a: __m512) -> __m512 {
transmute(vrcp14ps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_mask_rcp14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vrcp14ps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm512_maskz_rcp14_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vrcp14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm256_rcp14_ps(a: __m256) -> __m256 {
transmute(vrcp14ps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm256_mask_rcp14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
transmute(vrcp14ps256(a.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm256_maskz_rcp14_ps(k: __mmask8, a: __m256) -> __m256 {
transmute(vrcp14ps256(a.as_f32x8(), _mm256_setzero_ps().as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm_rcp14_ps(a: __m128) -> __m128 {
transmute(vrcp14ps128(
a.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm_mask_rcp14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
transmute(vrcp14ps128(a.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14ps))]
pub unsafe fn _mm_maskz_rcp14_ps(k: __mmask8, a: __m128) -> __m128 {
transmute(vrcp14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
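/// Compute an approximate reciprocal of packed double-precision elements in
/// `a`, with a maximum relative error of 2^-14.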
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_rcp14_pd(a: __m512d) -> __m512d {
transmute(vrcp14pd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_mask_rcp14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vrcp14pd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm512_maskz_rcp14_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vrcp14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm256_rcp14_pd(a: __m256d) -> __m256d {
transmute(vrcp14pd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm256_mask_rcp14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
transmute(vrcp14pd256(a.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm256_maskz_rcp14_pd(k: __mmask8, a: __m256d) -> __m256d {
transmute(vrcp14pd256(a.as_f64x4(), _mm256_setzero_pd().as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm_rcp14_pd(a: __m128d) -> __m128d {
transmute(vrcp14pd128(
a.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b00000011,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm_mask_rcp14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
transmute(vrcp14pd128(a.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrcp14pd))]
pub unsafe fn _mm_maskz_rcp14_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vrcp14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
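/// Compute an approximate reciprocal square root of packed single-precision
/// elements in `a`, with a maximum relative error of 2^-14.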
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_rsqrt14_ps(a: __m512) -> __m512 {
transmute(vrsqrt14ps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_mask_rsqrt14_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vrsqrt14ps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm512_maskz_rsqrt14_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vrsqrt14ps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm256_mask_rsqrt14_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
transmute(vrsqrt14ps256(a.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm256_maskz_rsqrt14_ps(k: __mmask8, a: __m256) -> __m256 {
transmute(vrsqrt14ps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm_mask_rsqrt14_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
transmute(vrsqrt14ps128(a.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14ps))]
pub unsafe fn _mm_maskz_rsqrt14_ps(k: __mmask8, a: __m128) -> __m128 {
transmute(vrsqrt14ps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_rsqrt14_pd(a: __m512d) -> __m512d {
transmute(vrsqrt14pd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_mask_rsqrt14_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vrsqrt14pd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm512_maskz_rsqrt14_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vrsqrt14pd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm256_mask_rsqrt14_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
transmute(vrsqrt14pd256(a.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm256_maskz_rsqrt14_pd(k: __mmask8, a: __m256d) -> __m256d {
transmute(vrsqrt14pd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm_mask_rsqrt14_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
transmute(vrsqrt14pd128(a.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrsqrt14pd))]
pub unsafe fn _mm_maskz_rsqrt14_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vrsqrt14pd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
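/// Convert the exponent of each packed single-precision (32-bit) floating-point element in
/// `a` to a single-precision floating-point number representing the integer exponent, and
/// store the results in `dst`. This intrinsic essentially calculates `floor(log2(|x|))` for
/// each element.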
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_getexp_ps(a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_mask_getexp_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
src.as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm512_maskz_getexp_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vgetexpps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm256_getexp_ps(a: __m256) -> __m256 {
transmute(vgetexpps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm256_mask_getexp_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
transmute(vgetexpps256(a.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm256_maskz_getexp_ps(k: __mmask8, a: __m256) -> __m256 {
transmute(vgetexpps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm_getexp_ps(a: __m128) -> __m128 {
transmute(vgetexpps128(
a.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm_mask_getexp_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
transmute(vgetexpps128(a.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexpps))]
pub unsafe fn _mm_maskz_getexp_ps(k: __mmask8, a: __m128) -> __m128 {
transmute(vgetexpps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_getexp_pd(a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_mask_getexp_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm512_maskz_getexp_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vgetexppd(
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm256_getexp_pd(a: __m256d) -> __m256d {
transmute(vgetexppd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm256_mask_getexp_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
transmute(vgetexppd256(a.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm256_maskz_getexp_pd(k: __mmask8, a: __m256d) -> __m256d {
transmute(vgetexppd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm_getexp_pd(a: __m128d) -> __m128d {
transmute(vgetexppd128(
a.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b00000011,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm_mask_getexp_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
transmute(vgetexppd128(a.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetexppd))]
pub unsafe fn _mm_maskz_getexp_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vgetexppd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
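/// Round packed single-precision (32-bit) floating-point elements in `a` to the number of
/// fraction bits specified by `imm8`, and store the results in `dst`. Bits 7:4 of `imm8`
/// give the number of fraction bits to preserve; the low bits select the rounding behavior
/// (direction, MXCSR override, and precision-exception suppression).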
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_roundscale_ps(a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(
a,
$imm8,
zero,
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr) => {
vrndscaleps(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 250))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_roundscale_ps(a: __m256, imm8: i32) -> __m256 {
let a = a.as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm8:expr) => {
vrndscaleps256(a, $imm8, zero, 0b11111111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_roundscale_ps(src: __m256, k: __mmask8, a: __m256, imm8: i32) -> __m256 {
let a = a.as_f32x8();
let src = src.as_f32x8();
macro_rules! call {
($imm8:expr) => {
vrndscaleps256(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_roundscale_ps(k: __mmask8, a: __m256, imm8: i32) -> __m256 {
let a = a.as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm8:expr) => {
vrndscaleps256(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 250))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_roundscale_ps(a: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaleps128(a, $imm8, zero, 0b00001111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_roundscale_ps(src: __m128, k: __mmask8, a: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaleps128(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_roundscale_ps(k: __mmask8, a: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaleps128(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_roundscale_pd(a: __m512d, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_roundscale_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_roundscale_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr) => {
vrndscalepd(a, $imm8, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_roundscale_pd(a: __m256d, imm8: i32) -> __m256d {
let a = a.as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
macro_rules! call {
($imm8:expr) => {
vrndscalepd256(a, $imm8, zero, 0b00001111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_roundscale_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
imm8: i32,
) -> __m256d {
let a = a.as_f64x4();
let src = src.as_f64x4();
macro_rules! call {
($imm8:expr) => {
vrndscalepd256(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_roundscale_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
let a = a.as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
macro_rules! call {
($imm8:expr) => {
vrndscalepd256(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_roundscale_pd(a: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalepd128(a, $imm8, zero, 0b00000011)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_roundscale_pd(src: __m128d, k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalepd128(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_roundscale_pd(k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalepd128(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
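/// Scale the packed single-precision (32-bit) floating-point elements in `a` using values
/// from `b`, and store the results in `dst`. Each element is computed as `a * 2^floor(b)`.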
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_scalef_ps(a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_mask_scalef_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
src.as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm512_maskz_scalef_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(vscalefps(
a.as_f32x16(),
b.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm256_scalef_ps(a: __m256, b: __m256) -> __m256 {
transmute(vscalefps256(
a.as_f32x8(),
b.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm256_mask_scalef_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
transmute(vscalefps256(a.as_f32x8(), b.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm256_maskz_scalef_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
transmute(vscalefps256(
a.as_f32x8(),
b.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm_scalef_ps(a: __m128, b: __m128) -> __m128 {
transmute(vscalefps128(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm_mask_scalef_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vscalefps128(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefps))]
pub unsafe fn _mm_maskz_scalef_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vscalefps128(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_scalef_pd(a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_mask_scalef_pd(src: __m512d, k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm512_maskz_scalef_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(vscalefpd(
a.as_f64x8(),
b.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm256_scalef_pd(a: __m256d, b: __m256d) -> __m256d {
transmute(vscalefpd256(
a.as_f64x4(),
b.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
0b00001111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm256_mask_scalef_pd(src: __m256d, k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
transmute(vscalefpd256(a.as_f64x4(), b.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm256_maskz_scalef_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
transmute(vscalefpd256(
a.as_f64x4(),
b.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm_scalef_pd(a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefpd128(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b00000011,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm_mask_scalef_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefpd128(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vscalefpd))]
pub unsafe fn _mm_maskz_scalef_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefpd128(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
))
}
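/// Fix up packed single-precision (32-bit) floating-point elements in `a` and `b` using
/// packed 32-bit integers in `c`, and store the results in `dst`. `imm8` is used to set the
/// required flags reporting.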
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_ps(a: __m512, b: __m512, c: __m512i, imm8: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmps(
a,
b,
c,
$imm8,
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512i,
imm8: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmps(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vfixupimmpsz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_fixupimm_ps(a: __m256, b: __m256, c: __m256i, imm8: i32) -> __m256 {
let a = a.as_f32x8();
let b = b.as_f32x8();
let c = c.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmps256(a, b, c, $imm8, 0b11111111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_fixupimm_ps(
a: __m256,
k: __mmask8,
b: __m256,
c: __m256i,
imm8: i32,
) -> __m256 {
let a = a.as_f32x8();
let b = b.as_f32x8();
let c = c.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmps256(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_maskz_fixupimm_ps(
k: __mmask8,
a: __m256,
b: __m256,
c: __m256i,
imm8: i32,
) -> __m256 {
let a = a.as_f32x8();
let b = b.as_f32x8();
let c = c.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpsz256(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_ps(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmps128(a, b, c, $imm8, 0b00001111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_ps(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmps128(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_ps(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmpsz128(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fixupimm_pd(a: __m512d, b: __m512d, c: __m512i, imm8: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fixupimm_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512i,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fixupimm_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vfixupimmpdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_fixupimm_pd(a: __m256d, b: __m256d, c: __m256i, imm8: i32) -> __m256d {
let a = a.as_f64x4();
let b = b.as_f64x4();
let c = c.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd256(a, b, c, $imm8, 0b00001111)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_fixupimm_pd(
a: __m256d,
k: __mmask8,
b: __m256d,
c: __m256i,
imm8: i32,
) -> __m256d {
let a = a.as_f64x4();
let b = b.as_f64x4();
let c = c.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd256(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_maskz_fixupimm_pd(
k: __mmask8,
a: __m256d,
b: __m256d,
c: __m256i,
imm8: i32,
) -> __m256d {
let a = a.as_f64x4();
let b = b.as_f64x4();
let c = c.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmpdz256(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_pd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd128(a, b, c, $imm8, 0b00000011)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_pd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmpd128(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_pd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmpdz128(a, b, c, $imm8, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
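/// Bitwise ternary logic that provides the capability to implement any three-operand binary
/// function; the specific function is specified by the value in `imm8`. For each bit in each
/// packed 32-bit integer, the corresponding bits from `a`, `b`, and `c` form a 3-bit index
/// into `imm8`, and the value of `imm8` at that index is written to the corresponding bit in
/// `dst`. For example, `imm8 = 0xE8` (`0b1110_1000`) yields the bitwise majority of the
/// three operands.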
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi32(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
let src = src.as_i32x16();
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi32(
k: __mmask16,
a: __m512i,
b: __m512i,
c: __m512i,
imm8: i32,
) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpternlogd(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_ternarylogic_epi32(a: __m256i, b: __m256i, c: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
let b = b.as_i32x8();
let c = c.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpternlogd256(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_ternarylogic_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
let src = src.as_i32x8();
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpternlogd256(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_maskz_ternarylogic_epi32(
k: __mmask8,
a: __m256i,
b: __m256i,
c: __m256i,
imm8: i32,
) -> __m256i {
let a = a.as_i32x8();
let b = b.as_i32x8();
let c = c.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpternlogd256(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_ternarylogic_epi32(a: __m128i, b: __m128i, c: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
let b = b.as_i32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vpternlogd128(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_ternarylogic_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
imm8: i32,
) -> __m128i {
let src = src.as_i32x4();
let a = a.as_i32x4();
let b = b.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vpternlogd128(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogd, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_ternarylogic_epi32(
k: __mmask8,
a: __m128i,
b: __m128i,
c: __m128i,
imm8: i32,
) -> __m128i {
let a = a.as_i32x4();
let b = b.as_i32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vpternlogd128(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_ternarylogic_epi64(a: __m512i, b: __m512i, c: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
let b = b.as_i64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_ternarylogic_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
let src = src.as_i64x8();
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_ternarylogic_epi64(
k: __mmask8,
a: __m512i,
b: __m512i,
c: __m512i,
imm8: i32,
) -> __m512i {
let a = a.as_i64x8();
let b = b.as_i64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpternlogq(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_ternarylogic_epi64(a: __m256i, b: __m256i, c: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
let b = b.as_i64x4();
let c = c.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpternlogq256(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_ternarylogic_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
let src = src.as_i64x4();
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpternlogq256(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_maskz_ternarylogic_epi64(
k: __mmask8,
a: __m256i,
b: __m256i,
c: __m256i,
imm8: i32,
) -> __m256i {
let a = a.as_i64x4();
let b = b.as_i64x4();
let c = c.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpternlogq256(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_ternarylogic_epi64(a: __m128i, b: __m128i, c: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
let b = b.as_i64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpternlogq128(a, b, c, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_ternarylogic_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
imm8: i32,
) -> __m128i {
let src = src.as_i64x2();
let a = a.as_i64x2();
let b = b.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpternlogq128(src, a, b, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ternarylogic, src))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpternlogq, imm8 = 114))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_ternarylogic_epi64(
k: __mmask8,
a: __m128i,
b: __m128i,
c: __m128i,
imm8: i32,
) -> __m128i {
let a = a.as_i64x2();
let b = b.as_i64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpternlogq128(a, b, c, $imm8)
};
}
let ternarylogic = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, ternarylogic, zero))
}
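/// Normalize the mantissas of packed single-precision (32-bit) floating-point elements in
/// `a`, and store the results in `dst`. This intrinsic essentially calculates
/// `±(2^k)*|x.significand|`, where `k` depends on the interval range defined by `norm` and
/// the sign depends on `sign` and the source sign.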
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_ps(
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(
a,
$imm2 << 2 | $imm4,
zero,
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_ps(
src: __m512,
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(a, $imm2 << 2 | $imm4, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_ps(
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps(a, $imm2 << 2 | $imm4, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm256_getmant_ps(
a: __m256,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256 {
let a = a.as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps256(a, $imm2 << 2 | $imm4, zero, 0b11111111)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm256_mask_getmant_ps(
src: __m256,
k: __mmask8,
a: __m256,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256 {
let a = a.as_f32x8();
let src = src.as_f32x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps256(a, $imm2 << 2 | $imm4, src, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm256_maskz_getmant_ps(
k: __mmask8,
a: __m256,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256 {
let a = a.as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps256(a, $imm2 << 2 | $imm4, zero, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm_getmant_ps(
a: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps128(a, $imm2 << 2 | $imm4, zero, 0b00001111)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_mask_getmant_ps(
src: __m128,
k: __mmask8,
a: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps128(a, $imm2 << 2 | $imm4, src, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_maskz_getmant_ps(
k: __mmask8,
a: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantps128(a, $imm2 << 2 | $imm4, zero, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_getmant_pd(
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(
a,
$imm2 << 2 | $imm4,
zero,
0b11111111,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_getmant_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(a, $imm2 << 2 | $imm4, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_getmant_pd(
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd(a, $imm2 << 2 | $imm4, zero, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm256_getmant_pd(
a: __m256d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256d {
let a = a.as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd256(a, $imm2 << 2 | $imm4, zero, 0b00001111)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm256_mask_getmant_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256d {
let a = a.as_f64x4();
let src = src.as_f64x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd256(a, $imm2 << 2 | $imm4, src, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm256_maskz_getmant_pd(
k: __mmask8,
a: __m256d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m256d {
let a = a.as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd256(a, $imm2 << 2 | $imm4, zero, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm_getmant_pd(
a: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd128(a, $imm2 << 2 | $imm4, zero, 0b00000011)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_mask_getmant_pd(
src: __m128d,
k: __mmask8,
a: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd128(a, $imm2 << 2 | $imm4, src, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_maskz_getmant_pd(
k: __mmask8,
a: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr, $imm2:expr) => {
vgetmantpd128(a, $imm2 << 2 | $imm4, zero, k)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
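/// Add packed single-precision (32-bit) floating-point elements in `a` and `b`, and store
/// the results in `dst`. Rounding is done according to the `rounding` parameter, which can
/// be one of:
///
/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC`: round to nearest, suppress exceptions
/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC`: round down, suppress exceptions
/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC`: round up, suppress exceptions
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC`: truncate, suppress exceptions
/// * `_MM_FROUND_CUR_DIRECTION`: use the rounding mode currently set in `MXCSR`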
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, addround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vaddps(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, addround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_add_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_add_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, addround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_add_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vaddpd(a, b, $imm4)
};
}
let addround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, addround, zero))
}
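/// Subtract packed single-precision (32-bit) floating-point elements in `b` from packed
/// single-precision (32-bit) floating-point elements in `a`, and store the results in `dst`.
/// Rounding is done according to the `rounding` parameter; see `_mm512_add_round_ps` for the
/// accepted values.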
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, subround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsubps(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, subround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_sub_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_sub_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, subround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_sub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsubpd(a, b, $imm4)
};
}
let subround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, subround, zero))
}
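/// Multiply packed single-precision (32-bit) floating-point elements in `a` and `b`, and
/// store the results in `dst`. Rounding is done according to the `rounding` parameter; see
/// `_mm512_add_round_ps` for the accepted values.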
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, mulround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmulps(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mulround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_mul_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_mul_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, mulround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_mul_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmulpd(a, b, $imm4)
};
}
let mulround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mulround, zero))
}
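/// Divide packed single-precision (32-bit) floating-point elements in `a` by packed
/// single-precision (32-bit) floating-point elements in `b`, and store the results in `dst`.
/// Rounding is done according to the `rounding` parameter; see `_mm512_add_round_ps` for the
/// accepted values.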
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, divround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vdivps(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, divround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_div_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_div_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, divround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_div_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vdivpd(a, b, $imm4)
};
}
let divround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, divround, zero))
}
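/// Computes the square root of each single-precision element of `a` under the
/// rounding mode in `rounding`.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     // sqrt(2) rounded toward negative infinity in every lane.
///     let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
/// }
/// ```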
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_ps(a: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, sqrtround, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_ps(k: __mmask16, a: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vsqrtps(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, sqrtround, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_sqrt_round_pd(a: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_sqrt_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, sqrtround, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_sqrt_round_pd(k: __mmask8, a: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vsqrtpd(a, $imm4)
};
}
let sqrtround = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, sqrtround, zero))
}
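/// Fused multiply-add: computes `(a * b) + c` per lane with a single rounding
/// step, using the mode in `rounding`.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(0.1);
///     let b = _mm512_set1_ps(10.0);
///     let c = _mm512_set1_ps(-1.0);
///     // One rounding step for the whole (a * b) + c expression.
///     let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// }
/// ```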
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, c, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmadd, c))
}
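/// Fused multiply-subtract: `(a * b) - c` per lane. The implementation below
/// negates `c` and reuses the fused-add LLVM intrinsic, which produces the
/// same result with one rounding.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(3.0);
///     let b = _mm512_set1_ps(2.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane becomes 3.0 * 2.0 - 1.0 = 5.0.
///     let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```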
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let c = c.as_f32x16();
let sub = simd_sub(zero, c);
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let c = c.as_f64x8();
let sub = simd_sub(zero, c);
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(a, b, sub, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsub, c))
}
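/// Fused multiply with alternating add/subtract: even-indexed lanes compute
/// `(a * b) - c` and odd-indexed lanes compute `(a * b) + c`, all with a
/// single rounding.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(1.0);
///     let c = _mm512_set1_ps(0.5);
///     // Lanes alternate between 0.5 (even indices) and 1.5 (odd indices).
///     let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```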
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmaddsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmaddsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmaddsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, fmaddsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmaddsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, c, $imm4)
};
}
let fmaddsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmaddsub, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, c.as_f32x16());
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let c = c.as_f32x16();
let sub = simd_sub(zero, c);
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213ps(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fmsubadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, sub, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fmsubadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fmsubadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, c.as_f64x8());
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmaddsub, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fmsubadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let c = c.as_f64x8();
let sub = simd_sub(zero, c);
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmaddsub213pd(a, b, sub, $imm4)
};
}
let fmsubadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fmsubadd, c))
}
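/// Fused negated multiply-add: `-(a * b) + c` per lane, implemented by
/// negating `a` before the fused-add intrinsic.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(10.0);
///     // Every lane becomes -(2.0 * 3.0) + 10.0 = 4.0.
///     let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```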
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(sub, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let a = a.as_f32x16();
let sub = simd_sub(zero, a);
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let sub = simd_sub(zero, a.as_f32x16());
let b = b.as_f32x16();
let c = c.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmadd_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(sub, b, c, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmadd_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let a = a.as_f64x8();
let sub = simd_sub(zero, a);
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmadd_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmadd_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let sub = simd_sub(zero, a.as_f64x8());
let b = b.as_f64x8();
let c = c.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(sub, b, c, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmadd, c))
}
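/// Fused negated multiply-subtract: `-(a * b) - c` per lane, negating both
/// `a` and `c` before the fused-add intrinsic.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.0);
///     let b = _mm512_set1_ps(3.0);
///     let c = _mm512_set1_ps(1.0);
///     // Every lane becomes -(2.0 * 3.0) - 1.0 = -7.0.
///     let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```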
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_ps(a: __m512, b: __m512, c: __m512, rounding: i32) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(suba, b, subc, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let a = a.as_f32x16();
let suba = simd_sub(zero, a);
let subc = simd_sub(zero, c.as_f32x16());
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let subc = simd_sub(zero, c.as_f32x16());
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_ps(
a: __m512,
b: __m512,
c: __m512,
k: __mmask16,
rounding: i32,
) -> __m512 {
let zero: f32x16 = mem::zeroed();
let suba = simd_sub(zero, a.as_f32x16());
let c = c.as_f32x16();
let subc = simd_sub(zero, c);
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vfmadd132psround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, c))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_fnmsub_round_pd(a: __m512d, b: __m512d, c: __m512d, rounding: i32) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(suba, b, subc, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_fnmsub_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let a = a.as_f64x8();
let suba = simd_sub(zero, a);
let subc = simd_sub(zero, c.as_f64x8());
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_maskz_fnmsub_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512d,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let subc = simd_sub(zero, c.as_f64x8());
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask3_fnmsub_round_pd(
a: __m512d,
b: __m512d,
c: __m512d,
k: __mmask8,
rounding: i32,
) -> __m512d {
let zero: f64x8 = mem::zeroed();
let suba = simd_sub(zero, a.as_f64x8());
let c = c.as_f64x8();
let subc = simd_sub(zero, c);
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vfmadd132pdround(suba, b, subc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, fnmsub, c))
}
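/// Lane-wise maximum (and, below, minimum). Max and min are exact, so these
/// intrinsics take an `sae` ("suppress all exceptions") parameter instead of
/// a rounding mode: pass `_MM_FROUND_NO_EXC` to silence exceptions or
/// `_MM_FROUND_CUR_DIRECTION` to leave them enabled.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.0);
///     let b = _mm512_set1_ps(2.0);
///     // Every lane becomes 2.0; no exceptions are signalled.
///     let r = _mm512_max_round_ps(a, b, _MM_FROUND_NO_EXC);
/// }
/// ```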
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmaxps(a, b, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmaxps(a, b, $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, max, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vmaxps(a, b, $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_max_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmaxpd(a, b, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_max_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmaxpd(a, b, $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, max, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_max_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vmaxpd(a, b, $imm4)
};
}
let max = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, max, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_ps(a: __m512, b: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vminps(a, b, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vminps(a, b, $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, min, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_ps(k: __mmask16, a: __m512, b: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vminps(a, b, $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, min, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_min_round_pd(a: __m512d, b: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vminpd(a, b, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_min_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vminpd(a, b, $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
transmute(simd_select_bitmask(k, min, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminpd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_min_round_pd(k: __mmask8, a: __m512d, b: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vminpd(a, b, $imm4)
};
}
let min = constify_imm4_sae!(sae, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, min, zero))
}
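/// Extracts the exponent of each element as a single-precision value, i.e.
/// `floor(log2(|a|))` per lane.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(8.0);
///     // Every lane becomes 3.0, since 8.0 = 2^3.
///     let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
/// }
/// ```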
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_ps(a: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vgetexpps(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vgetexpps(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_ps(k: __mmask16, a: __m512, sae: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vgetexpps(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_getexp_round_pd(a: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vgetexppd(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_getexp_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vgetexppd(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexppd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_getexp_round_pd(k: __mmask8, a: __m512d, sae: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vgetexppd(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
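/// Rounds each element to a fixed number of fraction bits: the upper four
/// bits of `imm8` give the fraction-bit count and the low bits select the
/// rounding mode, so `imm8 = 0` rounds to the nearest integer.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.1);
///     // imm8 = 0: round to the nearest whole number; every lane becomes 1.0.
///     let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```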
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_ps(a: __m512, imm8: i32, sae: i32) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, src, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaleps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_ps(
k: __mmask16,
a: __m512,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaleps(a, $imm8, zero, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(1, 2)]
pub unsafe fn _mm512_roundscale_round_pd(a: __m512d, imm8: i32, sae: i32) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, zero, 0b11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_mask_roundscale_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, src, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalepd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm512_maskz_roundscale_round_pd(
k: __mmask8,
a: __m512d,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalepd(a, $imm8, zero, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
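/// Scales each element of `a` by two raised to the truncated corresponding
/// element of `b`: `a * 2^floor(b)` per lane.
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(1.5);
///     let b = _mm512_set1_ps(3.0);
///     // Every lane becomes 1.5 * 2^3 = 12.0.
///     let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```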
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_ps(a: __m512, b: __m512, rounding: i32) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vscalefps(a, b, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vscalefps(a, b, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
rounding: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vscalefps(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_scalef_round_pd(a: __m512d, b: __m512d, rounding: i32) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vscalefpd(a, b, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_scalef_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vscalefpd(a, b, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefpd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_scalef_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
rounding: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vscalefpd(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
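/// Fixes up special values (NaN, infinities, zero, ...) in `a` and `b` using
/// the per-lane lookup table in `c`; `imm8` selects which exceptions the
/// fixup may signal. The sketch below only shows the call shape, with a
/// hypothetical all-zero table whose token responses leave the destination
/// unchanged (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(f32::NAN);
///     let b = _mm512_set1_ps(0.0);
///     let table = _mm512_set1_epi32(0);
///     let r = _mm512_fixupimm_round_ps(a, b, table, 0, _MM_FROUND_CUR_DIRECTION);
/// }
/// ```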
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_ps(
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmps(a, b, c, $imm8, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_ps(
a: __m512,
k: __mmask16,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmps(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmps, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_ps(
k: __mmask16,
a: __m512,
b: __m512,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let b = b.as_f32x16();
let c = c.as_i32x16();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpsz(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm512_fixupimm_round_pd(
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpd(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_mask_fixupimm_round_pd(
a: __m512d,
k: __mmask8,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpd(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmpd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm512_maskz_fixupimm_round_pd(
k: __mmask8,
a: __m512d,
b: __m512d,
c: __m512i,
imm8: i32,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let b = b.as_f64x8();
let c = c.as_i64x8();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmpdz(a, b, c, $imm8, k, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
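/// Extracts the mantissa of each element, normalized to the interval chosen
/// by `norm` and signed according to `sign` (the `_MM_MANT_NORM_*` and
/// `_MM_MANT_SIGN_*` constants).
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(10.0);
///     // 10.0 = 1.25 * 2^3, so every lane becomes 1.25.
///     let r = _mm512_getmant_round_ps(
///         a,
///         _MM_MANT_NORM_1_2,
///         _MM_MANT_SIGN_SRC,
///         _MM_FROUND_CUR_DIRECTION,
///     );
/// }
/// ```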
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_ps(
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(a, $imm2 << 2 | $imm4_1, zero, 0b11111111_11111111, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_ps(
src: __m512,
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let src = src.as_f32x16();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(a, $imm2 << 2 | $imm4_1, src, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantps, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_ps(
k: __mmask16,
a: __m512,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512 {
let a = a.as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantps(a, $imm2 << 2 | $imm4_1, zero, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(1, 2, 3)]
pub unsafe fn _mm512_getmant_round_pd(
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(a, $imm2 << 2 | $imm4_1, zero, 0b11111111, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm512_mask_getmant_round_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let src = src.as_f64x8();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(a, $imm2 << 2 | $imm4_1, src, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantpd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm512_maskz_getmant_round_pd(
k: __mmask8,
a: __m512d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m512d {
let a = a.as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantpd(a, $imm2 << 2 | $imm4_1, zero, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
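/// Converts packed single-precision elements to 32-bit integers. The
/// conversion intrinsics below pass `_MM_FROUND_CUR_DIRECTION`, i.e. they
/// round according to the current MXCSR rounding mode (round-to-nearest-even
/// by default).
///
/// A minimal sketch (assumes runtime `avx512f` support):
///
/// ```ignore
/// unsafe {
///     let a = _mm512_set1_ps(2.5);
///     // Under the default rounding mode, 2.5 rounds to the even value 2.
///     let r = _mm512_cvtps_epi32(a);
/// }
/// ```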
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_cvtps_epi32(a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_mask_cvtps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
src.as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm512_maskz_cvtps_epi32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm256_mask_cvtps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
let convert = _mm256_cvtps_epi32(a);
transmute(simd_select_bitmask(k, convert.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm256_maskz_cvtps_epi32(k: __mmask8, a: __m256) -> __m256i {
let convert = _mm256_cvtps_epi32(a);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm_mask_cvtps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
let convert = _mm_cvtps_epi32(a);
transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2dq))]
pub unsafe fn _mm_maskz_cvtps_epi32(k: __mmask8, a: __m128) -> __m128i {
let convert = _mm_cvtps_epi32(a);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert.as_i32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_cvtps_epu32(a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_mask_cvtps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
src.as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm512_maskz_cvtps_epu32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvtps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_cvtps_epu32(a: __m256) -> __m256i {
transmute(vcvtps2udq256(
a.as_f32x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_mask_cvtps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
transmute(vcvtps2udq256(a.as_f32x8(), src.as_u32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm256_maskz_cvtps_epu32(k: __mmask8, a: __m256) -> __m256i {
transmute(vcvtps2udq256(
a.as_f32x8(),
_mm256_setzero_si256().as_u32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_cvtps_epu32(a: __m128) -> __m128i {
transmute(vcvtps2udq128(
a.as_f32x4(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_mask_cvtps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
transmute(vcvtps2udq128(a.as_f32x4(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2udq))]
pub unsafe fn _mm_maskz_cvtps_epu32(k: __mmask8, a: __m128) -> __m128i {
transmute(vcvtps2udq128(
a.as_f32x4(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtps_pd(a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtps_pd(src: __m512d, k: __mmask8, a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_maskz_cvtps_pd(k: __mmask8, a: __m256) -> __m512d {
transmute(vcvtps2pd(
a.as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_cvtpslo_pd(v2: __m512) -> __m512d {
transmute(vcvtps2pd(
_mm512_castps512_ps256(v2).as_f32x8(),
_mm512_setzero_pd().as_f64x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd))]
pub unsafe fn _mm512_mask_cvtpslo_pd(src: __m512d, k: __mmask8, v2: __m512) -> __m512d {
transmute(vcvtps2pd(
_mm512_castps512_ps256(v2).as_f32x8(),
src.as_f64x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_ps(a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_ps(src: __m256, k: __mmask8, a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
src.as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_maskz_cvtpd_ps(k: __mmask8, a: __m512d) -> __m256 {
transmute(vcvtpd2ps(
a.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm256_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m256d) -> __m128 {
let convert = _mm256_cvtpd_ps(a);
transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm256_maskz_cvtpd_ps(k: __mmask8, a: __m256d) -> __m128 {
let convert = _mm256_cvtpd_ps(a);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm_mask_cvtpd_ps(src: __m128, k: __mmask8, a: __m128d) -> __m128 {
let convert = _mm_cvtpd_ps(a);
transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm_maskz_cvtpd_ps(k: __mmask8, a: __m128d) -> __m128 {
let convert = _mm_cvtpd_ps(a);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_cvtpd_epi32(a: __m512d) -> __m256i {
transmute(vcvtpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_mask_cvtpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvtpd2dq(
a.as_f64x8(),
src.as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm512_maskz_cvtpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvtpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm256_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
let convert = _mm256_cvtpd_epi32(a);
transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm256_maskz_cvtpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
let convert = _mm256_cvtpd_epi32(a);
transmute(simd_select_bitmask(
k,
convert.as_i32x4(),
_mm_setzero_si128().as_i32x4(),
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm_mask_cvtpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
let convert = _mm_cvtpd_epi32(a);
transmute(simd_select_bitmask(k, convert.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2dq))]
pub unsafe fn _mm_maskz_cvtpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
let convert = _mm_cvtpd_epi32(a);
transmute(simd_select_bitmask(
k,
convert.as_i32x4(),
_mm_setzero_si128().as_i32x4(),
))
}
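// `_MM_FROUND_CUR_DIRECTION` makes these conversions round according to the
// current MXCSR rounding mode (round-to-nearest-even by default), so e.g.:
//
//     let r = _mm512_cvtpd_epi32(_mm512_set1_pd(2.5)); // each lane = 2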
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_cvtpd_epu32(a: __m512d) -> __m256i {
transmute(vcvtpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_mask_cvtpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvtpd2udq(
a.as_f64x8(),
src.as_u32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm512_maskz_cvtpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvtpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_u32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_cvtpd_epu32(a: __m256d) -> __m128i {
transmute(vcvtpd2udq256(
a.as_f64x4(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvtpd2udq256(a.as_f64x4(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm256_maskz_cvtpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvtpd2udq256(
a.as_f64x4(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_cvtpd_epu32(a: __m128d) -> __m128i {
transmute(vcvtpd2udq128(
a.as_f64x2(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_mask_cvtpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvtpd2udq128(a.as_f64x2(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtpd2udq))]
pub unsafe fn _mm_maskz_cvtpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvtpd2udq128(
a.as_f64x2(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_cvtpd_pslo(v2: __m512d) -> __m512 {
let r: f32x8 = vcvtpd2ps(
v2.as_f64x8(),
_mm256_setzero_ps().as_f32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
);
// widen back to 512 bits; shuffle indices 8..15 select lanes of the zero
// vector, so the upper eight f32 lanes of the result are 0.0
simd_shuffle16(
r,
_mm256_setzero_ps().as_f32x8(),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps))]
pub unsafe fn _mm512_mask_cvtpd_pslo(src: __m512, k: __mmask8, v2: __m512d) -> __m512 {
let r: f32x8 = vcvtpd2ps(
v2.as_f64x8(),
_mm512_castps512_ps256(src).as_f32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
);
// same widening shuffle: the upper eight f32 lanes are zeroed
simd_shuffle16(
r,
_mm256_setzero_ps().as_f32x8(),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_cvtepi8_epi32(a: __m128i) -> __m512i {
let a = a.as_i8x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_mask_cvtepi8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm512_maskz_cvtepi8_epi32(k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_mask_cvtepi8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm256_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi8_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm_mask_cvtepi8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi8_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbd))]
pub unsafe fn _mm_maskz_cvtepi8_epi32(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi8_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_cvtepi8_epi64(a: __m128i) -> __m512i {
let a = a.as_i8x16();
let v64: i8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); // low 8 bytes
transmute::<i64x8, _>(simd_cast(v64))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_mask_cvtepi8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm512_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi8_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_mask_cvtepi8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm256_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi8_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm_mask_cvtepi8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi8_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxbq))]
pub unsafe fn _mm_maskz_cvtepi8_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi8_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
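// The `cvtepi8_*` family sign-extends each input lane. Sketch with a
// hypothetical value:
//
//     let b = _mm_set1_epi8(-1);
//     let w = _mm512_cvtepi8_epi32(b); // sixteen i32 lanes, each -1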
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_cvtepu8_epi32(a: __m128i) -> __m512i {
let a = a.as_u8x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_mask_cvtepu8_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm512_maskz_cvtepu8_epi32(k: __mmask16, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm256_mask_cvtepu8_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm256_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu8_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm_mask_cvtepu8_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu8_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbd))]
pub unsafe fn _mm_maskz_cvtepu8_epi32(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu8_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_cvtepu8_epi64(a: __m128i) -> __m512i {
let a = a.as_u8x16();
let v64: u8x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7]); // low 8 bytes
transmute::<i64x8, _>(simd_cast(v64))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_mask_cvtepu8_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm512_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu8_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm256_mask_cvtepu8_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm256_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu8_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm_mask_cvtepu8_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu8_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxbq))]
pub unsafe fn _mm_maskz_cvtepu8_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu8_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
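// The `cvtepu8_*` family zero-extends instead, so the same input bytes give
// a different result. Sketch:
//
//     let b = _mm_set1_epi8(-1);       // bytes 0xFF
//     let w = _mm512_cvtepu8_epi32(b); // sixteen i32 lanes, each 255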
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_cvtepi16_epi32(a: __m256i) -> __m512i {
let a = a.as_i16x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_mask_cvtepi16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm512_maskz_cvtepi16_epi32(k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi16_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_mask_cvtepi16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm256_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi16_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm_mask_cvtepi16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi16_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwd))]
pub unsafe fn _mm_maskz_cvtepi16_epi32(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi16_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_cvtepi16_epi64(a: __m128i) -> __m512i {
let a = a.as_i16x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_mask_cvtepi16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm512_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepi16_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_mask_cvtepi16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm256_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi16_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm_mask_cvtepi16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi16_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxwq))]
pub unsafe fn _mm_maskz_cvtepi16_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi16_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_cvtepu16_epi32(a: __m256i) -> __m512i {
let a = a.as_u16x16();
transmute::<i32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_mask_cvtepu16_epi32(src: __m512i, k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, convert, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm512_maskz_cvtepu16_epi32(k: __mmask16, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu16_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm256_mask_cvtepu16_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm256_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu16_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm_mask_cvtepu16_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu16_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwd))]
pub unsafe fn _mm_maskz_cvtepu16_epi32(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu16_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_cvtepu16_epi64(a: __m128i) -> __m512i {
let a = a.as_u16x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_mask_cvtepu16_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm512_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m512i {
let convert = _mm512_cvtepu16_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm256_mask_cvtepu16_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm256_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu16_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm_mask_cvtepu16_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu16_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxwq))]
pub unsafe fn _mm_maskz_cvtepu16_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu16_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_cvtepi32_epi64(a: __m256i) -> __m512i {
let a = a.as_i32x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_mask_cvtepi32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm512_maskz_cvtepi32_epi64(k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepi32_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_mask_cvtepi32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm256_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepi32_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm_mask_cvtepi32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi32_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsxdq))]
pub unsafe fn _mm_maskz_cvtepi32_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepi32_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_cvtepu32_epi64(a: __m256i) -> __m512i {
let a = a.as_u32x8();
transmute::<i64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_mask_cvtepu32_epi64(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, convert, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm512_maskz_cvtepu32_epi64(k: __mmask8, a: __m256i) -> __m512i {
let convert = _mm512_cvtepu32_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm256_mask_cvtepu32_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, convert, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm256_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m256i {
let convert = _mm256_cvtepu32_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm_mask_cvtepu32_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu32_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, convert, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovzxdq))]
pub unsafe fn _mm_maskz_cvtepu32_epi64(k: __mmask8, a: __m128i) -> __m128i {
let convert = _mm_cvtepu32_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_cvtepi32_ps(a: __m512i) -> __m512 {
let a = a.as_i32x16();
transmute::<f32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_mask_cvtepi32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepi32_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm512_maskz_cvtepi32_ps(k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepi32_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm256_mask_cvtepi32_ps(src: __m256, k: __mmask8, a: __m256i) -> __m256 {
let convert = _mm256_cvtepi32_ps(a).as_f32x8();
transmute(simd_select_bitmask(k, convert, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm256_maskz_cvtepi32_ps(k: __mmask8, a: __m256i) -> __m256 {
let convert = _mm256_cvtepi32_ps(a).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm_mask_cvtepi32_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
let convert = _mm_cvtepi32_ps(a).as_f32x4();
transmute(simd_select_bitmask(k, convert, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2ps))]
pub unsafe fn _mm_maskz_cvtepi32_ps(k: __mmask8, a: __m128i) -> __m128 {
let convert = _mm_cvtepi32_ps(a).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
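// Note: i32 -> f32 conversion rounds per the current rounding mode; integers
// with magnitude above 2^24 may not be exactly representable in f32.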
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32_pd(a: __m256i) -> __m512d {
let a = a.as_i32x8();
transmute::<f64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepi32_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_maskz_cvtepi32_pd(k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepi32_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm256_mask_cvtepi32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
let convert = _mm256_cvtepi32_pd(a).as_f64x4();
transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm256_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m256d {
let convert = _mm256_cvtepi32_pd(a).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm_mask_cvtepi32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
let convert = _mm_cvtepi32_pd(a).as_f64x2();
transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm_maskz_cvtepi32_pd(k: __mmask8, a: __m128i) -> __m128d {
let convert = _mm_cvtepi32_pd(a).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
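// i32 -> f64 conversion, by contrast, is always exact: every 32-bit integer
// is exactly representable as a double, so no rounding occurs here.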
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_cvtepu32_ps(a: __m512i) -> __m512 {
let a = a.as_u32x16();
transmute::<f32x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_mask_cvtepu32_ps(src: __m512, k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepu32_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, convert, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps))]
pub unsafe fn _mm512_maskz_cvtepu32_ps(k: __mmask16, a: __m512i) -> __m512 {
let convert = _mm512_cvtepu32_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32_pd(a: __m256i) -> __m512d {
let a = a.as_u32x8();
transmute::<f64x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32_pd(src: __m512d, k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepu32_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_maskz_cvtepu32_pd(k: __mmask8, a: __m256i) -> __m512d {
let convert = _mm512_cvtepu32_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_cvtepu32_pd(a: __m128i) -> __m256d {
let a = a.as_u32x4();
transmute::<f64x4, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_mask_cvtepu32_pd(src: __m256d, k: __mmask8, a: __m128i) -> __m256d {
let convert = _mm256_cvtepu32_pd(a).as_f64x4();
transmute(simd_select_bitmask(k, convert, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm256_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m256d {
let convert = _mm256_cvtepu32_pd(a).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_cvtepu32_pd(a: __m128i) -> __m128d {
let a = a.as_u32x4();
// take the two low u32 lanes, then widen each to f64
let lo: u32x2 = simd_shuffle2(a, a, [0, 1]);
transmute::<f64x2, _>(simd_cast(lo))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_mask_cvtepu32_pd(src: __m128d, k: __mmask8, a: __m128i) -> __m128d {
let convert = _mm_cvtepu32_pd(a).as_f64x2();
transmute(simd_select_bitmask(k, convert, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm_maskz_cvtepu32_pd(k: __mmask8, a: __m128i) -> __m128d {
let convert = _mm_cvtepu32_pd(a).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_cvtepi32lo_pd(v2: __m512i) -> __m512d {
let v2 = v2.as_i32x16();
let v256: i32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); // low half
transmute::<f64x8, _>(simd_cast(v256))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2pd))]
pub unsafe fn _mm512_mask_cvtepi32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
let convert = _mm512_cvtepi32lo_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_cvtepu32lo_pd(v2: __m512i) -> __m512d {
let v2 = v2.as_u32x16();
let v256: u32x8 = simd_shuffle8(v2, v2, [0, 1, 2, 3, 4, 5, 6, 7]); // low half
transmute::<f64x8, _>(simd_cast(v256))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2pd))]
pub unsafe fn _mm512_mask_cvtepu32lo_pd(src: __m512d, k: __mmask8, v2: __m512i) -> __m512d {
let convert = _mm512_cvtepu32lo_pd(v2).as_f64x8();
transmute(simd_select_bitmask(k, convert, src.as_f64x8()))
}
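// Like the `pslo` conversions earlier, the `epi32lo`/`epu32lo` variants use
// only the lower eight 32-bit lanes of the 512-bit integer input. Sketch
// with a hypothetical value:
//
//     let v = _mm512_set1_epi32(3);
//     let d = _mm512_cvtepi32lo_pd(v); // eight f64 lanes, each 3.0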
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_cvtepi32_epi16(a: __m512i) -> __m256i {
let a = a.as_i32x16();
transmute::<i16x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
transmute(simd_select_bitmask(k, convert, src.as_i16x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_maskz_cvtepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi32_epi16(a).as_i16x16();
let zero = _mm256_setzero_si256().as_i16x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_cvtepi32_epi16(a: __m256i) -> __m128i {
let a = a.as_i32x8();
transmute::<i16x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_maskz_cvtepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
let convert = _mm256_cvtepi32_epi16(a).as_i16x8();
let zero = _mm_setzero_si128().as_i16x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_cvtepi32_epi16(a: __m128i) -> __m128i {
transmute(vpmovdw128(
a.as_i32x4(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_mask_cvtepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovdw128(a.as_i32x4(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_maskz_cvtepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k))
}
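// For 128-bit sources the narrowed result fills only part of an XMM
// register, so these lower to the `vpmov*`-style intrinsics, which zero the
// unused upper lanes of the destination, rather than to a plain `simd_cast`.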
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_cvtepi32_epi8(a: __m512i) -> __m128i {
let a = a.as_i32x16();
transmute::<i8x16, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
transmute(simd_select_bitmask(k, convert, src.as_i8x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_maskz_cvtepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi32_epi8(a).as_i8x16();
let zero = _mm_setzero_si128().as_i8x16();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_cvtepi32_epi8(a: __m256i) -> __m128i {
transmute(vpmovdb256(
a.as_i32x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovdb256(a.as_i32x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_maskz_cvtepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_cvtepi32_epi8(a: __m128i) -> __m128i {
transmute(vpmovdb128(
a.as_i32x4(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_mask_cvtepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovdb128(a.as_i32x4(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_maskz_cvtepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k))
}
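// The plain `cvtepi32_epi8` narrowing truncates, keeping the low bits of
// each lane. Sketch with a hypothetical value:
//
//     let x = _mm512_set1_epi32(0x0102_0304);
//     let b = _mm512_cvtepi32_epi8(x); // each byte = 0x04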
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_cvtepi64_epi32(a: __m512i) -> __m256i {
let a = a.as_i64x8();
transmute::<i32x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, convert, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_maskz_cvtepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
let convert = _mm512_cvtepi64_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_cvtepi64_epi32(a: __m256i) -> __m128i {
let a = a.as_i64x4();
transmute::<i32x4, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, convert, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_maskz_cvtepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
let convert = _mm256_cvtepi64_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_cvtepi64_epi32(a: __m128i) -> __m128i {
transmute(vpmovqd128(
a.as_i64x2(),
_mm_setzero_si128().as_i32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqd128(a.as_i64x2(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_maskz_cvtepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_cvtepi64_epi16(a: __m512i) -> __m128i {
let a = a.as_i64x8();
transmute::<i16x8, _>(simd_cast(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
transmute(simd_select_bitmask(k, convert, src.as_i16x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_maskz_cvtepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
let convert = _mm512_cvtepi64_epi16(a).as_i16x8();
let zero = _mm_setzero_si128().as_i16x8();
transmute(simd_select_bitmask(k, convert, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_cvtepi64_epi16(a: __m256i) -> __m128i {
transmute(vpmovqw256(
a.as_i64x4(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovqw256(a.as_i64x4(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_maskz_cvtepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_cvtepi64_epi16(a: __m128i) -> __m128i {
transmute(vpmovqw128(
a.as_i64x2(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_mask_cvtepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqw128(a.as_i64x2(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_maskz_cvtepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_cvtepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovqb(
a.as_i64x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovqb(a.as_i64x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_maskz_cvtepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_cvtepi64_epi8(a: __m256i) -> __m128i {
transmute(vpmovqb256(
a.as_i64x4(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovqb256(a.as_i64x4(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_maskz_cvtepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_cvtepi64_epi8(a: __m128i) -> __m128i {
transmute(vpmovqb128(
a.as_i64x2(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqb128(a.as_i64x2(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_maskz_cvtepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_cvtsepi32_epi16(a: __m512i) -> __m256i {
transmute(vpmovsdw(
a.as_i32x16(),
_mm256_setzero_si256().as_i16x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovsdw(a.as_i32x16(), src.as_i16x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovsdw(
a.as_i32x16(),
_mm256_setzero_si256().as_i16x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_cvtsepi32_epi16(a: __m256i) -> __m128i {
transmute(vpmovsdw256(
a.as_i32x8(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsdw256(a.as_i32x8(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_maskz_cvtsepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsdw256(a.as_i32x8(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_cvtsepi32_epi16(a: __m128i) -> __m128i {
transmute(vpmovsdw128(
a.as_i32x4(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_mask_cvtsepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsdw128(a.as_i32x4(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_maskz_cvtsepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsdw128(a.as_i32x4(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_cvtsepi32_epi8(a: __m512i) -> __m128i {
transmute(vpmovsdb(
a.as_i32x16(),
_mm_setzero_si128().as_i8x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovsdb(a.as_i32x16(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_maskz_cvtsepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovsdb(a.as_i32x16(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_cvtsepi32_epi8(a: __m256i) -> __m128i {
transmute(vpmovsdb256(
a.as_i32x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsdb256(a.as_i32x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_maskz_cvtsepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsdb256(a.as_i32x8(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_cvtsepi32_epi8(a: __m128i) -> __m128i {
transmute(vpmovsdb128(
a.as_i32x4(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_mask_cvtsepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsdb128(a.as_i32x4(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_maskz_cvtsepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsdb128(a.as_i32x4(), _mm_setzero_si128().as_i8x16(), k))
}
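// The `cvtsepi*` variants narrow with signed saturation instead of
// truncation. Sketch:
//
//     let x = _mm512_set1_epi32(70_000);
//     let b = _mm512_cvtsepi32_epi8(x); // each lane clamps to i8::MAX (127)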
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_cvtsepi64_epi32(a: __m512i) -> __m256i {
transmute(vpmovsqd(
a.as_i64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovsqd(a.as_i64x8(), src.as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovsqd(a.as_i64x8(), _mm256_setzero_si256().as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_cvtsepi64_epi32(a: __m256i) -> __m128i {
transmute(vpmovsqd256(
a.as_i64x4(),
_mm_setzero_si128().as_i32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqd256(a.as_i64x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_maskz_cvtsepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqd256(a.as_i64x4(), _mm_setzero_si128().as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_cvtsepi64_epi32(a: __m128i) -> __m128i {
transmute(vpmovsqd128(
a.as_i64x2(),
_mm_setzero_si128().as_i32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqd128(a.as_i64x2(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_maskz_cvtsepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqd128(a.as_i64x2(), _mm_setzero_si128().as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_cvtsepi64_epi16(a: __m512i) -> __m128i {
transmute(vpmovsqw(
a.as_i64x8(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqw(a.as_i64x8(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqw(a.as_i64x8(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_cvtsepi64_epi16(a: __m256i) -> __m128i {
transmute(vpmovsqw256(
a.as_i64x4(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqw256(a.as_i64x4(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_maskz_cvtsepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqw256(a.as_i64x4(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_cvtsepi64_epi16(a: __m128i) -> __m128i {
transmute(vpmovsqw128(
a.as_i64x2(),
_mm_setzero_si128().as_i16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_mask_cvtsepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqw128(a.as_i64x2(), src.as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_maskz_cvtsepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqw128(a.as_i64x2(), _mm_setzero_si128().as_i16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_cvtsepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovsqb(
a.as_i64x8(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqb(a.as_i64x8(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_maskz_cvtsepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovsqb(a.as_i64x8(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_cvtsepi64_epi8(a: __m256i) -> __m128i {
transmute(vpmovsqb256(
a.as_i64x4(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqb256(a.as_i64x4(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_maskz_cvtsepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovsqb256(a.as_i64x4(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_cvtsepi64_epi8(a: __m128i) -> __m128i {
transmute(vpmovsqb128(
a.as_i64x2(),
_mm_setzero_si128().as_i8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqb128(a.as_i64x2(), src.as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_maskz_cvtsepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovsqb128(a.as_i64x2(), _mm_setzero_si128().as_i8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_cvtusepi32_epi16(a: __m512i) -> __m256i {
transmute(vpmovusdw(
a.as_u32x16(),
_mm256_setzero_si256().as_u16x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_epi16(src: __m256i, k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovusdw(a.as_u32x16(), src.as_u16x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi16(k: __mmask16, a: __m512i) -> __m256i {
transmute(vpmovusdw(
a.as_u32x16(),
_mm256_setzero_si256().as_u16x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_cvtusepi32_epi16(a: __m256i) -> __m128i {
transmute(vpmovusdw256(
a.as_u32x8(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusdw256(a.as_u32x8(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_maskz_cvtusepi32_epi16(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusdw256(
a.as_u32x8(),
_mm_setzero_si128().as_u16x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_cvtusepi32_epi16(a: __m128i) -> __m128i {
transmute(vpmovusdw128(
a.as_u32x4(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusdw128(a.as_u32x4(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_maskz_cvtusepi32_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusdw128(
a.as_u32x4(),
_mm_setzero_si128().as_u16x8(),
k,
))
}
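/// Convert packed unsigned 32-bit integers in `a` to packed unsigned 8-bit
/// integers with unsigned saturation.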
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_cvtusepi32_epi8(a: __m512i) -> __m128i {
transmute(vpmovusdb(
a.as_u32x16(),
_mm_setzero_si128().as_u8x16(),
0b11111111_11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_epi8(src: __m128i, k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovusdb(a.as_u32x16(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_maskz_cvtusepi32_epi8(k: __mmask16, a: __m512i) -> __m128i {
transmute(vpmovusdb(a.as_u32x16(), _mm_setzero_si128().as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_cvtusepi32_epi8(a: __m256i) -> __m128i {
transmute(vpmovusdb256(
a.as_u32x8(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusdb256(a.as_u32x8(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_maskz_cvtusepi32_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusdb256(
a.as_u32x8(),
_mm_setzero_si128().as_u8x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_cvtusepi32_epi8(a: __m128i) -> __m128i {
transmute(vpmovusdb128(
a.as_u32x4(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusdb128(a.as_u32x4(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_maskz_cvtusepi32_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusdb128(
a.as_u32x4(),
_mm_setzero_si128().as_u8x16(),
k,
))
}
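/// Convert packed unsigned 64-bit integers in `a` to packed unsigned 32-bit
/// integers with unsigned saturation.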
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_cvtusepi64_epi32(a: __m512i) -> __m256i {
transmute(vpmovusqd(
a.as_u64x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_epi32(src: __m256i, k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovusqd(a.as_u64x8(), src.as_u32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi32(k: __mmask8, a: __m512i) -> __m256i {
transmute(vpmovusqd(
a.as_u64x8(),
_mm256_setzero_si256().as_u32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_cvtusepi64_epi32(a: __m256i) -> __m128i {
transmute(vpmovusqd256(
a.as_u64x4(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqd256(a.as_u64x4(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi32(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqd256(
a.as_u64x4(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_cvtusepi64_epi32(a: __m128i) -> __m128i {
transmute(vpmovusqd128(
a.as_u64x2(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqd128(a.as_u64x2(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_maskz_cvtusepi64_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqd128(
a.as_u64x2(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
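/// Convert packed unsigned 64-bit integers in `a` to packed unsigned 16-bit
/// integers with unsigned saturation.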
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_cvtusepi64_epi16(a: __m512i) -> __m128i {
transmute(vpmovusqw(
a.as_u64x8(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqw(a.as_u64x8(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi16(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqw(a.as_u64x8(), _mm_setzero_si128().as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_cvtusepi64_epi16(a: __m256i) -> __m128i {
transmute(vpmovusqw256(
a.as_u64x4(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqw256(a.as_u64x4(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi16(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqw256(
a.as_u64x4(),
_mm_setzero_si128().as_u16x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_cvtusepi64_epi16(a: __m128i) -> __m128i {
transmute(vpmovusqw128(
a.as_u64x2(),
_mm_setzero_si128().as_u16x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_epi16(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqw128(a.as_u64x2(), src.as_u16x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_maskz_cvtusepi64_epi16(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqw128(
a.as_u64x2(),
_mm_setzero_si128().as_u16x8(),
k,
))
}
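/// Convert packed unsigned 64-bit integers in `a` to packed unsigned 8-bit
/// integers with unsigned saturation.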
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_cvtusepi64_epi8(a: __m512i) -> __m128i {
transmute(vpmovusqb(
a.as_u64x8(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqb(a.as_u64x8(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_maskz_cvtusepi64_epi8(k: __mmask8, a: __m512i) -> __m128i {
transmute(vpmovusqb(a.as_u64x8(), _mm_setzero_si128().as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_cvtusepi64_epi8(a: __m256i) -> __m128i {
transmute(vpmovusqb256(
a.as_u64x4(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqb256(a.as_u64x4(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_maskz_cvtusepi64_epi8(k: __mmask8, a: __m256i) -> __m128i {
transmute(vpmovusqb256(
a.as_u64x4(),
_mm_setzero_si128().as_u8x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_cvtusepi64_epi8(a: __m128i) -> __m128i {
transmute(vpmovusqb128(
a.as_u64x2(),
_mm_setzero_si128().as_u8x16(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_epi8(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqb128(a.as_u64x2(), src.as_u8x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_maskz_cvtusepi64_epi8(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpmovusqb128(
a.as_u64x2(),
_mm_setzero_si128().as_u8x16(),
k,
))
}
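/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers. Rounding is done according to the `rounding`
/// parameter, which must be one of:
///
/// * `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` : round to nearest
/// * `_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC` : round down
/// * `_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC` : round up
/// * `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` : truncate
/// * `_MM_FROUND_CUR_DIRECTION` : use `MXCSR.RC`
///
/// A minimal usage sketch, not compiled as a doctest since it needs an
/// AVX-512F CPU and an `unsafe` context:
///
/// ```ignore
/// let a = _mm512_set1_ps(1.5);
/// // round-to-nearest-even sends 1.5 to 2, so every lane of `r` is 2
/// let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// ```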
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epi32(a: __m512, rounding: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epi32(
src: __m512i,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512i {
let a = a.as_f32x16();
let src = src.as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(a, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epi32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2dq(a, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_epu32(a: __m512, rounding: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_epu32(
src: __m512i,
k: __mmask16,
a: __m512,
rounding: i32,
) -> __m512i {
let a = a.as_f32x16();
let src = src.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(a, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_epu32(k: __mmask16, a: __m512, rounding: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2udq(a, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
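/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed double-precision (64-bit) elements. The widening conversion is
/// exact; exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in the
/// `sae` parameter.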
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_pd(a: __m256, sae: i32) -> __m512d {
let a = a.as_f32x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_pd(
src: __m512d,
k: __mmask8,
a: __m256,
sae: i32,
) -> __m512d {
let a = a.as_f32x8();
let src = src.as_f64x8();
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2pd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_pd(k: __mmask8, a: __m256, sae: i32) -> __m512d {
let a = a.as_f32x8();
let zero = _mm512_setzero_pd().as_f64x8();
macro_rules! call {
($imm4:expr) => {
vcvtps2pd(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epi32(a: __m512d, rounding: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epi32(
src: __m256i,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256i {
let a = a.as_f64x8();
let src = src.as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(a, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2dq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epi32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2dq(a, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_epu32(a: __m512d, rounding: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_u32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_epu32(
src: __m256i,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256i {
let a = a.as_f64x8();
let src = src.as_u32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(a, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2udq, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_epu32(k: __mmask8, a: __m512d, rounding: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_u32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2udq(a, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundpd_ps(a: __m512d, rounding: i32) -> __m256 {
let a = a.as_f64x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundpd_ps(
src: __m256,
k: __mmask8,
a: __m512d,
rounding: i32,
) -> __m256 {
let a = a.as_f64x8();
let src = src.as_f32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(a, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtpd2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundpd_ps(k: __mmask8, a: __m512d, rounding: i32) -> __m256 {
let a = a.as_f64x8();
let zero = _mm256_setzero_ps().as_f32x8();
macro_rules! call {
($imm4:expr) => {
vcvtpd2ps(a, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
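/// Convert packed signed 32-bit integers in `a` to packed single-precision
/// (32-bit) floating-point elements, rounding according to the `rounding`
/// parameter (same encoding as for `_mm512_cvt_roundps_epi32`).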
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepi32_ps(a: __m512i, rounding: i32) -> __m512 {
let a = a.as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepi32_ps(
src: __m512,
k: __mmask16,
a: __m512i,
rounding: i32,
) -> __m512 {
let a = a.as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a, $imm4)
};
}
let r: f32x16 = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtdq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepi32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
let a = a.as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvtdq2ps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundepu32_ps(a: __m512i, rounding: i32) -> __m512 {
let a = a.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundepu32_ps(
src: __m512,
k: __mmask16,
a: __m512i,
rounding: i32,
) -> __m512 {
let a = a.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a, $imm4)
};
}
let r: f32x16 = constify_imm4_round!(rounding, call);
transmute(simd_select_bitmask(k, r, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtudq2ps, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundepu32_ps(k: __mmask16, a: __m512i, rounding: i32) -> __m512 {
let a = a.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvtudq2ps(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r, zero))
}
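/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed half-precision (16-bit) floating-point elements; the `sae` immediate
/// is forwarded to `vcvtps2ph` to select the rounding behavior.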
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundps_ph(a: __m512, sae: i32) -> __m256i {
let a = a.as_f32x16();
let zero = _mm256_setzero_si256().as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, zero, 0b11111111_11111111)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundps_ph(
src: __m256i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m256i {
let a = a.as_f32x16();
let src = src.as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, src, k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
let a = a.as_f32x16();
let zero = _mm256_setzero_si256().as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, zero, k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_cvt_roundps_ph(
src: __m128i,
k: __mmask8,
a: __m256,
imm8: i32,
) -> __m128i {
let a = a.as_f32x8();
let src = src.as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph256(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_cvt_roundps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i {
let a = a.as_f32x8();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph256(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_cvt_roundps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let src = src.as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph128(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_cvt_roundps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph128(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtps_ph(a: __m512, sae: i32) -> __m256i {
let a = a.as_f32x16();
let zero = _mm256_setzero_si256().as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, zero, 0b11111111_11111111)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtps_ph(src: __m256i, k: __mmask16, a: __m512, sae: i32) -> __m256i {
let a = a.as_f32x16();
let src = src.as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, src, k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtps2ph, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtps_ph(k: __mmask16, a: __m512, sae: i32) -> __m256i {
let a = a.as_f32x16();
let zero = _mm256_setzero_si256().as_i16x16();
macro_rules! call {
($imm4:expr) => {
vcvtps2ph(a, $imm4, zero, k)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m256, imm8: i32) -> __m128i {
let a = a.as_f32x8();
let src = src.as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph256(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_cvtps_ph(k: __mmask8, a: __m256, imm8: i32) -> __m128i {
let a = a.as_f32x8();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph256(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_cvtps_ph(src: __m128i, k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let src = src.as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph128(a, $imm8, src, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtps2ph, imm8 = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_cvtps_ph(k: __mmask8, a: __m128, imm8: i32) -> __m128i {
let a = a.as_f32x4();
let zero = _mm_setzero_si128().as_i16x8();
macro_rules! call {
($imm8:expr) => {
vcvtps2ph128(a, $imm8, zero, k)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
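/// Convert packed half-precision (16-bit) floating-point elements in `a` to
/// packed single-precision (32-bit) elements. The widening conversion is
/// exact; exceptions can be suppressed by passing `_MM_FROUND_NO_EXC` in `sae`.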
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvt_roundph_ps(a: __m256i, sae: i32) -> __m512 {
let a = a.as_i16x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvt_roundph_ps(
src: __m512,
k: __mmask16,
a: __m256i,
sae: i32,
) -> __m512 {
let a = a.as_i16x16();
let src = src.as_f32x16();
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvt_roundph_ps(k: __mmask16, a: __m256i, sae: i32) -> __m512 {
let a = a.as_i16x16();
let zero = _mm512_setzero_ps().as_f32x16();
macro_rules! call {
($imm4:expr) => {
vcvtph2ps(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_cvtph_ps(a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
_mm512_setzero_ps().as_f32x16(),
0b11111111_11111111,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_mask_cvtph_ps(src: __m512, k: __mmask16, a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
src.as_f32x16(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm512_maskz_cvtph_ps(k: __mmask16, a: __m256i) -> __m512 {
transmute(vcvtph2ps(
a.as_i16x16(),
_mm512_setzero_ps().as_f32x16(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm256_mask_cvtph_ps(src: __m256, k: __mmask8, a: __m128i) -> __m256 {
let convert = _mm256_cvtph_ps(a);
transmute(simd_select_bitmask(k, convert.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm256_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m256 {
let convert = _mm256_cvtph_ps(a);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, convert.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm_mask_cvtph_ps(src: __m128, k: __mmask8, a: __m128i) -> __m128 {
let convert = _mm_cvtph_ps(a);
transmute(simd_select_bitmask(k, convert.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvtph2ps))]
pub unsafe fn _mm_maskz_cvtph_ps(k: __mmask8, a: __m128i) -> __m128 {
let convert = _mm_cvtph_ps(a);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, convert.as_f32x4(), zero))
}
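/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation (round toward zero regardless of
/// `MXCSR.RC`); exceptions can be suppressed by passing `_MM_FROUND_NO_EXC`
/// in `sae`.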
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epi32(a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epi32(
src: __m512i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512i {
let a = a.as_f32x16();
let src = src.as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epi32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_i32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2dq(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundps_epu32(a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, zero, 0b11111111_11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundps_epu32(
src: __m512i,
k: __mmask16,
a: __m512,
sae: i32,
) -> __m512i {
let a = a.as_f32x16();
let src = src.as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundps_epu32(k: __mmask16, a: __m512, sae: i32) -> __m512i {
let a = a.as_f32x16();
let zero = _mm512_setzero_si512().as_u32x16();
macro_rules! call {
($imm4:expr) => {
vcvttps2udq(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epi32(a: __m512d, sae: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epi32(
src: __m256i,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m256i {
let a = a.as_f64x8();
let src = src.as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epi32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2dq(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_cvtt_roundpd_epu32(a: __m512d, sae: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(a, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_cvtt_roundpd_epu32(
src: __m256i,
k: __mmask8,
a: __m512d,
sae: i32,
) -> __m256i {
let a = a.as_f64x8();
let src = src.as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(a, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_cvtt_roundpd_epu32(k: __mmask8, a: __m512d, sae: i32) -> __m256i {
let a = a.as_f64x8();
let zero = _mm256_setzero_si256().as_i32x8();
macro_rules! call {
($imm4:expr) => {
vcvttpd2udq(a, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_cvttps_epi32(a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_mask_cvttps_epi32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
src.as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm512_maskz_cvttps_epi32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2dq(
a.as_f32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm256_mask_cvttps_epi32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
transmute(vcvttps2dq256(a.as_f32x8(), src.as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm256_maskz_cvttps_epi32(k: __mmask8, a: __m256) -> __m256i {
transmute(vcvttps2dq256(
a.as_f32x8(),
_mm256_setzero_si256().as_i32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm_mask_cvttps_epi32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
transmute(vcvttps2dq128(a.as_f32x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2dq))]
pub unsafe fn _mm_maskz_cvttps_epi32(k: __mmask8, a: __m128) -> __m128i {
transmute(vcvttps2dq128(
a.as_f32x4(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
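/// Convert packed single-precision (32-bit) floating-point elements in `a` to
/// packed unsigned 32-bit integers with truncation.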
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_cvttps_epu32(a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
0b11111111_11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_mask_cvttps_epu32(src: __m512i, k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
src.as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm512_maskz_cvttps_epu32(k: __mmask16, a: __m512) -> __m512i {
transmute(vcvttps2udq(
a.as_f32x16(),
_mm512_setzero_si512().as_u32x16(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm256_cvttps_epu32(a: __m256) -> __m256i {
transmute(vcvttps2udq256(
a.as_f32x8(),
_mm256_setzero_si256().as_u32x8(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm256_mask_cvttps_epu32(src: __m256i, k: __mmask8, a: __m256) -> __m256i {
transmute(vcvttps2udq256(a.as_f32x8(), src.as_u32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm256_maskz_cvttps_epu32(k: __mmask8, a: __m256) -> __m256i {
transmute(vcvttps2udq256(
a.as_f32x8(),
_mm256_setzero_si256().as_u32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm_cvttps_epu32(a: __m128) -> __m128i {
transmute(vcvttps2udq128(
a.as_f32x4(),
_mm_setzero_si128().as_u32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm_mask_cvttps_epu32(src: __m128i, k: __mmask8, a: __m128) -> __m128i {
transmute(vcvttps2udq128(a.as_f32x4(), src.as_u32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttps2udq))]
pub unsafe fn _mm_maskz_cvttps_epu32(k: __mmask8, a: __m128) -> __m128i {
transmute(vcvttps2udq128(
a.as_f32x4(),
_mm_setzero_si128().as_u32x4(),
k,
))
}
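/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed 32-bit integers with truncation.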
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_cvttpd_epi32(a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_mask_cvttpd_epi32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
src.as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm512_maskz_cvttpd_epi32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2dq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm256_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvttpd2dq256(a.as_f64x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm256_maskz_cvttpd_epi32(k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvttpd2dq256(
a.as_f64x4(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm_mask_cvttpd_epi32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvttpd2dq128(a.as_f64x2(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2dq))]
pub unsafe fn _mm_maskz_cvttpd_epi32(k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvttpd2dq128(
a.as_f64x2(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
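/// Convert packed double-precision (64-bit) floating-point elements in `a` to
/// packed unsigned 32-bit integers with truncation.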
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_cvttpd_epu32(a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_mask_cvttpd_epu32(src: __m256i, k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
src.as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm512_maskz_cvttpd_epu32(k: __mmask8, a: __m512d) -> __m256i {
transmute(vcvttpd2udq(
a.as_f64x8(),
_mm256_setzero_si256().as_i32x8(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm256_cvttpd_epu32(a: __m256d) -> __m128i {
transmute(vcvttpd2udq256(
a.as_f64x4(),
_mm_setzero_si128().as_i32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm256_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvttpd2udq256(a.as_f64x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm256_maskz_cvttpd_epu32(k: __mmask8, a: __m256d) -> __m128i {
transmute(vcvttpd2udq256(
a.as_f64x4(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm_cvttpd_epu32(a: __m128d) -> __m128i {
transmute(vcvttpd2udq128(
a.as_f64x2(),
_mm_setzero_si128().as_i32x4(),
0b11111111,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm_mask_cvttpd_epu32(src: __m128i, k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvttpd2udq128(a.as_f64x2(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcvttpd2udq))]
pub unsafe fn _mm_maskz_cvttpd_epu32(k: __mmask8, a: __m128d) -> __m128i {
transmute(vcvttpd2udq128(
a.as_f64x2(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
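/// Return a vector of type `__m512d` with all elements set to zero. The whole
/// `setzero` family below compiles to a single register-zeroing `vxorps`.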
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_pd() -> __m512d {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_ps() -> __m512 {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero() -> __m512 {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_si512() -> __m512i {
mem::zeroed()
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vxorps))]
pub unsafe fn _mm512_setzero_epi32() -> __m512i {
mem::zeroed()
}
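/// Set packed 32-bit integers in the result with the supplied values in
/// reverse order: the first argument (`e15`) lands in element 0.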
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
let r = i32x16(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
);
transmute(r)
}
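/// Set packed 8-bit integers in the result with the supplied values: the last
/// argument (`e0`) lands in element 0.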
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi8(
e63: i8,
e62: i8,
e61: i8,
e60: i8,
e59: i8,
e58: i8,
e57: i8,
e56: i8,
e55: i8,
e54: i8,
e53: i8,
e52: i8,
e51: i8,
e50: i8,
e49: i8,
e48: i8,
e47: i8,
e46: i8,
e45: i8,
e44: i8,
e43: i8,
e42: i8,
e41: i8,
e40: i8,
e39: i8,
e38: i8,
e37: i8,
e36: i8,
e35: i8,
e34: i8,
e33: i8,
e32: i8,
e31: i8,
e30: i8,
e29: i8,
e28: i8,
e27: i8,
e26: i8,
e25: i8,
e24: i8,
e23: i8,
e22: i8,
e21: i8,
e20: i8,
e19: i8,
e18: i8,
e17: i8,
e16: i8,
e15: i8,
e14: i8,
e13: i8,
e12: i8,
e11: i8,
e10: i8,
e9: i8,
e8: i8,
e7: i8,
e6: i8,
e5: i8,
e4: i8,
e3: i8,
e2: i8,
e1: i8,
e0: i8,
) -> __m512i {
let r = i8x64(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31, e32, e33, e34, e35, e36, e37,
e38, e39, e40, e41, e42, e43, e44, e45, e46, e47, e48, e49, e50, e51, e52, e53, e54, e55,
e56, e57, e58, e59, e60, e61, e62, e63,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi16(
e31: i16,
e30: i16,
e29: i16,
e28: i16,
e27: i16,
e26: i16,
e25: i16,
e24: i16,
e23: i16,
e22: i16,
e21: i16,
e20: i16,
e19: i16,
e18: i16,
e17: i16,
e16: i16,
e15: i16,
e14: i16,
e13: i16,
e12: i16,
e11: i16,
e10: i16,
e9: i16,
e8: i16,
e7: i16,
e6: i16,
e5: i16,
e4: i16,
e3: i16,
e2: i16,
e1: i16,
e0: i16,
) -> __m512i {
let r = i16x32(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15, e16, e17, e18, e19,
e20, e21, e22, e23, e24, e25, e26, e27, e28, e29, e30, e31,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
_mm512_set_epi32(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
_mm512_set_ps(d, c, b, a, d, c, b, a, d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
_mm512_set_pd(d, c, b, a, d, c, b, a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi32(d: i32, c: i32, b: i32, a: i32) -> __m512i {
_mm512_set_epi32(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_ps(d: f32, c: f32, b: f32, a: f32) -> __m512 {
_mm512_set_ps(a, b, c, d, a, b, c, d, a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_pd(d: f64, c: f64, b: f64, a: f64) -> __m512d {
_mm512_set_pd(a, b, c, d, a, b, c, d)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi64(
e7: i64,
e6: i64,
e5: i64,
e4: i64,
e3: i64,
e2: i64,
e1: i64,
e0: i64,
) -> __m512i {
_mm512_setr_epi64(e0, e1, e2, e3, e4, e5, e6, e7)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_epi64(
e7: i64,
e6: i64,
e5: i64,
e4: i64,
e3: i64,
e2: i64,
e1: i64,
e0: i64,
) -> __m512i {
let r = i64x8::new(e7, e6, e5, e4, e3, e2, e1, e0);
transmute(r)
}
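/// Gather eight double-precision (64-bit) floating-point elements from memory
/// using 32-bit indices: element `i` is loaded from the address
/// `slice + offsets[i] * scale`, where `scale` must be 1, 2, 4 or 8.
///
/// A minimal sketch that gathers an array in reverse, not compiled as a
/// doctest since it needs an AVX-512F CPU and an `unsafe` context:
///
/// ```ignore
/// let data: [f64; 8] = [0., 1., 2., 3., 4., 5., 6., 7.];
/// let idx = _mm256_setr_epi32(7, 6, 5, 4, 3, 2, 1, 0);
/// // scale = 8 because each f64 occupies 8 bytes; lane i of r is data[7 - i]
/// let r = _mm512_i32gather_pd(idx, data.as_ptr() as *const u8, 8);
/// ```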
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_pd(offsets: __m256i, slice: *const u8, scale: i32) -> __m512d {
let zero = _mm512_setzero_pd().as_f64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vgatherdpd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_pd(
src: __m512d,
mask: __mmask8,
offsets: __m256i,
slice: *const u8,
scale: i32,
) -> __m512d {
let src = src.as_f64x8();
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vgatherdpd(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_pd(offsets: __m512i, slice: *const u8, scale: i32) -> __m512d {
let zero = _mm512_setzero_pd().as_f64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqpd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_pd(
src: __m512d,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512d {
let src = src.as_f64x8();
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqpd(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m256 {
let zero = _mm256_setzero_ps().as_f32x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqps(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_ps(
src: __m256,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m256 {
let src = src.as_f32x8();
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vgatherqps(src, slice, offsets, mask as i8, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_ps(offsets: __m512i, slice: *const u8, scale: i32) -> __m512 {
let zero = _mm512_setzero_ps().as_f32x16();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vgatherdps(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgatherdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_ps(
src: __m512,
mask: __mmask16,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512 {
let src = src.as_f32x16();
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vgatherdps(src, slice, offsets, mask as i16, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i32x16();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpgatherdd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi32(
src: __m512i,
mask: __mmask16,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i32x16();
let mask = mask as i16;
let slice = slice as *const i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpgatherdd(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i32gather_epi64(offsets: __m256i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpgatherdq(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32gather_epi64(
src: __m512i,
mask: __mmask8,
offsets: __m256i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpgatherdq(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi64(offsets: __m512i, slice: *const u8, scale: i32) -> __m512i {
let zero = _mm512_setzero_si512().as_i64x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqq(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi64(
src: __m512i,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m512i {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqq(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_i64gather_epi32(offsets: __m512i, slice: *const u8, scale: i32) -> __m256i {
let zero = _mm256_setzero_si256().as_i32x8();
let neg_one = -1;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqd(zero, slice, offsets, neg_one, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpgatherqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64gather_epi32(
src: __m256i,
mask: __mmask8,
offsets: __m512i,
slice: *const u8,
scale: i32,
) -> __m256i {
let src = src.as_i32x8();
let mask = mask as i8;
let slice = slice as *const i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpgatherqd(src, slice, offsets, mask, $imm8)
};
}
let r = constify_imm8_gather!(scale, call);
transmute(r)
}
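/// Scatter eight double-precision (64-bit) floating-point elements from `src`
/// to memory using 32-bit indices: element `i` is stored to the address
/// `slice + offsets[i] * scale`, where `scale` must be 1, 2, 4 or 8.
///
/// A minimal sketch that fills an array, not compiled as a doctest since it
/// needs an AVX-512F CPU and an `unsafe` context (`_mm512_set1_pd` builds the
/// source vector):
///
/// ```ignore
/// let mut out = [0.0f64; 8];
/// let idx = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
/// _mm512_i32scatter_pd(out.as_mut_ptr() as *mut u8, idx, _mm512_set1_pd(1.0), 8);
/// // out is now [1.0; 8]
/// ```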
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_pd(slice: *mut u8, offsets: __m256i, src: __m512d, scale: i32) {
let src = src.as_f64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vscatterdpd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_pd(
slice: *mut u8,
mask: __mmask8,
offsets: __m256i,
src: __m512d,
scale: i32,
) {
let src = src.as_f64x8();
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vscatterdpd(slice, mask as i8, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_pd(slice: *mut u8, offsets: __m512i, src: __m512d, scale: i32) {
let src = src.as_f64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqpd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqpd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_pd(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m512d,
scale: i32,
) {
let src = src.as_f64x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqpd(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
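/// Scatter single-precision (32-bit) floating-point elements from `src` to
/// memory using 32-bit indices.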
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_ps(slice: *mut u8, offsets: __m512i, src: __m512, scale: i32) {
let src = src.as_f32x16();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vscatterdps(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterdps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_ps(
slice: *mut u8,
mask: __mmask16,
offsets: __m512i,
src: __m512,
scale: i32,
) {
let src = src.as_f32x16();
let mask = mask as i16;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vscatterdps(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
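/// Scatter single-precision (32-bit) floating-point elements from `src` to
/// memory using 64-bit indices; only eight lanes are addressable, so `src` is
/// a 256-bit vector.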
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_ps(slice: *mut u8, offsets: __m512i, src: __m256, scale: i32) {
let src = src.as_f32x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqps(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscatterqps, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_ps(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m256,
scale: i32,
) {
let src = src.as_f32x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vscatterqps(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
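/// Scatter 64-bit integers from `src` to memory using 32-bit indices.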
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi64(slice: *mut u8, offsets: __m256i, src: __m512i, scale: i32) {
let src = src.as_i64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpscatterdq(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi64(
slice: *mut u8,
mask: __mmask8,
offsets: __m256i,
src: __m512i,
scale: i32,
) {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vpscatterdq(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
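/// Scatter 64-bit integers from `src` to memory using 64-bit indices.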
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi64(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
let src = src.as_i64x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqq(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqq, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi64(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m512i,
scale: i32,
) {
let src = src.as_i64x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqq(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
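/// Scatter 32-bit integers from `src` to memory using 32-bit indices.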
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i32scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m512i, scale: i32) {
let src = src.as_i32x16();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpscatterdd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterdd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i32scatter_epi32(
slice: *mut u8,
mask: __mmask16,
offsets: __m512i,
src: __m512i,
scale: i32,
) {
let src = src.as_i32x16();
let mask = mask as i16;
let slice = slice as *mut i8;
let offsets = offsets.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpscatterdd(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
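/// Scatter 32-bit integers from `src` (a 256-bit vector) to memory using
/// 64-bit indices.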
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_i64scatter_epi32(slice: *mut u8, offsets: __m512i, src: __m256i, scale: i32) {
let src = src.as_i32x8();
let neg_one = -1;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqd(slice, neg_one, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpscatterqd, scale = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_i64scatter_epi32(
slice: *mut u8,
mask: __mmask8,
offsets: __m512i,
src: __m256i,
scale: i32,
) {
let src = src.as_i32x8();
let mask = mask as i8;
let slice = slice as *mut i8;
let offsets = offsets.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpscatterqd(slice, mask, offsets, src, $imm8)
};
}
constify_imm8_gather!(scale, call);
}
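/// Contiguously store the active 32-bit integers in `a` (those selected by
/// writemask `k`) to the low elements of the result, and fill the remaining
/// upper elements from `src`.
///
/// A minimal sketch (illustrative only; `a` is assumed to be in scope), using
/// the zeroing variant defined below:
///
/// ```ignore
/// // pack the eight odd lanes of `a` into the low eight lanes, zeroing the rest
/// let packed = _mm512_maskz_compress_epi32(0b1010_1010_1010_1010, a);
/// ```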
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_mask_compress_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
transmute(vpcompressd(a.as_i32x16(), src.as_i32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm512_maskz_compress_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(vpcompressd(
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm256_mask_compress_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(vpcompressd256(a.as_i32x8(), src.as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm256_maskz_compress_epi32(k: __mmask8, a: __m256i) -> __m256i {
transmute(vpcompressd256(
a.as_i32x8(),
_mm256_setzero_si256().as_i32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm_mask_compress_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpcompressd128(a.as_i32x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressd))]
pub unsafe fn _mm_maskz_compress_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpcompressd128(
a.as_i32x4(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
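/// Contiguously store the active 64-bit integers in `a` (those selected by
/// writemask `k`) to the low elements of the result, filling the rest from
/// `src`.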
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_mask_compress_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
transmute(vpcompressq(a.as_i64x8(), src.as_i64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm512_maskz_compress_epi64(k: __mmask8, a: __m512i) -> __m512i {
transmute(vpcompressq(
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm256_mask_compress_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(vpcompressq256(a.as_i64x4(), src.as_i64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm256_maskz_compress_epi64(k: __mmask8, a: __m256i) -> __m256i {
transmute(vpcompressq256(
a.as_i64x4(),
_mm256_setzero_si256().as_i64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm_mask_compress_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpcompressq128(a.as_i64x2(), src.as_i64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcompressq))]
pub unsafe fn _mm_maskz_compress_epi64(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpcompressq128(
a.as_i64x2(),
_mm_setzero_si128().as_i64x2(),
k,
))
}
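/// Contiguously store the active single-precision elements in `a` (those
/// selected by writemask `k`) to the low elements of the result, filling the
/// rest from `src`.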
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_mask_compress_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vcompressps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm512_maskz_compress_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vcompressps(
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm256_mask_compress_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
transmute(vcompressps256(a.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm256_maskz_compress_ps(k: __mmask8, a: __m256) -> __m256 {
transmute(vcompressps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm_mask_compress_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
transmute(vcompressps128(a.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompressps))]
pub unsafe fn _mm_maskz_compress_ps(k: __mmask8, a: __m128) -> __m128 {
transmute(vcompressps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
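/// Contiguously store the active double-precision elements in `a` (those
/// selected by writemask `k`) to the low elements of the result, filling the
/// rest from `src`.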
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_mask_compress_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vcompresspd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm512_maskz_compress_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vcompresspd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm256_mask_compress_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
transmute(vcompresspd256(a.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm256_maskz_compress_pd(k: __mmask8, a: __m256d) -> __m256d {
transmute(vcompresspd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm_mask_compress_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
transmute(vcompresspd128(a.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vcompresspd))]
pub unsafe fn _mm_maskz_compress_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vcompresspd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
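/// The inverse of compress: load contiguous 32-bit integers from the low
/// lanes of `a` and expand them into the lanes selected by writemask `k`,
/// copying the unselected lanes from `src`.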
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_mask_expand_epi32(src: __m512i, k: __mmask16, a: __m512i) -> __m512i {
transmute(vpexpandd(a.as_i32x16(), src.as_i32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm512_maskz_expand_epi32(k: __mmask16, a: __m512i) -> __m512i {
transmute(vpexpandd(
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm256_mask_expand_epi32(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(vpexpandd256(a.as_i32x8(), src.as_i32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm256_maskz_expand_epi32(k: __mmask8, a: __m256i) -> __m256i {
transmute(vpexpandd256(
a.as_i32x8(),
_mm256_setzero_si256().as_i32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm_mask_expand_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpexpandd128(a.as_i32x4(), src.as_i32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandd))]
pub unsafe fn _mm_maskz_expand_epi32(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpexpandd128(
a.as_i32x4(),
_mm_setzero_si128().as_i32x4(),
k,
))
}
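/// Expand the low contiguous 64-bit integers of `a` into the lanes selected
/// by writemask `k`, copying unselected lanes from `src`.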
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_mask_expand_epi64(src: __m512i, k: __mmask8, a: __m512i) -> __m512i {
transmute(vpexpandq(a.as_i64x8(), src.as_i64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm512_maskz_expand_epi64(k: __mmask8, a: __m512i) -> __m512i {
transmute(vpexpandq(
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm256_mask_expand_epi64(src: __m256i, k: __mmask8, a: __m256i) -> __m256i {
transmute(vpexpandq256(a.as_i64x4(), src.as_i64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm256_maskz_expand_epi64(k: __mmask8, a: __m256i) -> __m256i {
transmute(vpexpandq256(
a.as_i64x4(),
_mm256_setzero_si256().as_i64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm_mask_expand_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
transmute(vpexpandq128(a.as_i64x2(), src.as_i64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpexpandq))]
pub unsafe fn _mm_maskz_expand_epi64(k: __mmask8, a: __m128i) -> __m128i {
transmute(vpexpandq128(
a.as_i64x2(),
_mm_setzero_si128().as_i64x2(),
k,
))
}
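/// Expand the low contiguous single-precision elements of `a` into the lanes
/// selected by writemask `k`, copying unselected lanes from `src`.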
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_mask_expand_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
transmute(vexpandps(a.as_f32x16(), src.as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm512_maskz_expand_ps(k: __mmask16, a: __m512) -> __m512 {
transmute(vexpandps(a.as_f32x16(), _mm512_setzero_ps().as_f32x16(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm256_mask_expand_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
transmute(vexpandps256(a.as_f32x8(), src.as_f32x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm256_maskz_expand_ps(k: __mmask8, a: __m256) -> __m256 {
transmute(vexpandps256(
a.as_f32x8(),
_mm256_setzero_ps().as_f32x8(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm_mask_expand_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
transmute(vexpandps128(a.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandps))]
pub unsafe fn _mm_maskz_expand_ps(k: __mmask8, a: __m128) -> __m128 {
transmute(vexpandps128(a.as_f32x4(), _mm_setzero_ps().as_f32x4(), k))
}
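/// Expand the low contiguous double-precision elements of `a` into the lanes
/// selected by writemask `k`, copying unselected lanes from `src`.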
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_mask_expand_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
transmute(vexpandpd(a.as_f64x8(), src.as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm512_maskz_expand_pd(k: __mmask8, a: __m512d) -> __m512d {
transmute(vexpandpd(a.as_f64x8(), _mm512_setzero_pd().as_f64x8(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm256_mask_expand_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
transmute(vexpandpd256(a.as_f64x4(), src.as_f64x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm256_maskz_expand_pd(k: __mmask8, a: __m256d) -> __m256d {
transmute(vexpandpd256(
a.as_f64x4(),
_mm256_setzero_pd().as_f64x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm_mask_expand_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
transmute(vexpandpd128(a.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vexpandpd))]
pub unsafe fn _mm_maskz_expand_pd(k: __mmask8, a: __m128d) -> __m128d {
transmute(vexpandpd128(a.as_f64x2(), _mm_setzero_pd().as_f64x2(), k))
}
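/// Rotate the bits in each packed 32-bit integer in `a` to the left by the
/// number of bits specified in `imm8` (taken modulo 32). For example, a
/// hypothetical `_mm512_rol_epi32(a, 8)` moves the top byte of each lane into
/// the bottom byte.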
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi32(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprold(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_rol_epi32(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprold256(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_rol_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprold256(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_rol_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprold256(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_rol_epi32(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprold128(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_rol_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprold128(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_rol_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprold128(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, rol, zero))
}
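/// Rotate the bits in each packed 32-bit integer in `a` to the right by the
/// number of bits specified in `imm8` (taken modulo 32).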
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi32(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi32(k: __mmask16, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vprord(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_ror_epi32(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprord256(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_ror_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprord256(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_ror_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i32x8();
macro_rules! call {
($imm8:expr) => {
vprord256(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_ror_epi32(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprord128(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_ror_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprord128(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprold, imm8 = 123))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_ror_epi32(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vprord128(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, ror, zero))
}
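/// Rotate the bits in each packed 64-bit integer in `a` to the left by the
/// number of bits specified in `imm8` (taken modulo 64).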
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_rol_epi64(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_rol_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_rol_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprolq(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_rol_epi64(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprolq256(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_rol_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprolq256(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_rol_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprolq256(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_rol_epi64(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprolq128(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_rol_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprolq128(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_rol_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprolq128(a, $imm8)
};
}
let rol = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, rol, zero))
}
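/// Rotate the bits in each packed 64-bit integer in `a` to the right by the
/// number of bits specified in `imm8` (taken modulo 64).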
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_ror_epi64(a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_ror_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_ror_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vprorq(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_ror_epi64(a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprorq256(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_ror_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprorq256(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_ror_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vprorq256(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_ror_epi64(a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprorq128(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_ror_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprorq128(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolq, imm8 = 15))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_ror_epi64(k: __mmask8, a: __m128i, imm8: i32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vprorq128(a, $imm8)
};
}
let ror = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, ror, zero))
}
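/// Shift each packed 32-bit integer in `a` left by `imm8` while shifting in
/// zeros; shift counts greater than 31 yield zero.
///
/// A minimal sketch of the masked variant defined below (illustrative only;
/// `src` and `a` are assumed to be in scope):
///
/// ```ignore
/// // shift all lanes left by 4, but keep `src` in lanes 0 and 1
/// let r = _mm512_mask_slli_epi32(src, 0b1111_1111_1111_1100, a, 4);
/// ```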
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsllid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_slli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_slli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_slli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_slli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_slli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_slli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_slli_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_slli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf.as_i32x4(), zero))
}
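/// Shift each packed 32-bit integer in `a` right by `imm8` while shifting in
/// zeros; shift counts greater than 31 yield zero.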
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsrlid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_srli_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_srli_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_srli_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_srli_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srli_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf.as_i32x4(), zero))
}
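/// Shift each packed 64-bit integer in `a` left by `imm8` while shifting in
/// zeros; shift counts greater than 63 yield zero.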
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_slli_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_slli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_slli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpslliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_slli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_slli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_slli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_slli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf.as_i64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_slli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_slli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i64x2(), src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq, imm8 = 5))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_slli_epi64(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_slli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf.as_i64x2(), zero))
}
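/// Shift each packed 64-bit integer in `a` right by `imm8` while shifting in
/// zeros; shift counts greater than 63 yield zero.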
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srli_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srli_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srli_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsrliq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_srli_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_srli_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf.as_i64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_srli_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i64x2(), src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_srli_epi64(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srli_epi64(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf.as_i64x2(), zero))
}
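/// Shift each packed 32-bit integer in `a` left by the amount given in the
/// low 64 bits of `count`, shifting in zeros; counts of 32 or more yield
/// zero.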
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_sll_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpslld(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_mask_sll_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sll_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm512_maskz_sll_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sll_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm256_mask_sll_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_sll_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm256_maskz_sll_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_sll_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm_mask_sll_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sll_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpslld))]
pub unsafe fn _mm_maskz_sll_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sll_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
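/// Shift each packed 32-bit integer in `a` right by the amount given in the
/// low 64 bits of `count`, shifting in zeros; counts of 32 or more yield
/// zero.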
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_srl_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrld(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_mask_srl_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_srl_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm512_maskz_srl_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_srl_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm256_mask_srl_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_srl_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm256_maskz_srl_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_srl_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm_mask_srl_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srl_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrld))]
pub unsafe fn _mm_maskz_srl_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srl_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
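/// Shift each packed 64-bit integer in `a` left by the amount given in the
/// low 64 bits of `count`, shifting in zeros; counts of 64 or more yield
/// zero.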
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_sll_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsllq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_mask_sll_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sll_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm512_maskz_sll_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sll_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm256_mask_sll_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_sll_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm256_maskz_sll_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_sll_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm_mask_sll_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sll_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllq))]
pub unsafe fn _mm_maskz_sll_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sll_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
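/// Shift each packed 64-bit integer in `a` right by the amount given in the
/// low 64 bits of `count`, shifting in zeros; counts of 64 or more yield
/// zero.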
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_srl_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrlq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_mask_srl_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_srl_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm512_maskz_srl_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_srl_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm256_mask_srl_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_srl_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm256_maskz_srl_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_srl_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm_mask_srl_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srl_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlq))]
pub unsafe fn _mm_maskz_srl_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srl_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
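/// Shift each packed 32-bit integer in `a` right by the amount given in the
/// low 64 bits of `count`, shifting in sign bits; counts of 32 or more fill
/// each lane with its sign bit.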
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_sra_epi32(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsrad(a.as_i32x16(), count.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_mask_sra_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sra_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm512_maskz_sra_epi32(k: __mmask16, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sra_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm256_mask_sra_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_sra_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm256_maskz_sra_epi32(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_sra_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm_mask_sra_epi32(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sra_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad))]
pub unsafe fn _mm_maskz_sra_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sra_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
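/// Shift each packed 64-bit integer in `a` right by the amount given in the
/// low 64 bits of `count`, shifting in sign bits. The packed 64-bit
/// arithmetic right shift is new in AVX512F; AVX2 has no equivalent.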
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_sra_epi64(a: __m512i, count: __m128i) -> __m512i {
transmute(vpsraq(a.as_i64x8(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_mask_sra_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m128i,
) -> __m512i {
let shf = _mm512_sra_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm512_maskz_sra_epi64(k: __mmask8, a: __m512i, count: __m128i) -> __m512i {
let shf = _mm512_sra_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm256_sra_epi64(a: __m256i, count: __m128i) -> __m256i {
transmute(vpsraq256(a.as_i64x4(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm256_mask_sra_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m128i,
) -> __m256i {
let shf = _mm256_sra_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm256_maskz_sra_epi64(k: __mmask8, a: __m256i, count: __m128i) -> __m256i {
let shf = _mm256_sra_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm_sra_epi64(a: __m128i, count: __m128i) -> __m128i {
transmute(vpsraq128(a.as_i64x2(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm_mask_sra_epi64(src: __m128i, k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sra_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq))]
pub unsafe fn _mm_maskz_sra_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sra_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
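/// Shift packed 32-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.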
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi32(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi32(src: __m512i, k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi32(k: __mmask16, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i32x16();
macro_rules! call {
($imm8:expr) => {
vpsraid(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_srai_epi32(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srai_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_srai_epi32(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_srai_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_srai_epi32(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrad, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_srai_epi32(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_srai_epi32(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf.as_i32x4(), zero))
}
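/// Shift packed 64-bit integers in a right by imm8 while shifting in sign bits, and store the results in dst.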
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_srai_epi64(a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_srai_epi64(src: __m512i, k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_srai_epi64(k: __mmask8, a: __m512i, imm8: u32) -> __m512i {
let a = a.as_i64x8();
macro_rules! call {
($imm8:expr) => {
vpsraiq(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_srai_epi64(a: __m256i, imm8: u32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpsraiq256(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_srai_epi64(src: __m256i, k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpsraiq256(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_srai_epi64(k: __mmask8, a: __m256i, imm8: u32) -> __m256i {
let a = a.as_i64x4();
macro_rules! call {
($imm8:expr) => {
vpsraiq256(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_srai_epi64(a: __m128i, imm8: u32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpsraiq128(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_srai_epi64(src: __m128i, k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpsraiq128(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsraq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_srai_epi64(k: __mmask8, a: __m128i, imm8: u32) -> __m128i {
let a = a.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vpsraiq128(a, $imm8)
};
}
let shf = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
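/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.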
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_srav_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsravd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_mask_srav_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srav_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm512_maskz_srav_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srav_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm256_mask_srav_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_srav_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm256_maskz_srav_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_srav_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm_mask_srav_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_srav_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravd))]
pub unsafe fn _mm_maskz_srav_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srav_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
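/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in sign bits, and store the results in dst.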
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_srav_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsravq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_mask_srav_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srav_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm512_maskz_srav_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srav_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm256_srav_epi64(a: __m256i, count: __m256i) -> __m256i {
transmute(vpsravq256(a.as_i64x4(), count.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm256_mask_srav_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_srav_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm256_maskz_srav_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_srav_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm_srav_epi64(a: __m128i, count: __m128i) -> __m128i {
transmute(vpsravq128(a.as_i64x2(), count.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm_mask_srav_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_srav_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsravq))]
pub unsafe fn _mm_maskz_srav_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srav_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
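/// Rotate the bits in each packed 32-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.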
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_rolv_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vprolvd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_mask_rolv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let rol = _mm512_rolv_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, rol, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm512_maskz_rolv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm256_rolv_epi32(a: __m256i, b: __m256i) -> __m256i {
transmute(vprolvd256(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm256_mask_rolv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let rol = _mm256_rolv_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, rol, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm256_maskz_rolv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let rol = _mm256_rolv_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm_rolv_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(vprolvd128(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm_mask_rolv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let rol = _mm_rolv_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, rol, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvd))]
pub unsafe fn _mm_maskz_rolv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let rol = _mm_rolv_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, rol, zero))
}
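/// Rotate the bits in each packed 32-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.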
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_rorv_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(vprorvd(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_mask_rorv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let ror = _mm512_rorv_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, ror, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm512_maskz_rorv_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm256_rorv_epi32(a: __m256i, b: __m256i) -> __m256i {
transmute(vprorvd256(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm256_mask_rorv_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let ror = _mm256_rorv_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, ror, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm256_maskz_rorv_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let ror = _mm256_rorv_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm_rorv_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(vprorvd128(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm_mask_rorv_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let ror = _mm_rorv_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, ror, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvd))]
pub unsafe fn _mm_maskz_rorv_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let ror = _mm_rorv_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, ror, zero))
}
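/// Rotate the bits in each packed 64-bit integer in a to the left by the number of bits specified in the corresponding element of b, and store the results in dst.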
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_rolv_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vprolvq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_mask_rolv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, rol, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm512_maskz_rolv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let rol = _mm512_rolv_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm256_rolv_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(vprolvq256(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm256_mask_rolv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let rol = _mm256_rolv_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, rol, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm256_maskz_rolv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let rol = _mm256_rolv_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, rol, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm_rolv_epi64(a: __m128i, b: __m128i) -> __m128i {
transmute(vprolvq128(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm_mask_rolv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let rol = _mm_rolv_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, rol, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprolvq))]
pub unsafe fn _mm_maskz_rolv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let rol = _mm_rolv_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, rol, zero))
}
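/// Rotate the bits in each packed 64-bit integer in a to the right by the number of bits specified in the corresponding element of b, and store the results in dst.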
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_rorv_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(vprorvq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_mask_rorv_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, ror, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm512_maskz_rorv_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let ror = _mm512_rorv_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm256_rorv_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(vprorvq256(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm256_mask_rorv_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let ror = _mm256_rorv_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, ror, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm256_maskz_rorv_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let ror = _mm256_rorv_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, ror, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm_rorv_epi64(a: __m128i, b: __m128i) -> __m128i {
transmute(vprorvq128(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm_mask_rorv_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let ror = _mm_rorv_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, ror, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vprorvq))]
pub unsafe fn _mm_maskz_rorv_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let ror = _mm_rorv_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, ror, zero))
}
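/// Shift packed 32-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.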
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_sllv_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsllvd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_mask_sllv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_sllv_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm512_maskz_sllv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_sllv_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm256_mask_sllv_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_sllv_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm256_maskz_sllv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_sllv_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm_mask_sllv_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_sllv_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvd))]
pub unsafe fn _mm_maskz_sllv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sllv_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
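/// Shift packed 32-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.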
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_srlv_epi32(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsrlvd(a.as_i32x16(), count.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_mask_srlv_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srlv_epi32(a, count).as_i32x16();
transmute(simd_select_bitmask(k, shf, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm512_maskz_srlv_epi32(k: __mmask16, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srlv_epi32(a, count).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm256_mask_srlv_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_srlv_epi32(a, count).as_i32x8();
transmute(simd_select_bitmask(k, shf, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm256_maskz_srlv_epi32(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_srlv_epi32(a, count).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm_mask_srlv_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_srlv_epi32(a, count).as_i32x4();
transmute(simd_select_bitmask(k, shf, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvd))]
pub unsafe fn _mm_maskz_srlv_epi32(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srlv_epi32(a, count).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, shf, zero))
}
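/// Shift packed 64-bit integers in a left by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.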
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_sllv_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsllvq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_mask_sllv_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_sllv_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm512_maskz_sllv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_sllv_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm256_mask_sllv_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_sllv_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm256_maskz_sllv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_sllv_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm_mask_sllv_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_sllv_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsllvq))]
pub unsafe fn _mm_maskz_sllv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_sllv_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
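/// Shift packed 64-bit integers in a right by the amount specified by the corresponding element in count while shifting in zeros, and store the results in dst.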
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_srlv_epi64(a: __m512i, count: __m512i) -> __m512i {
transmute(vpsrlvq(a.as_i64x8(), count.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_mask_srlv_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
count: __m512i,
) -> __m512i {
let shf = _mm512_srlv_epi64(a, count).as_i64x8();
transmute(simd_select_bitmask(k, shf, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm512_maskz_srlv_epi64(k: __mmask8, a: __m512i, count: __m512i) -> __m512i {
let shf = _mm512_srlv_epi64(a, count).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm256_mask_srlv_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
count: __m256i,
) -> __m256i {
let shf = _mm256_srlv_epi64(a, count).as_i64x4();
transmute(simd_select_bitmask(k, shf, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm256_maskz_srlv_epi64(k: __mmask8, a: __m256i, count: __m256i) -> __m256i {
let shf = _mm256_srlv_epi64(a, count).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, shf, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm_mask_srlv_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
count: __m128i,
) -> __m128i {
let shf = _mm_srlv_epi64(a, count).as_i64x2();
transmute(simd_select_bitmask(k, shf, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpsrlvq))]
pub unsafe fn _mm_maskz_srlv_epi64(k: __mmask8, a: __m128i, count: __m128i) -> __m128i {
let shf = _mm_srlv_epi64(a, count).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, shf, zero))
}
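/// Shuffle single-precision (32-bit) floating-point elements within 128-bit lanes of a using the control in imm8, and store the results in dst.
/// simd_shuffle16 requires const indices, so each 2-bit field of imm8 is constified through the exhaustive matches in the nested shuffle macros; the same four selectors apply to every 128-bit lane.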
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permute_ps(a: __m512, imm8: i32) -> __m512 {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm512_undefined_ps();
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle16(
a,
undefined,
[
$a,
$b,
$c,
$d,
$a + 4,
$b + 4,
$c + 4,
$d + 4,
$a + 8,
$b + 8,
$c + 8,
$d + 8,
$a + 12,
$b + 12,
$c + 12,
$d + 12,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle4!($a, $b, $c, 0),
0b01 => shuffle4!($a, $b, $c, 1),
0b10 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle3!($a, $b, 0),
0b01 => shuffle3!($a, $b, 1),
0b10 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle2!($a, 0),
0b01 => shuffle2!($a, 1),
0b10 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
};
}
match imm8 & 0b11 {
0b00 => shuffle1!(0),
0b01 => shuffle1!(1),
0b10 => shuffle1!(2),
_ => shuffle1!(3),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_ps(src: __m512, k: __mmask16, a: __m512, imm8: i32) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_ps(k: __mmask16, a: __m512, imm8: i32) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_permute_ps(src: __m256, k: __mmask8, a: __m256, imm8: i32) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_permute_ps(k: __mmask8, a: __m256, imm8: i32) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_permute_ps(src: __m128, k: __mmask8, a: __m128, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_permute_ps(k: __mmask8, a: __m128, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm_permute_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
}
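/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes of a using the control in imm8, and store the results in dst.
/// Bit i of imm8 selects the element for lane position i.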
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permute_pd(a: __m512d, imm8: i32) -> __m512d {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm512_undefined_pd();
// Each imm8 bit independently selects one element within its 128-bit lane,
// so all eight bits participate in the shuffle index list.
macro_rules! shuffle8 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
simd_shuffle8(a, undefined, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle7 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 7) & 0x1 {
0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 6),
_ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 7),
}
};
}
macro_rules! shuffle6 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
match (imm8 >> 6) & 0x1 {
0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
_ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
}
};
}
macro_rules! shuffle5 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match (imm8 >> 5) & 0x1 {
0 => shuffle6!($a, $b, $c, $d, $e, 4),
_ => shuffle6!($a, $b, $c, $d, $e, 5),
}
};
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match (imm8 >> 4) & 0x1 {
0 => shuffle5!($a, $b, $c, $d, 4),
_ => shuffle5!($a, $b, $c, $d, 5),
}
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 0),
_ => shuffle2!($a, 1),
}
};
}
match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permute_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permute_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_permute_pd(src: __m256d, k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 2))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_permute_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0b01))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_permute_pd(src: __m128d, k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
macro_rules! call {
($imm8:expr) => {
_mm_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd, imm8 = 0b01))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_permute_pd(k: __mmask8, a: __m128d, imm8: i32) -> __m128d {
macro_rules! call {
($imm8:expr) => {
_mm_permute_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, r.as_f64x2(), zero))
}
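/// Shuffle 64-bit integers within 256-bit lanes of a using the control in imm8, and store the results in dst.
/// The shortened vperm pattern in assert_instr presumably accepts whichever vperm* form the compiler emits for this shuffle.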
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_epi64(a: __m512i, imm8: i32) -> __m512i {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm512_set1_epi64(0);
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle8(
a,
undefined,
[$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle4!($a, $b, $c, 0),
0b01 => shuffle4!($a, $b, $c, 1),
0b10 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle3!($a, $b, 0),
0b01 => shuffle3!($a, $b, 1),
0b10 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle2!($a, 0),
0b01 => shuffle2!($a, 1),
0b10 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
};
}
match imm8 & 0b11 {
0b00 => shuffle1!(0),
0b01 => shuffle1!(1),
0b10 => shuffle1!(2),
_ => shuffle1!(3),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b11111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_permutex_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b11111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_permutex_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_permutex_epi64(a: __m256i, imm8: i32) -> __m256i {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm256_set1_epi64x(0);
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle4!($a, $b, $c, 0),
0b01 => shuffle4!($a, $b, $c, 1),
0b10 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle3!($a, $b, 0),
0b01 => shuffle3!($a, $b, 1),
0b10 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle2!($a, 0),
0b01 => shuffle2!($a, 1),
0b10 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
};
}
match imm8 & 0b11 {
0b00 => shuffle1!(0),
0b01 => shuffle1!(1),
0b10 => shuffle1!(2),
_ => shuffle1!(3),
}
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_permutex_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_permutex_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_permutex_epi64(k: __mmask8, a: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_permutex_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
}
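/// Shuffle double-precision (64-bit) floating-point elements within 256-bit lanes of a using the control in imm8, and store the results in dst.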
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b11111111))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_permutex_pd(a: __m512d, imm8: i32) -> __m512d {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm512_undefined_pd();
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle8(
a,
undefined,
[$a, $b, $c, $d, $a + 4, $b + 4, $c + 4, $d + 4],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle4!($a, $b, $c, 0),
0b01 => shuffle4!($a, $b, $c, 1),
0b10 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle3!($a, $b, 0),
0b01 => shuffle3!($a, $b, 1),
0b10 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle2!($a, 0),
0b01 => shuffle2!($a, 1),
0b10 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
};
}
match imm8 & 0b11 {
0b00 => shuffle1!(0),
0b01 => shuffle1!(1),
0b10 => shuffle1!(2),
_ => shuffle1!(3),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b11111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_permutex_pd(src: __m512d, k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_permutex_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b11111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_permutex_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_permutex_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_permutex_pd(a: __m256d, imm8: i32) -> __m256d {
let imm8 = (imm8 & 0xFF) as u8;
let undefined = _mm256_undefined_pd();
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
simd_shuffle4(a, undefined, [$a, $b, $c, $d])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 6) & 0b11 {
0b00 => shuffle4!($a, $b, $c, 0),
0b01 => shuffle4!($a, $b, $c, 1),
0b10 => shuffle4!($a, $b, $c, 2),
_ => shuffle4!($a, $b, $c, 3),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 4) & 0b11 {
0b00 => shuffle3!($a, $b, 0),
0b01 => shuffle3!($a, $b, 1),
0b10 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 2) & 0b11 {
0b00 => shuffle2!($a, 0),
0b01 => shuffle2!($a, 1),
0b10 => shuffle2!($a, 2),
_ => shuffle2!($a, 3),
}
};
}
match imm8 & 0b11 {
0b00 => shuffle1!(0),
0b01 => shuffle1!(1),
0b10 => shuffle1!(2),
_ => shuffle1!(3),
}
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_permutex_pd(src: __m256d, k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_permutex_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b10011011))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_permutex_pd(k: __mmask8, a: __m256d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_permutex_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
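/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.
/// Despite the permutevar name this is a full-width permute; Intel documents it as identical to _mm512_permutexvar_epi32.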
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutevar_epi32(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutevar_epi32(
src: __m512i,
k: __mmask16,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutevar_epi32(idx, a).as_i32x16();
transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
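/// Shuffle single-precision (32-bit) floating-point elements within 128-bit lanes of a using the control in the corresponding element of b, and store the results in dst.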
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_permutevar_ps(a: __m512, b: __m512i) -> __m512 {
transmute(vpermilps(a.as_f32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_mask_permutevar_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512i,
) -> __m512 {
let permute = _mm512_permutevar_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm512_maskz_permutevar_ps(k: __mmask16, a: __m512, b: __m512i) -> __m512 {
let permute = _mm512_permutevar_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm256_mask_permutevar_ps(src: __m256, k: __mmask8, a: __m256, b: __m256i) -> __m256 {
let permute = _mm256_permutevar_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm256_maskz_permutevar_ps(k: __mmask8, a: __m256, b: __m256i) -> __m256 {
let permute = _mm256_permutevar_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm_mask_permutevar_ps(src: __m128, k: __mmask8, a: __m128, b: __m128i) -> __m128 {
let permute = _mm_permutevar_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, permute, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilps))]
pub unsafe fn _mm_maskz_permutevar_ps(k: __mmask8, a: __m128, b: __m128i) -> __m128 {
let permute = _mm_permutevar_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, permute, zero))
}
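/// Shuffle double-precision (64-bit) floating-point elements within 128-bit lanes of a using the control in the corresponding element of b, and store the results in dst.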
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_permutevar_pd(a: __m512d, b: __m512i) -> __m512d {
transmute(vpermilpd(a.as_f64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_mask_permutevar_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512i,
) -> __m512d {
let permute = _mm512_permutevar_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm512_maskz_permutevar_pd(k: __mmask8, a: __m512d, b: __m512i) -> __m512d {
let permute = _mm512_permutevar_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm256_mask_permutevar_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
b: __m256i,
) -> __m256d {
let permute = _mm256_permutevar_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm256_maskz_permutevar_pd(k: __mmask8, a: __m256d, b: __m256i) -> __m256d {
let permute = _mm256_permutevar_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm_mask_permutevar_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
let permute = _mm_permutevar_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, permute, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermilpd))]
pub unsafe fn _mm_maskz_permutevar_pd(k: __mmask8, a: __m128d, b: __m128i) -> __m128d {
let permute = _mm_permutevar_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, permute, zero))
}
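/// Shuffle 32-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.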
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutexvar_epi32(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermd(a.as_i32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_mask_permutexvar_epi32(
src: __m512i,
k: __mmask16,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
transmute(simd_select_bitmask(k, permute, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm512_maskz_permutexvar_epi32(k: __mmask16, idx: __m512i, a: __m512i) -> __m512i {
let permute = _mm512_permutexvar_epi32(idx, a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutexvar_epi32(idx: __m256i, a: __m256i) -> __m256i {
transmute(_mm256_permutevar8x32_epi32(a, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm256_mask_permutexvar_epi32(
src: __m256i,
k: __mmask8,
idx: __m256i,
a: __m256i,
) -> __m256i {
let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
transmute(simd_select_bitmask(k, permute, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermd))]
pub unsafe fn _mm256_maskz_permutexvar_epi32(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
let permute = _mm256_permutexvar_epi32(idx, a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, permute, zero))
}
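/// Shuffle 64-bit integers in a across lanes using the corresponding index in idx, and store the results in dst.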
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutexvar_epi64(idx: __m512i, a: __m512i) -> __m512i {
transmute(vpermq(a.as_i64x8(), idx.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_mask_permutexvar_epi64(
src: __m512i,
k: __mmask8,
idx: __m512i,
a: __m512i,
) -> __m512i {
let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
transmute(simd_select_bitmask(k, permute, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm512_maskz_permutexvar_epi64(k: __mmask8, idx: __m512i, a: __m512i) -> __m512i {
let permute = _mm512_permutexvar_epi64(idx, a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutexvar_epi64(idx: __m256i, a: __m256i) -> __m256i {
transmute(vpermq256(a.as_i64x4(), idx.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm256_mask_permutexvar_epi64(
src: __m256i,
k: __mmask8,
idx: __m256i,
a: __m256i,
) -> __m256i {
let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
transmute(simd_select_bitmask(k, permute, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermq))]
pub unsafe fn _mm256_maskz_permutexvar_epi64(k: __mmask8, idx: __m256i, a: __m256i) -> __m256i {
let permute = _mm256_permutexvar_epi64(idx, a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, permute, zero))
}
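/// Shuffle single-precision (32-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.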
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_permutexvar_ps(idx: __m512i, a: __m512) -> __m512 {
transmute(vpermps(a.as_f32x16(), idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_mask_permutexvar_ps(
src: __m512,
k: __mmask16,
idx: __m512i,
a: __m512,
) -> __m512 {
let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
transmute(simd_select_bitmask(k, permute, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm512_maskz_permutexvar_ps(k: __mmask16, idx: __m512i, a: __m512) -> __m512 {
let permute = _mm512_permutexvar_ps(idx, a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm256_permutexvar_ps(idx: __m256i, a: __m256) -> __m256 {
transmute(_mm256_permutevar8x32_ps(a, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm256_mask_permutexvar_ps(
src: __m256,
k: __mmask8,
idx: __m256i,
a: __m256,
) -> __m256 {
let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
transmute(simd_select_bitmask(k, permute, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermps))]
pub unsafe fn _mm256_maskz_permutexvar_ps(k: __mmask8, idx: __m256i, a: __m256) -> __m256 {
let permute = _mm256_permutexvar_ps(idx, a).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, permute, zero))
}
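/// Shuffle double-precision (64-bit) floating-point elements in a across lanes using the corresponding index in idx, and store the results in dst.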
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_permutexvar_pd(idx: __m512i, a: __m512d) -> __m512d {
transmute(vpermpd(a.as_f64x8(), idx.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_mask_permutexvar_pd(
src: __m512d,
k: __mmask8,
idx: __m512i,
a: __m512d,
) -> __m512d {
let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
transmute(simd_select_bitmask(k, permute, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm512_maskz_permutexvar_pd(k: __mmask8, idx: __m512i, a: __m512d) -> __m512d {
let permute = _mm512_permutexvar_pd(idx, a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm256_permutexvar_pd(idx: __m256i, a: __m256d) -> __m256d {
transmute(vpermpd256(a.as_f64x4(), idx.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm256_mask_permutexvar_pd(
src: __m256d,
k: __mmask8,
idx: __m256i,
a: __m256d,
) -> __m256d {
let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
transmute(simd_select_bitmask(k, permute, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermpd))]
pub unsafe fn _mm256_maskz_permutexvar_pd(k: __mmask8, idx: __m256i, a: __m256d) -> __m256d {
let permute = _mm256_permutexvar_pd(idx, a).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))] // broad match: lowers to vpermi2d or vpermt2d depending on register reuse
pub unsafe fn _mm512_permutex2var_epi32(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2d(a.as_i32x16(), idx.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub unsafe fn _mm512_mask_permutex2var_epi32(
a: __m512i,
k: __mmask16,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
transmute(simd_select_bitmask(k, permute, a.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_maskz_permutex2var_epi32(
k: __mmask16,
a: __m512i,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub unsafe fn _mm512_mask2_permutex2var_epi32(
a: __m512i,
idx: __m512i,
k: __mmask16,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi32(a, idx, b).as_i32x16();
transmute(simd_select_bitmask(k, permute, idx.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutex2var_epi32(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
transmute(vpermi2d256(a.as_i32x8(), idx.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub unsafe fn _mm256_mask_permutex2var_epi32(
a: __m256i,
k: __mmask8,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
transmute(simd_select_bitmask(k, permute, a.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_maskz_permutex2var_epi32(
k: __mmask8,
a: __m256i,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub unsafe fn _mm256_mask2_permutex2var_epi32(
a: __m256i,
idx: __m256i,
k: __mmask8,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi32(a, idx, b).as_i32x8();
transmute(simd_select_bitmask(k, permute, idx.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_permutex2var_epi32(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
transmute(vpermi2d128(a.as_i32x4(), idx.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2d))]
pub unsafe fn _mm_mask_permutex2var_epi32(
a: __m128i,
k: __mmask8,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
transmute(simd_select_bitmask(k, permute, a.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_maskz_permutex2var_epi32(
k: __mmask8,
a: __m128i,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2d))]
pub unsafe fn _mm_mask2_permutex2var_epi32(
a: __m128i,
idx: __m128i,
k: __mmask8,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi32(a, idx, b).as_i32x4();
transmute(simd_select_bitmask(k, permute, idx.as_i32x4()))
}
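// Illustrative sketch (hypothetical helper, exposition only): permutex2var
// treats `a` and `b` as one 32-entry table; bit 4 of each index selects the
// source, so indices 0..=15 read from `a` and 16..=31 read from `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn permutex2var_epi32_usage_sketch() {
let a = _mm512_set1_epi32(100);
let b = _mm512_set1_epi32(200);
// Alternate between the two tables lane by lane.
let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
let r: [i32; 16] = mem::transmute(_mm512_permutex2var_epi32(a, idx, b));
assert_eq!(
r,
[100, 200, 100, 200, 100, 200, 100, 200, 100, 200, 100, 200, 100, 200, 100, 200]
);
}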
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutex2var_epi64(a: __m512i, idx: __m512i, b: __m512i) -> __m512i {
transmute(vpermi2q(a.as_i64x8(), idx.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub unsafe fn _mm512_mask_permutex2var_epi64(
a: __m512i,
k: __mmask8,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
transmute(simd_select_bitmask(k, permute, a.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_maskz_permutex2var_epi64(
k: __mmask8,
a: __m512i,
idx: __m512i,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub unsafe fn _mm512_mask2_permutex2var_epi64(
a: __m512i,
idx: __m512i,
k: __mmask8,
b: __m512i,
) -> __m512i {
let permute = _mm512_permutex2var_epi64(a, idx, b).as_i64x8();
transmute(simd_select_bitmask(k, permute, idx.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutex2var_epi64(a: __m256i, idx: __m256i, b: __m256i) -> __m256i {
transmute(vpermi2q256(a.as_i64x4(), idx.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub unsafe fn _mm256_mask_permutex2var_epi64(
a: __m256i,
k: __mmask8,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
transmute(simd_select_bitmask(k, permute, a.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_maskz_permutex2var_epi64(
k: __mmask8,
a: __m256i,
idx: __m256i,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub unsafe fn _mm256_mask2_permutex2var_epi64(
a: __m256i,
idx: __m256i,
k: __mmask8,
b: __m256i,
) -> __m256i {
let permute = _mm256_permutex2var_epi64(a, idx, b).as_i64x4();
transmute(simd_select_bitmask(k, permute, idx.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_permutex2var_epi64(a: __m128i, idx: __m128i, b: __m128i) -> __m128i {
transmute(vpermi2q128(a.as_i64x2(), idx.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2q))]
pub unsafe fn _mm_mask_permutex2var_epi64(
a: __m128i,
k: __mmask8,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
transmute(simd_select_bitmask(k, permute, a.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_maskz_permutex2var_epi64(
k: __mmask8,
a: __m128i,
idx: __m128i,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermi2q))]
pub unsafe fn _mm_mask2_permutex2var_epi64(
a: __m128i,
idx: __m128i,
k: __mmask8,
b: __m128i,
) -> __m128i {
let permute = _mm_permutex2var_epi64(a, idx, b).as_i64x2();
transmute(simd_select_bitmask(k, permute, idx.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutex2var_ps(a: __m512, idx: __m512i, b: __m512) -> __m512 {
transmute(vpermi2ps(a.as_f32x16(), idx.as_i32x16(), b.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub unsafe fn _mm512_mask_permutex2var_ps(
a: __m512,
k: __mmask16,
idx: __m512i,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
transmute(simd_select_bitmask(k, permute, a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_maskz_permutex2var_ps(
k: __mmask16,
a: __m512,
idx: __m512i,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_mask2_permutex2var_ps(
a: __m512,
idx: __m512i,
k: __mmask16,
b: __m512,
) -> __m512 {
let permute = _mm512_permutex2var_ps(a, idx, b).as_f32x16();
let idx = _mm512_castsi512_ps(idx).as_f32x16();
transmute(simd_select_bitmask(k, permute, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutex2var_ps(a: __m256, idx: __m256i, b: __m256) -> __m256 {
transmute(vpermi2ps256(a.as_f32x8(), idx.as_i32x8(), b.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub unsafe fn _mm256_mask_permutex2var_ps(
a: __m256,
k: __mmask8,
idx: __m256i,
b: __m256,
) -> __m256 {
let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
transmute(simd_select_bitmask(k, permute, a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_maskz_permutex2var_ps(
k: __mmask8,
a: __m256,
idx: __m256i,
b: __m256,
) -> __m256 {
let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_mask2_permutex2var_ps(
a: __m256,
idx: __m256i,
k: __mmask8,
b: __m256,
) -> __m256 {
let permute = _mm256_permutex2var_ps(a, idx, b).as_f32x8();
let idx = _mm256_castsi256_ps(idx).as_f32x8();
transmute(simd_select_bitmask(k, permute, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_permutex2var_ps(a: __m128, idx: __m128i, b: __m128) -> __m128 {
transmute(vpermi2ps128(a.as_f32x4(), idx.as_i32x4(), b.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2ps))]
pub unsafe fn _mm_mask_permutex2var_ps(a: __m128, k: __mmask8, idx: __m128i, b: __m128) -> __m128 {
let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
transmute(simd_select_bitmask(k, permute, a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_maskz_permutex2var_ps(k: __mmask8, a: __m128, idx: __m128i, b: __m128) -> __m128 {
let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_mask2_permutex2var_ps(a: __m128, idx: __m128i, k: __mmask8, b: __m128) -> __m128 {
let permute = _mm_permutex2var_ps(a, idx, b).as_f32x4();
let idx = _mm_castsi128_ps(idx).as_f32x4();
transmute(simd_select_bitmask(k, permute, idx))
}
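// Illustrative sketch (hypothetical helper, exposition only): the maskz
// variant zeroes lanes whose mask bit is clear, whereas the mask2 variants
// above fall back to the bit pattern of `idx` for those lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn maskz_permutex2var_ps_usage_sketch() {
let a = _mm512_set1_ps(1.0);
let b = _mm512_set1_ps(2.0);
let idx = _mm512_setr_epi32(0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23);
// Only even mask bits are set, so every odd lane comes out as 0.0.
let r: [f32; 16] = mem::transmute(_mm512_maskz_permutex2var_ps(
0b0101_0101_0101_0101,
a,
idx,
b,
));
assert_eq!(
r,
[1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0., 1., 0.]
);
}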
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_permutex2var_pd(a: __m512d, idx: __m512i, b: __m512d) -> __m512d {
transmute(vpermi2pd(a.as_f64x8(), idx.as_i64x8(), b.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub unsafe fn _mm512_mask_permutex2var_pd(
a: __m512d,
k: __mmask8,
idx: __m512i,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
transmute(simd_select_bitmask(k, permute, a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_maskz_permutex2var_pd(
k: __mmask8,
a: __m512d,
idx: __m512i,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm512_mask2_permutex2var_pd(
a: __m512d,
idx: __m512i,
k: __mmask8,
b: __m512d,
) -> __m512d {
let permute = _mm512_permutex2var_pd(a, idx, b).as_f64x8();
let idx = _mm512_castsi512_pd(idx).as_f64x8();
transmute(simd_select_bitmask(k, permute, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_permutex2var_pd(a: __m256d, idx: __m256i, b: __m256d) -> __m256d {
transmute(vpermi2pd256(a.as_f64x4(), idx.as_i64x4(), b.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub unsafe fn _mm256_mask_permutex2var_pd(
a: __m256d,
k: __mmask8,
idx: __m256i,
b: __m256d,
) -> __m256d {
let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
transmute(simd_select_bitmask(k, permute, a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_maskz_permutex2var_pd(
k: __mmask8,
a: __m256d,
idx: __m256i,
b: __m256d,
) -> __m256d {
let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm256_mask2_permutex2var_pd(
a: __m256d,
idx: __m256i,
k: __mmask8,
b: __m256d,
) -> __m256d {
let permute = _mm256_permutex2var_pd(a, idx, b).as_f64x4();
let idx = _mm256_castsi256_pd(idx).as_f64x4();
transmute(simd_select_bitmask(k, permute, idx))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_permutex2var_pd(a: __m128d, idx: __m128i, b: __m128d) -> __m128d {
transmute(vpermi2pd128(a.as_f64x2(), idx.as_i64x2(), b.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpermt2pd))]
pub unsafe fn _mm_mask_permutex2var_pd(
a: __m128d,
k: __mmask8,
idx: __m128i,
b: __m128d,
) -> __m128d {
let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
transmute(simd_select_bitmask(k, permute, a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_maskz_permutex2var_pd(
k: __mmask8,
a: __m128d,
idx: __m128i,
b: __m128d,
) -> __m128d {
let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, permute, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm))]
pub unsafe fn _mm_mask2_permutex2var_pd(
a: __m128d,
idx: __m128i,
k: __mmask8,
b: __m128d,
) -> __m128d {
let permute = _mm_permutex2var_pd(a, idx, b).as_f64x2();
let idx = _mm_castsi128_pd(idx).as_f64x2();
transmute(simd_select_bitmask(k, permute, idx))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpermilps, imm8 = 9))] // the shuffle currently lowers to vpermilps rather than vpshufd
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_shuffle_epi32(a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
a,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
let r: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
};
transmute(r)
}
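// Illustrative sketch (hypothetical helper, exposition only): the control
// packs four 2-bit selectors, lowest bits first, and the same selection is
// applied independently within each of the four 128-bit lanes.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shuffle_epi32_usage_sketch() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
// 0b00_01_10_11 reverses the four elements of every 128-bit lane.
let r: [i32; 16] = mem::transmute(_mm512_shuffle_epi32(a, 0b00_01_10_11));
assert_eq!(r, [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12]);
}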
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_shuffle_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
imm8: _MM_PERM_ENUM,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_shuffle_epi32(k: __mmask16, a: __m512i, imm8: _MM_PERM_ENUM) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_shuffle_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
imm8: _MM_PERM_ENUM,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_shuffle_epi32(k: __mmask8, a: __m256i, imm8: _MM_PERM_ENUM) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_mask_shuffle_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
imm8: _MM_PERM_ENUM,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpshufd, imm8 = 9))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_maskz_shuffle_epi32(k: __mmask8, a: __m128i, imm8: _MM_PERM_ENUM) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_ps(a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, 16, $e, $f, $g, 20, $i, $j, $k, 24, $m, $n, $o, 28),
1 => shuffle4!($a, $b, $c, 17, $e, $f, $g, 21, $i, $j, $k, 25, $m, $n, $o, 29),
2 => shuffle4!($a, $b, $c, 18, $e, $f, $g, 22, $i, $j, $k, 26, $m, $n, $o, 30),
_ => shuffle4!($a, $b, $c, 19, $e, $f, $g, 23, $i, $j, $k, 27, $m, $n, $o, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, 16, $e, $f, 20, $i, $j, 24, $m, $n, 28),
1 => shuffle3!($a, $b, 17, $e, $f, 21, $i, $j, 25, $m, $n, 29),
2 => shuffle3!($a, $b, 18, $e, $f, 22, $i, $j, 26, $m, $n, 30),
_ => shuffle3!($a, $b, 19, $e, $f, 23, $i, $j, 27, $m, $n, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, 0, $e, 4, $i, 8, $m, 12),
1 => shuffle2!($a, 1, $e, 5, $i, 9, $m, 13),
2 => shuffle2!($a, 2, $e, 6, $i, 10, $m, 14),
_ => shuffle2!($a, 3, $e, 7, $i, 11, $m, 15),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 4, 8, 12),
1 => shuffle1!(1, 5, 9, 13),
2 => shuffle1!(2, 6, 10, 14),
_ => shuffle1!(3, 7, 11, 15),
}
}
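// Illustrative sketch (hypothetical helper, exposition only): per 128-bit
// lane, the two low selectors of imm8 pick elements of `a` for the two low
// destination slots and the two high selectors pick elements of `b`.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shuffle_ps_usage_sketch() {
let a = _mm512_set1_ps(1.0);
let b = _mm512_set1_ps(2.0);
// imm8 = 0: each 128-bit lane becomes [a[0], a[0], b[0], b[0]] of that lane.
let r: [f32; 16] = mem::transmute(_mm512_shuffle_ps(a, b, 0b00_00_00_00));
assert_eq!(
r,
[1., 1., 2., 2., 1., 1., 2., 2., 1., 1., 2., 2., 1., 1., 2., 2.]
);
}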
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_ps(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_ps(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_ps(
src: __m256,
k: __mmask8,
a: __m256,
b: __m256,
imm8: i32,
) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_ps(k: __mmask8, a: __m256, b: __m256, imm8: i32) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_shuffle_ps(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufps, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_shuffle_ps(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_ps(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_pd(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle8 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr, $h:expr) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle7 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 7) & 0x1 {
0 => shuffle8!($a, $b, $c, $d, $e, $f, $g, 14),
_ => shuffle8!($a, $b, $c, $d, $e, $f, $g, 15),
}
};
}
macro_rules! shuffle6 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr, $f:expr) => {
match (imm8 >> 6) & 0x1 {
0 => shuffle7!($a, $b, $c, $d, $e, $f, 6),
_ => shuffle7!($a, $b, $c, $d, $e, $f, 7),
}
};
}
macro_rules! shuffle5 {
($a:expr, $b:expr, $c:expr, $d:expr, $e:expr) => {
match (imm8 >> 5) & 0x1 {
0 => shuffle6!($a, $b, $c, $d, $e, 12),
_ => shuffle6!($a, $b, $c, $d, $e, 13),
}
};
}
macro_rules! shuffle4 {
($a:expr, $b:expr, $c:expr, $d:expr) => {
match (imm8 >> 4) & 0x1 {
0 => shuffle5!($a, $b, $c, $d, 4),
_ => shuffle5!($a, $b, $c, $d, 5),
}
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr) => {
match (imm8 >> 3) & 0x1 {
0 => shuffle4!($a, $b, $c, 10),
_ => shuffle4!($a, $b, $c, 11),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr) => {
match (imm8 >> 2) & 0x1 {
0 => shuffle3!($a, $b, 2),
_ => shuffle3!($a, $b, 3),
}
};
}
macro_rules! shuffle1 {
($a:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, 8),
_ => shuffle2!($a, 9),
}
};
}
match imm8 & 0x1 {
0 => shuffle1!(0),
_ => shuffle1!(1),
}
}
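// Illustrative sketch (hypothetical helper, exposition only): for _pd the
// control is one bit per destination element; even destinations choose from
// `a` and odd destinations from `b`, within each 128-bit pair.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shuffle_pd_usage_sketch() {
let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
// All control bits set: take the high element of each source pair.
let r: [f64; 8] = mem::transmute(_mm512_shuffle_pd(a, b, 0b11111111));
assert_eq!(r, [1., 11., 3., 13., 5., 15., 7., 17.]);
}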
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 3))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_pd(k: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
b: __m256d,
imm8: i32,
) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_pd(k: __mmask8, a: __m256d, b: __m256d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_shuffle_pd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
) -> __m128d {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x2(), src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufpd, imm8 = 9))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_shuffle_pd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d {
macro_rules! call {
($imm8:expr) => {
_mm_shuffle_pd(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, r.as_f64x2(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10010101))] // lowers to vshufi64x2 rather than vshufi32x4; both move whole 128-bit lanes
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i32x4(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
let r: i32x16 = match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
};
transmute(r)
}
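// Illustrative sketch (hypothetical helper, exposition only): shuffle_i32x4
// moves whole 128-bit lanes; the two low 2-bit fields of imm8 choose lanes
// of `a` for the low half of the result, the two high fields choose lanes
// of `b` for the high half.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shuffle_i32x4_usage_sketch() {
let a = _mm512_set1_epi32(1);
let b = _mm512_set1_epi32(2);
// Identity selection: each destination lane takes the same-numbered source lane.
let r: [i32; 16] = mem::transmute(_mm512_shuffle_i32x4(a, b, 0b11_10_01_00));
assert_eq!(r, [1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2]);
}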
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i32x4(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_i32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b10110101))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i32x4(
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_i32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))] // broad match: the exact vperm* form emitted varies
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_shuffle_i32x4(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! shuffle2 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle1 {
($a:expr, $b:expr, $c: expr, $d: expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11),
_ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15),
}
};
}
let r: i32x8 = match imm8 & 0x1 {
0 => shuffle1!(0, 1, 2, 3),
_ => shuffle1!(4, 5, 6, 7),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_i32x4(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_i32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufi32x4, imm8 = 0b11))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_i32x4(
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_i32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_i64x2(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_i64x2(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_i64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_i64x2(
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_i64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_shuffle_i64x2(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! shuffle2 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr
) => {
simd_shuffle4(a, b, [$a, $b, $c, $d])
};
}
macro_rules! shuffle1 {
($a:expr, $b:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, $b, 4, 5),
_ => shuffle2!($a, $b, 6, 7),
}
};
}
let r: i64x4 = match imm8 & 0x1 {
0 => shuffle1!(0, 1),
_ => shuffle1!(2, 3),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_i64x2(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_i64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshufi64x2, imm8 = 0b11))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_i64x2(
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_i64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))] // lowers to vshuff64x2 rather than vshuff32x4; both move whole 128-bit lanes
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f32x4(a: __m512, b: __m512, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr,
$i:expr,
$j:expr,
$k:expr,
$l:expr,
$m:expr,
$n:expr,
$o:expr,
$p:expr
) => {
simd_shuffle16(
a,
b,
[
$a, $b, $c, $d, $e, $f, $g, $h, $i, $j, $k, $l, $m, $n, $o, $p,
],
)
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr, $i:expr, $j:expr, $k:expr, $m:expr, $n:expr, $o:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 16, 17, 18, 19),
1 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 20, 21, 22, 23),
2 => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 24, 25, 26, 27),
_ => shuffle4!($a, $b, $c, $e, $f, $g, $i, $j, $k, $m, $n, $o, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr, $i:expr, $j:expr, $m:expr, $n:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 16, 17, 18, 19),
1 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 20, 21, 22, 23),
2 => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 24, 25, 26, 27),
_ => shuffle3!($a, $b, $e, $f, $i, $j, $m, $n, 28, 29, 30, 31),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr, $i: expr, $m: expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, $i, $m, 0, 1, 2, 3),
1 => shuffle2!($a, $e, $i, $m, 4, 5, 6, 7),
2 => shuffle2!($a, $e, $i, $m, 8, 9, 10, 11),
_ => shuffle2!($a, $e, $i, $m, 12, 13, 14, 15),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1, 2, 3),
1 => shuffle1!(4, 5, 6, 7),
2 => shuffle1!(8, 9, 10, 11),
_ => shuffle1!(12, 13, 14, 15),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f32x4(
src: __m512,
k: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_f32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f32x4(k: __mmask16, a: __m512, b: __m512, imm8: i32) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_f32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_shuffle_f32x4(a: __m256, b: __m256, imm8: i32) -> __m256 {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_f32x8();
let b = b.as_f32x8();
macro_rules! shuffle2 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle1 {
($a:expr, $b:expr, $c: expr, $d: expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, $b, $c, $d, 8, 9, 10, 11),
_ => shuffle2!($a, $b, $c, $d, 12, 13, 14, 15),
}
};
}
let r: f32x8 = match imm8 & 0x1 {
0 => shuffle1!(0, 1, 2, 3),
_ => shuffle1!(4, 5, 6, 7),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_f32x4(
src: __m256,
k: __mmask8,
a: __m256,
b: __m256,
imm8: i32,
) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_f32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshuff32x4, imm8 = 0b11))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_f32x4(k: __mmask8, a: __m256, b: __m256, imm8: i32) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_f32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_shuffle_f64x2(a: __m512d, b: __m512d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
macro_rules! shuffle4 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr,
$e:expr,
$f:expr,
$g:expr,
$h:expr
) => {
simd_shuffle8(a, b, [$a, $b, $c, $d, $e, $f, $g, $h])
};
}
macro_rules! shuffle3 {
($a:expr, $b:expr, $c:expr, $e:expr, $f:expr, $g:expr) => {
match (imm8 >> 6) & 0x3 {
0 => shuffle4!($a, $b, $c, $e, $f, $g, 8, 9),
1 => shuffle4!($a, $b, $c, $e, $f, $g, 10, 11),
2 => shuffle4!($a, $b, $c, $e, $f, $g, 12, 13),
_ => shuffle4!($a, $b, $c, $e, $f, $g, 14, 15),
}
};
}
macro_rules! shuffle2 {
($a:expr, $b:expr, $e:expr, $f:expr) => {
match (imm8 >> 4) & 0x3 {
0 => shuffle3!($a, $b, $e, $f, 8, 9),
1 => shuffle3!($a, $b, $e, $f, 10, 11),
2 => shuffle3!($a, $b, $e, $f, 12, 13),
_ => shuffle3!($a, $b, $e, $f, 14, 15),
}
};
}
macro_rules! shuffle1 {
($a:expr, $e:expr) => {
match (imm8 >> 2) & 0x3 {
0 => shuffle2!($a, $e, 0, 1),
1 => shuffle2!($a, $e, 2, 3),
2 => shuffle2!($a, $e, 4, 5),
_ => shuffle2!($a, $e, 6, 7),
}
};
}
match imm8 & 0x3 {
0 => shuffle1!(0, 1),
1 => shuffle1!(2, 3),
2 => shuffle1!(4, 5),
_ => shuffle1!(6, 7),
}
}
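// Illustrative sketch (hypothetical helper, exposition only): the f64x2
// variant applies the same lane-level selection to 128-bit pairs of f64.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn shuffle_f64x2_usage_sketch() {
let a = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm512_setr_pd(10., 11., 12., 13., 14., 15., 16., 17.);
// Low two pairs from `a`, high two pairs from `b`, all in source order.
let r: [f64; 8] = mem::transmute(_mm512_shuffle_f64x2(a, b, 0b11_10_01_00));
assert_eq!(r, [0., 1., 2., 3., 14., 15., 16., 17.]);
}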
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_shuffle_f64x2(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_f64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b10111111))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_shuffle_f64x2(
k: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_shuffle_f64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vperm, imm8 = 0b01))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_shuffle_f64x2(a: __m256d, b: __m256d, imm8: i32) -> __m256d {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8 = (imm8 & 0xFF) as u8;
let a = a.as_f64x4();
let b = b.as_f64x4();
macro_rules! shuffle2 {
(
$a:expr,
$b:expr,
$c:expr,
$d:expr
) => {
simd_shuffle4(a, b, [$a, $b, $c, $d])
};
}
macro_rules! shuffle1 {
($a:expr, $b:expr) => {
match (imm8 >> 1) & 0x1 {
0 => shuffle2!($a, $b, 4, 5),
_ => shuffle2!($a, $b, 6, 7),
}
};
}
let r: f64x4 = match imm8 & 0x1 {
0 => shuffle1!(0, 1),
_ => shuffle1!(2, 3),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_shuffle_f64x2(
src: __m256d,
k: __mmask8,
a: __m256d,
b: __m256d,
imm8: i32,
) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_f64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vshuff64x2, imm8 = 0b11))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_shuffle_f64x2(
k: __mmask8,
a: __m256d,
b: __m256d,
imm8: i32,
) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm256_shuffle_f64x2(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf32x4_ps(a: __m512, imm8: i32) -> __m128 {
assert!(imm8 >= 0 && imm8 <= 3);
match imm8 & 0x3 {
0 => simd_shuffle4(a, _mm512_undefined_ps(), [0, 1, 2, 3]),
1 => simd_shuffle4(a, _mm512_undefined_ps(), [4, 5, 6, 7]),
2 => simd_shuffle4(a, _mm512_undefined_ps(), [8, 9, 10, 11]),
_ => simd_shuffle4(a, _mm512_undefined_ps(), [12, 13, 14, 15]),
}
}
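// Illustrative sketch (hypothetical helper, exposition only): imm8 selects
// which of the four 128-bit lanes of `a` becomes the __m128 result.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn extractf32x4_ps_usage_sketch() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
// imm8 = 2 extracts the third 128-bit lane.
let r: [f32; 4] = mem::transmute(_mm512_extractf32x4_ps(a, 2));
assert_eq!(r, [8., 9., 10., 11.]);
}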
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf32x4_ps(
src: __m128,
k: __mmask8,
a: __m512,
imm8: i32,
) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm512_extractf32x4_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf32x4_ps(k: __mmask8, a: __m512, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm512_extractf32x4_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextract, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_extractf32x4_ps(a: __m256, imm8: i32) -> __m128 {
assert!(imm8 >= 0 && imm8 <= 1);
match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm256_undefined_ps(), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm256_undefined_ps(), [4, 5, 6, 7]),
}
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_extractf32x4_ps(
src: __m128,
k: __mmask8,
a: __m256,
imm8: i32,
) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm256_extractf32x4_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_extractf32x4_ps(k: __mmask8, a: __m256, imm8: i32) -> __m128 {
macro_rules! call {
($imm8:expr) => {
_mm256_extractf32x4_ps(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, r.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
// the assertion tracks the fp-domain vextractf64x4 currently emitted for
// this integer extract (rather than vextracti64x4)
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti64x4_epi64(a: __m512i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 1);
match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_set1_epi64(0), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_set1_epi64(0), [4, 5, 6, 7]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti64x4_epi64(
src: __m256i,
k: __mmask8,
a: __m512i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm512_extracti64x4_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti64x4_epi64(k: __mmask8, a: __m512i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm512_extracti64x4_epi64(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extractf64x4_pd(a: __m512d, imm8: i32) -> __m256d {
assert!(imm8 >= 0 && imm8 <= 1);
match imm8 & 0x1 {
0 => simd_shuffle4(a, _mm512_undefined_pd(), [0, 1, 2, 3]),
_ => simd_shuffle4(a, _mm512_undefined_pd(), [4, 5, 6, 7]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extractf64x4_pd(
src: __m256d,
k: __mmask8,
a: __m512d,
imm8: i32,
) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm512_extractf64x4_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf64x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extractf64x4_pd(k: __mmask8, a: __m512d, imm8: i32) -> __m256d {
macro_rules! call {
($imm8:expr) => {
_mm512_extractf64x4_pd(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, r.as_f64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
// the assertion tracks the fp-domain vextractf32x4 currently emitted for
// this integer extract (rather than vextracti32x4)
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextractf32x4, imm8 = 3)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm512_extracti32x4_epi32(a: __m512i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let undefined = _mm512_undefined_epi32().as_i32x16();
let extract: i32x4 = match imm8 & 0x3 {
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
1 => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
2 => simd_shuffle4(a, undefined, [8, 9, 10, 11]),
_ => simd_shuffle4(a, undefined, [12, 13, 14, 15]),
};
transmute(extract)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_mask_extracti32x4_epi32(
src: __m128i,
k: __mmask8,
a: __m512i,
imm8: i32,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm512_extracti32x4_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 3)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm512_maskz_extracti32x4_epi32(k: __mmask8, a: __m512i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm512_extracti32x4_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextract, imm8 = 1)
)]
#[rustc_args_required_const(1)]
pub unsafe fn _mm256_extracti32x4_epi32(a: __m256i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 1);
let a = a.as_i32x8();
let undefined = _mm256_undefined_si256().as_i32x8();
let extract: i32x4 = match imm8 & 0x1 {
0 => simd_shuffle4(a, undefined, [0, 1, 2, 3]),
_ => simd_shuffle4(a, undefined, [4, 5, 6, 7]),
};
transmute(extract)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_mask_extracti32x4_epi32(
src: __m128i,
k: __mmask8,
a: __m256i,
imm8: i32,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm256_extracti32x4_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vextracti32x4, imm8 = 1)
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_maskz_extracti32x4_epi32(k: __mmask8, a: __m256i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm256_extracti32x4_epi32(a, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_moveldup_ps(a: __m512) -> __m512 {
let r: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_mask_moveldup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm512_maskz_moveldup_ps(k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [0, 0, 2, 2, 4, 4, 6, 6, 8, 8, 10, 10, 12, 12, 14, 14]);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm256_mask_moveldup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
let mov = _mm256_moveldup_ps(a);
transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm256_maskz_moveldup_ps(k: __mmask8, a: __m256) -> __m256 {
let mov = _mm256_moveldup_ps(a);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, mov.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm_mask_moveldup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
let mov = _mm_moveldup_ps(a);
transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovsldup))]
pub unsafe fn _mm_maskz_moveldup_ps(k: __mmask8, a: __m128) -> __m128 {
let mov = _mm_moveldup_ps(a);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, mov.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_movehdup_ps(a: __m512) -> __m512 {
let r: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
transmute(r)
}
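// Illustrative sketch (hypothetical helper, exposition only): moveldup
// duplicates each even-indexed element into the odd slot above it, while
// movehdup duplicates each odd-indexed element into the even slot below it.
#[cfg(test)]
#[allow(dead_code)]
#[target_feature(enable = "avx512f")]
unsafe fn movel_moveh_dup_ps_usage_sketch() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let lo: [f32; 16] = mem::transmute(_mm512_moveldup_ps(a));
assert_eq!(
lo,
[0., 0., 2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14.]
);
let hi: [f32; 16] = mem::transmute(_mm512_movehdup_ps(a));
assert_eq!(
hi,
[1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.]
);
}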
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_mask_movehdup_ps(src: __m512, k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
transmute(simd_select_bitmask(k, mov, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm512_maskz_movehdup_ps(k: __mmask16, a: __m512) -> __m512 {
let mov: f32x16 = simd_shuffle16(a, a, [1, 1, 3, 3, 5, 5, 7, 7, 9, 9, 11, 11, 13, 13, 15, 15]);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm256_mask_movehdup_ps(src: __m256, k: __mmask8, a: __m256) -> __m256 {
let mov = _mm256_movehdup_ps(a);
transmute(simd_select_bitmask(k, mov.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm256_maskz_movehdup_ps(k: __mmask8, a: __m256) -> __m256 {
let mov = _mm256_movehdup_ps(a);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, mov.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm_mask_movehdup_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
let mov = _mm_movehdup_ps(a);
transmute(simd_select_bitmask(k, mov.as_f32x4(), src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovshdup))]
pub unsafe fn _mm_maskz_movehdup_ps(k: __mmask8, a: __m128) -> __m128 {
let mov = _mm_movehdup_ps(a);
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, mov.as_f32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
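/// Duplicates even-indexed double-precision (64-bit) floating-point elements
/// from `a`, and stores the results in `dst`.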
pub unsafe fn _mm512_movedup_pd(a: __m512d) -> __m512d {
let r: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_mask_movedup_pd(src: __m512d, k: __mmask8, a: __m512d) -> __m512d {
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
transmute(simd_select_bitmask(k, mov, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm512_maskz_movedup_pd(k: __mmask8, a: __m512d) -> __m512d {
let mov: f64x8 = simd_shuffle8(a, a, [0, 0, 2, 2, 4, 4, 6, 6]);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, mov, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm256_mask_movedup_pd(src: __m256d, k: __mmask8, a: __m256d) -> __m256d {
let mov = _mm256_movedup_pd(a);
transmute(simd_select_bitmask(k, mov.as_f64x4(), src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm256_maskz_movedup_pd(k: __mmask8, a: __m256d) -> __m256d {
let mov = _mm256_movedup_pd(a);
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, mov.as_f64x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm_mask_movedup_pd(src: __m128d, k: __mmask8, a: __m128d) -> __m128d {
let mov = _mm_movedup_pd(a);
transmute(simd_select_bitmask(k, mov.as_f64x2(), src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovddup))]
pub unsafe fn _mm_maskz_movedup_pd(k: __mmask8, a: __m128d) -> __m128d {
let mov = _mm_movedup_pd(a);
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, mov.as_f64x2(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))] //should be vinserti32x4
#[rustc_args_required_const(2)]
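/// Copies `a` to `dst`, then inserts 128 bits (composed of 4 packed 32-bit
/// integers) from `b` into `dst` at the 128-bit location specified by `imm8`
/// (valid values are 0 through 3).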
pub unsafe fn _mm512_inserti32x4(a: __m512i, b: __m128i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 3);
let a = a.as_i32x16();
let b = _mm512_castsi128_si512(b).as_i32x16();
let ret: i32x16 = match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
};
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti32x4(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m128i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_inserti32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti32x4(k: __mmask16, a: __m512i, b: __m128i, imm8: i32) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_inserti32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinsert, imm8 = 1) //should be vinserti32x4
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_inserti32x4(a: __m256i, b: __m128i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 1);
let a = a.as_i32x8();
let b = _mm256_castsi128_si256(b).as_i32x8();
let ret: i32x8 = match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
};
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinserti32x4, imm8 = 1)
)]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_inserti32x4(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m128i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_inserti32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinserti32x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_inserti32x4(k: __mmask8, a: __m256i, b: __m128i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_inserti32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))] //should be vinserti64x4
#[rustc_args_required_const(2)]
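/// Copies `a` to `dst`, then inserts 256 bits (composed of 4 packed 64-bit
/// integers) from `b` into `dst` at the 256-bit location specified by `imm8`
/// (0 or 1).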
pub unsafe fn _mm512_inserti64x4(a: __m512i, b: __m256i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castsi256_si512(b);
match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_inserti64x4(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m256i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_inserti64x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinserti64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_inserti64x4(k: __mmask8, a: __m512i, b: __m256i, imm8: i32) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_inserti64x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(2)]
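/// Copies `a` to `dst`, then inserts 128 bits (composed of 4 packed
/// single-precision (32-bit) floating-point elements) from `b` into `dst` at
/// the 128-bit location specified by `imm8` (valid values are 0 through 3).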
pub unsafe fn _mm512_insertf32x4(a: __m512, b: __m128, imm8: i32) -> __m512 {
assert!(imm8 >= 0 && imm8 <= 3);
let b = _mm512_castps128_ps512(b);
match imm8 & 0b11 {
0 => simd_shuffle16(
a,
b,
[16, 17, 18, 19, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
),
1 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 12, 13, 14, 15],
),
2 => simd_shuffle16(
a,
b,
[0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 12, 13, 14, 15],
),
_ => simd_shuffle16(a, b, [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf32x4(
src: __m512,
k: __mmask16,
a: __m512,
b: __m128,
imm8: i32,
) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_insertf32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x16(), src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf32x4, imm8 = 2))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf32x4(k: __mmask16, a: __m512, b: __m128, imm8: i32) -> __m512 {
macro_rules! call {
($imm8:expr) => {
_mm512_insertf32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, r.as_f32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinsert, imm8 = 1) //should be vinsertf32x4
)]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_insertf32x4(a: __m256, b: __m128, imm8: i32) -> __m256 {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm256_castps128_ps256(b);
match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
}
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinsertf32x4, imm8 = 1)
)]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_insertf32x4(
src: __m256,
k: __mmask8,
a: __m256,
b: __m128,
imm8: i32,
) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_insertf32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f32x8(), src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(
all(test, not(target_os = "windows")),
assert_instr(vinsertf32x4, imm8 = 1)
)]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_insertf32x4(k: __mmask8, a: __m256, b: __m128, imm8: i32) -> __m256 {
macro_rules! call {
($imm8:expr) => {
_mm256_insertf32x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, r.as_f32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(2)]
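/// Copies `a` to `dst`, then inserts 256 bits (composed of 4 packed
/// double-precision (64-bit) floating-point elements) from `b` into `dst` at
/// the 256-bit location specified by `imm8` (0 or 1).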
pub unsafe fn _mm512_insertf64x4(a: __m512d, b: __m256d, imm8: i32) -> __m512d {
assert!(imm8 >= 0 && imm8 <= 1);
let b = _mm512_castpd256_pd512(b);
match imm8 & 0b1 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 4, 5, 6, 7]),
_ => simd_shuffle8(a, b, [0, 1, 2, 3, 8, 9, 10, 11]),
}
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_insertf64x4(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m256d,
imm8: i32,
) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_insertf64x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_f64x8(), src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vinsertf64x4, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_insertf64x4(k: __mmask8, a: __m512d, b: __m256d, imm8: i32) -> __m512d {
macro_rules! call {
($imm8:expr) => {
_mm512_insertf64x4(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, r.as_f64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))] //should be vpunpckhdq
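/// Unpacks and interleaves 32-bit integers from the high half of each 128-bit
/// lane in `a` and `b`, and stores the results in `dst`.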
pub unsafe fn _mm512_unpackhi_epi32(a: __m512i, b: __m512i) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
#[rustfmt::skip]
let r: i32x16 = simd_shuffle16(
a, b,
[ 2, 18, 3, 19,
2 + 4, 18 + 4, 3 + 4, 19 + 4,
2 + 8, 18 + 8, 3 + 8, 19 + 8,
2 + 12, 18 + 12, 3 + 12, 19 + 12],
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_mask_unpackhi_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, unpackhi, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let unpackhi = _mm512_unpackhi_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm256_mask_unpackhi_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm256_maskz_unpackhi_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let unpackhi = _mm256_unpackhi_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm_mask_unpackhi_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
) -> __m128i {
let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, unpackhi, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhdq))]
pub unsafe fn _mm_maskz_unpackhi_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let unpackhi = _mm_unpackhi_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))] //should be vpunpckhqdq
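/// Unpacks and interleaves 64-bit integers from the high half of each 128-bit
/// lane in `a` and `b`, and stores the results in `dst`.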
pub unsafe fn _mm512_unpackhi_epi64(a: __m512i, b: __m512i) -> __m512i {
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_mask_unpackhi_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm512_maskz_unpackhi_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let unpackhi = _mm512_unpackhi_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm256_mask_unpackhi_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, unpackhi, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm256_maskz_unpackhi_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let unpackhi = _mm256_unpackhi_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm_mask_unpackhi_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
) -> __m128i {
let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, unpackhi, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckhqdq))]
pub unsafe fn _mm_maskz_unpackhi_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let unpackhi = _mm_unpackhi_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
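/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`, and stores the
/// results in `dst`.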
pub unsafe fn _mm512_unpackhi_ps(a: __m512, b: __m512) -> __m512 {
#[rustfmt::skip]
simd_shuffle16(
a, b,
[ 2, 18, 3, 19,
2 + 4, 18 + 4, 3 + 4, 19 + 4,
2 + 8, 18 + 8, 3 + 8, 19 + 8,
2 + 12, 18 + 12, 3 + 12, 19 + 12],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_mask_unpackhi_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, unpackhi, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm512_maskz_unpackhi_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpackhi = _mm512_unpackhi_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm256_mask_unpackhi_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm256_maskz_unpackhi_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let unpackhi = _mm256_unpackhi_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm_mask_unpackhi_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, unpackhi, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhps))]
pub unsafe fn _mm_maskz_unpackhi_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let unpackhi = _mm_unpackhi_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
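/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the high half of each 128-bit lane in `a` and `b`, and stores the
/// results in `dst`.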
pub unsafe fn _mm512_unpackhi_pd(a: __m512d, b: __m512d) -> __m512d {
simd_shuffle8(a, b, [1, 9, 1 + 2, 9 + 2, 1 + 4, 9 + 4, 1 + 6, 9 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_mask_unpackhi_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
) -> __m512d {
let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, unpackhi, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm512_maskz_unpackhi_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let unpackhi = _mm512_unpackhi_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm256_mask_unpackhi_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
b: __m256d,
) -> __m256d {
let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, unpackhi, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm256_maskz_unpackhi_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let unpackhi = _mm256_unpackhi_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm_mask_unpackhi_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, unpackhi, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpckhpd))]
pub unsafe fn _mm_maskz_unpackhi_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let unpackhi = _mm_unpackhi_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, unpackhi, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))] //should be vpunpckldq
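/// Unpacks and interleaves 32-bit integers from the low half of each 128-bit
/// lane in `a` and `b`, and stores the results in `dst`.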
pub unsafe fn _mm512_unpacklo_epi32(a: __m512i, b: __m512i) -> __m512i {
let a = a.as_i32x16();
let b = b.as_i32x16();
#[rustfmt::skip]
let r: i32x16 = simd_shuffle16(
a, b,
[ 0, 16, 1, 17,
0 + 4, 16 + 4, 1 + 4, 17 + 4,
0 + 8, 16 + 8, 1 + 8, 17 + 8,
0 + 12, 16 + 12, 1 + 12, 17 + 12],
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_mask_unpacklo_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, unpacklo, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm512_maskz_unpacklo_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let unpacklo = _mm512_unpacklo_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm256_mask_unpacklo_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm256_maskz_unpacklo_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let unpacklo = _mm256_unpacklo_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm_mask_unpacklo_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
) -> __m128i {
let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, unpacklo, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpckldq))]
pub unsafe fn _mm_maskz_unpacklo_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let unpacklo = _mm_unpacklo_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))] //should be vpunpcklqdq
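/// Unpacks and interleaves 64-bit integers from the low half of each 128-bit
/// lane in `a` and `b`, and stores the results in `dst`.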
pub unsafe fn _mm512_unpacklo_epi64(a: __m512i, b: __m512i) -> __m512i {
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_mask_unpacklo_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm512_maskz_unpacklo_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let unpacklo = _mm512_unpacklo_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm256_mask_unpacklo_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, unpacklo, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm256_maskz_unpacklo_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let unpacklo = _mm256_unpacklo_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm_mask_unpacklo_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
) -> __m128i {
let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, unpacklo, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpunpcklqdq))]
pub unsafe fn _mm_maskz_unpacklo_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let unpacklo = _mm_unpacklo_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
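/// Unpacks and interleaves single-precision (32-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`, and stores the
/// results in `dst`.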
pub unsafe fn _mm512_unpacklo_ps(a: __m512, b: __m512) -> __m512 {
#[rustfmt::skip]
simd_shuffle16(a, b,
[ 0, 16, 1, 17,
0 + 4, 16 + 4, 1 + 4, 17 + 4,
0 + 8, 16 + 8, 1 + 8, 17 + 8,
0 + 12, 16 + 12, 1 + 12, 17 + 12],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_mask_unpacklo_ps(src: __m512, k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
transmute(simd_select_bitmask(k, unpacklo, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm512_maskz_unpacklo_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
let unpacklo = _mm512_unpacklo_ps(a, b).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm256_mask_unpacklo_ps(src: __m256, k: __mmask8, a: __m256, b: __m256) -> __m256 {
let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm256_maskz_unpacklo_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
let unpacklo = _mm256_unpacklo_ps(a, b).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm_mask_unpacklo_ps(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
transmute(simd_select_bitmask(k, unpacklo, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklps))]
pub unsafe fn _mm_maskz_unpacklo_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let unpacklo = _mm_unpacklo_ps(a, b).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
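/// Unpacks and interleaves double-precision (64-bit) floating-point elements
/// from the low half of each 128-bit lane in `a` and `b`, and stores the
/// results in `dst`.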
pub unsafe fn _mm512_unpacklo_pd(a: __m512d, b: __m512d) -> __m512d {
simd_shuffle8(a, b, [0, 8, 0 + 2, 8 + 2, 0 + 4, 8 + 4, 0 + 6, 8 + 6])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_mask_unpacklo_pd(
src: __m512d,
k: __mmask8,
a: __m512d,
b: __m512d,
) -> __m512d {
let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
transmute(simd_select_bitmask(k, unpacklo, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm512_maskz_unpacklo_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
let unpacklo = _mm512_unpacklo_pd(a, b).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm256_mask_unpacklo_pd(
src: __m256d,
k: __mmask8,
a: __m256d,
b: __m256d,
) -> __m256d {
let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
transmute(simd_select_bitmask(k, unpacklo, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm256_maskz_unpacklo_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
let unpacklo = _mm256_unpacklo_pd(a, b).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm_mask_unpacklo_pd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
transmute(simd_select_bitmask(k, unpacklo, src.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vunpcklpd))]
pub unsafe fn _mm_maskz_unpacklo_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let unpacklo = _mm_unpacklo_pd(a, b).as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
transmute(simd_select_bitmask(k, unpacklo, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps128_ps512(a: __m128) -> __m512 {
simd_shuffle16(
a,
_mm_set1_ps(-1.),
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps256_ps512(a: __m256) -> __m512 {
simd_shuffle16(
a,
_mm256_set1_ps(-1.),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps128_ps512(a: __m128) -> __m512 {
simd_shuffle16(
a,
_mm_set1_ps(0.),
[0, 1, 2, 3, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextps256_ps512(a: __m256) -> __m512 {
simd_shuffle16(
a,
_mm256_set1_ps(0.),
[0, 1, 2, 3, 4, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8],
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps128(a: __m512) -> __m128 {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps512_ps256(a: __m512) -> __m256 {
simd_shuffle8(a, a, [0, 1, 2, 3, 4, 5, 6, 7])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_pd(a: __m512) -> __m512d {
transmute(a.as_m512())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castps_si512(a: __m512) -> __m512i {
transmute(a.as_m512())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd128_pd512(a: __m128d) -> __m512d {
simd_shuffle8(a, _mm_set1_pd(-1.), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd256_pd512(a: __m256d) -> __m512d {
simd_shuffle8(a, _mm256_set1_pd(-1.), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd128_pd512(a: __m128d) -> __m512d {
simd_shuffle8(a, _mm_set1_pd(0.), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextpd256_pd512(a: __m256d) -> __m512d {
simd_shuffle8(a, _mm256_set1_pd(0.), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd128(a: __m512d) -> __m128d {
simd_shuffle2(a, a, [0, 1])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd512_pd256(a: __m512d) -> __m256d {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_ps(a: __m512d) -> __m512 {
transmute(a.as_m512d())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castpd_si512(a: __m512d) -> __m512i {
transmute(a.as_m512d())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi128_si512(a: __m128i) -> __m512i {
simd_shuffle8(a, _mm_set1_epi64x(-1), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi256_si512(a: __m256i) -> __m512i {
simd_shuffle8(a, _mm256_set1_epi64x(-1), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi128_si512(a: __m128i) -> __m512i {
simd_shuffle8(a, _mm_set1_epi64x(0), [0, 1, 2, 2, 2, 2, 2, 2])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_zextsi256_si512(a: __m256i) -> __m512i {
simd_shuffle8(a, _mm256_set1_epi64x(0), [0, 1, 2, 3, 4, 4, 4, 4])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si128(a: __m512i) -> __m128i {
simd_shuffle2(a, a, [0, 1])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_si256(a: __m512i) -> __m256i {
simd_shuffle4(a, a, [0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_ps(a: __m512i) -> __m512 {
transmute(a)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_castsi512_pd(a: __m512i) -> __m512d {
transmute(a)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(all(test, not(target_os = "windows")), assert_instr(vmovd))]
pub unsafe fn _mm512_cvtsi512_si32(a: __m512i) -> i32 {
simd_extract(a.as_i32x16(), 0)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcast))] //should be vpbroadcastd
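/// Broadcasts the low packed 32-bit integer from `a` to all elements of `dst`.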
pub unsafe fn _mm512_broadcastd_epi32(a: __m128i) -> __m512i {
let a = _mm512_castsi128_si512(a).as_i32x16();
let ret: i32x16 = simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]);
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_mask_broadcastd_epi32(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_maskz_broadcastd_epi32(k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastd_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_mask_broadcastd_epi32(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcastd_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_mask_broadcastd_epi32(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_maskz_broadcastd_epi32(k: __mmask8, a: __m128i) -> __m128i {
let broadcast = _mm_broadcastd_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcas))] //should be vpbroadcastq
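/// Broadcasts the low packed 64-bit integer from `a` to all elements of `dst`.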
pub unsafe fn _mm512_broadcastq_epi64(a: __m128i) -> __m512i {
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_mask_broadcastq_epi64(src: __m512i, k: __mmask8, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm512_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcastq_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_mask_broadcastq_epi64(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm256_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcastq_epi64(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_mask_broadcastq_epi64(src: __m128i, k: __mmask8, a: __m128i) -> __m128i {
let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcast))]
pub unsafe fn _mm_maskz_broadcastq_epi64(k: __mmask8, a: __m128i) -> __m128i {
let broadcast = _mm_broadcastq_epi64(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
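/// Broadcasts the low single-precision (32-bit) floating-point element from
/// `a` to all elements of `dst`.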
pub unsafe fn _mm512_broadcastss_ps(a: __m128) -> __m512 {
simd_shuffle16(a, a, [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_mask_broadcastss_ps(src: __m512, k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm512_maskz_broadcastss_ps(k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcastss_ps(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm256_mask_broadcastss_ps(src: __m256, k: __mmask8, a: __m128) -> __m256 {
let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm256_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m256 {
let broadcast = _mm256_broadcastss_ps(a).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm_mask_broadcastss_ps(src: __m128, k: __mmask8, a: __m128) -> __m128 {
let broadcast = _mm_broadcastss_ps(a).as_f32x4();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastss))]
pub unsafe fn _mm_maskz_broadcastss_ps(k: __mmask8, a: __m128) -> __m128 {
let broadcast = _mm_broadcastss_ps(a).as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
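/// Broadcasts the low double-precision (64-bit) floating-point element from
/// `a` to all elements of `dst`.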
pub unsafe fn _mm512_broadcastsd_pd(a: __m128d) -> __m512d {
simd_shuffle8(a, a, [0, 0, 0, 0, 0, 0, 0, 0])
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_mask_broadcastsd_pd(src: __m512d, k: __mmask8, a: __m128d) -> __m512d {
let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm512_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m512d {
let broadcast = _mm512_broadcastsd_pd(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm256_mask_broadcastsd_pd(src: __m256d, k: __mmask8, a: __m128d) -> __m256d {
let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
transmute(simd_select_bitmask(k, broadcast, src.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vbroadcastsd))]
pub unsafe fn _mm256_maskz_broadcastsd_pd(k: __mmask8, a: __m128d) -> __m256d {
let broadcast = _mm256_broadcastsd_pd(a).as_f64x4();
let zero = _mm256_setzero_pd().as_f64x4();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_i32x4(a: __m128i) -> __m512i {
let a = a.as_i32x4();
let ret: i32x16 = simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3]);
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_i32x4(src: __m512i, k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_i32x4(k: __mmask16, a: __m128i) -> __m512i {
let broadcast = _mm512_broadcast_i32x4(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_broadcast_i32x4(a: __m128i) -> __m256i {
let a = a.as_i32x4();
let ret: i32x8 = simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3]);
transmute(ret)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_mask_broadcast_i32x4(src: __m256i, k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_maskz_broadcast_i32x4(k: __mmask8, a: __m128i) -> __m256i {
let broadcast = _mm256_broadcast_i32x4(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_i64x4(a: __m256i) -> __m512i {
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_i64x4(src: __m512i, k: __mmask8, a: __m256i) -> __m512i {
let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_i64x4(k: __mmask8, a: __m256i) -> __m512i {
let broadcast = _mm512_broadcast_i64x4(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_f32x4(a: __m128) -> __m512 {
simd_shuffle16(a, a, [0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_f32x4(src: __m512, k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_f32x4(k: __mmask16, a: __m128) -> __m512 {
let broadcast = _mm512_broadcast_f32x4(a).as_f32x16();
let zero = _mm512_setzero_ps().as_f32x16();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_broadcast_f32x4(a: __m128) -> __m256 {
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_mask_broadcast_f32x4(src: __m256, k: __mmask8, a: __m128) -> __m256 {
let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
pub unsafe fn _mm256_maskz_broadcast_f32x4(k: __mmask8, a: __m128) -> __m256 {
let broadcast = _mm256_broadcast_f32x4(a).as_f32x8();
let zero = _mm256_setzero_ps().as_f32x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_broadcast_f64x4(a: __m256d) -> __m512d {
simd_shuffle8(a, a, [0, 1, 2, 3, 0, 1, 2, 3])
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_broadcast_f64x4(src: __m512d, k: __mmask8, a: __m256d) -> __m512d {
let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
transmute(simd_select_bitmask(k, broadcast, src.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_maskz_broadcast_f64x4(k: __mmask8, a: __m256d) -> __m512d {
let broadcast = _mm512_broadcast_f64x4(a).as_f64x8();
let zero = _mm512_setzero_pd().as_f64x8();
transmute(simd_select_bitmask(k, broadcast, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
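/// Blends packed 32-bit integers from `a` and `b` using control mask `k`:
/// lane `i` of `dst` is taken from `b` when bit `i` of `k` is set, and from
/// `a` otherwise.
///
/// A minimal usage sketch (illustrative only; assumes an AVX-512F target and
/// the surrounding `unsafe`/feature-detection plumbing):
///
/// ```ignore
/// let a = _mm512_set1_epi32(1);
/// let b = _mm512_set1_epi32(2);
/// // The low four lanes come from b, the remaining twelve from a.
/// let r = _mm512_mask_blend_epi32(0b0000_0000_0000_1111, a, b);
/// ```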
pub unsafe fn _mm512_mask_blend_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
transmute(simd_select_bitmask(k, b.as_i32x16(), a.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub unsafe fn _mm256_mask_blend_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
transmute(simd_select_bitmask(k, b.as_i32x8(), a.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa32))] //should be vpblendmd
pub unsafe fn _mm_mask_blend_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
transmute(simd_select_bitmask(k, b.as_i32x4(), a.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub unsafe fn _mm512_mask_blend_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
transmute(simd_select_bitmask(k, b.as_i64x8(), a.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub unsafe fn _mm256_mask_blend_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
transmute(simd_select_bitmask(k, b.as_i64x4(), a.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovdqa64))] //should be vpblendmq
pub unsafe fn _mm_mask_blend_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
transmute(simd_select_bitmask(k, b.as_i64x2(), a.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
pub unsafe fn _mm512_mask_blend_ps(k: __mmask16, a: __m512, b: __m512) -> __m512 {
transmute(simd_select_bitmask(k, b.as_f32x16(), a.as_f32x16()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
pub unsafe fn _mm256_mask_blend_ps(k: __mmask8, a: __m256, b: __m256) -> __m256 {
transmute(simd_select_bitmask(k, b.as_f32x8(), a.as_f32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))] //should be vblendmps
pub unsafe fn _mm_mask_blend_ps(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(simd_select_bitmask(k, b.as_f32x4(), a.as_f32x4()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
pub unsafe fn _mm512_mask_blend_pd(k: __mmask8, a: __m512d, b: __m512d) -> __m512d {
transmute(simd_select_bitmask(k, b.as_f64x8(), a.as_f64x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
pub unsafe fn _mm256_mask_blend_pd(k: __mmask8, a: __m256d, b: __m256d) -> __m256d {
transmute(simd_select_bitmask(k, b.as_f64x4(), a.as_f64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovapd))] //should be vblendmpd
pub unsafe fn _mm_mask_blend_pd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(simd_select_bitmask(k, b.as_f64x2(), a.as_f64x2()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(2)]
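/// Concatenates `a` and `b` into a 128-byte immediate result, shifts the
/// result right by `imm8` 32-bit elements, and stores the low 64 bytes
/// (16 elements) in `dst`. The shift count is taken modulo 16.
///
/// A minimal sketch of the lane movement (illustrative only):
///
/// ```ignore
/// // With a = [16, 17, ..., 31] and b = [0, 1, ..., 15], a shift of one
/// // 32-bit element yields [1, 2, ..., 15, 16].
/// let r = _mm512_alignr_epi32(a, b, 1);
/// ```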
pub unsafe fn _mm512_alignr_epi32(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x16();
let b = b.as_i32x16();
let imm8: i32 = imm8 % 16;
let r: i32x16 = match imm8 {
0 => simd_shuffle16(
a,
b,
[
16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
],
),
1 => simd_shuffle16(
a,
b,
[
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0,
],
),
2 => simd_shuffle16(
a,
b,
[18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1],
),
3 => simd_shuffle16(
a,
b,
[19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2],
),
4 => simd_shuffle16(
a,
b,
[20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3],
),
5 => simd_shuffle16(
a,
b,
[21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4],
),
6 => simd_shuffle16(
a,
b,
[22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5],
),
7 => simd_shuffle16(
a,
b,
[23, 24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6],
),
8 => simd_shuffle16(
a,
b,
[24, 25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7],
),
9 => simd_shuffle16(
a,
b,
[25, 26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8],
),
10 => simd_shuffle16(a, b, [26, 27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]),
11 => simd_shuffle16(a, b, [27, 28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]),
12 => simd_shuffle16(a, b, [28, 29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]),
13 => simd_shuffle16(a, b, [29, 30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]),
14 => simd_shuffle16(a, b, [30, 31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]),
_ => simd_shuffle16(a, b, [31, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x16(), src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi32(
k: __mmask16,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r.as_i32x16(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_alignr_epi32(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x8();
let b = b.as_i32x8();
let imm8: i32 = imm8 % 16;
let r: i32x8 = match imm8 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
7 => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
8 => simd_shuffle8(a, b, [0, 1, 2, 3, 4, 5, 6, 7]),
9 => simd_shuffle8(a, b, [1, 2, 3, 4, 5, 6, 7, 8]),
10 => simd_shuffle8(a, b, [2, 3, 4, 5, 6, 7, 8, 9]),
11 => simd_shuffle8(a, b, [3, 4, 5, 6, 7, 8, 9, 10]),
12 => simd_shuffle8(a, b, [4, 5, 6, 7, 8, 9, 10, 11]),
13 => simd_shuffle8(a, b, [5, 6, 7, 8, 9, 10, 11, 12]),
14 => simd_shuffle8(a, b, [6, 7, 8, 9, 10, 11, 12, 13]),
_ => simd_shuffle8(a, b, [7, 8, 9, 10, 11, 12, 13, 14]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_alignr_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x8(), src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_alignr_epi32(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, r.as_i32x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpalignr, imm8 = 1))] //should be valignd
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_epi32(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 255);
let a = a.as_i32x4();
let b = b.as_i32x4();
let imm8: i32 = imm8 % 8;
let r: i32x4 = match imm8 {
0 => simd_shuffle4(a, b, [4, 5, 6, 7]),
1 => simd_shuffle4(a, b, [5, 6, 7, 0]),
2 => simd_shuffle4(a, b, [6, 7, 0, 1]),
3 => simd_shuffle4(a, b, [7, 0, 1, 2]),
4 => simd_shuffle4(a, b, [0, 1, 2, 3]),
5 => simd_shuffle4(a, b, [1, 2, 3, 0]),
6 => simd_shuffle4(a, b, [2, 3, 0, 1]),
_ => simd_shuffle4(a, b, [3, 0, 1, 2]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_alignr_epi32(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
imm8: i32,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i32x4(), src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignd, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_alignr_epi32(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_alignr_epi32(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, r.as_i32x4(), zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(2)]
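/// Concatenates `a` and `b` into a 128-byte immediate result, shifts the
/// result right by `imm8` 64-bit elements, and stores the low 64 bytes
/// (8 elements) in `dst`. The shift count is taken modulo 8.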
pub unsafe fn _mm512_alignr_epi64(a: __m512i, b: __m512i, imm8: i32) -> __m512i {
assert!(imm8 >= 0 && imm8 <= 255);
let imm8: i32 = imm8 % 8;
let r: i64x8 = match imm8 {
0 => simd_shuffle8(a, b, [8, 9, 10, 11, 12, 13, 14, 15]),
1 => simd_shuffle8(a, b, [9, 10, 11, 12, 13, 14, 15, 0]),
2 => simd_shuffle8(a, b, [10, 11, 12, 13, 14, 15, 0, 1]),
3 => simd_shuffle8(a, b, [11, 12, 13, 14, 15, 0, 1, 2]),
4 => simd_shuffle8(a, b, [12, 13, 14, 15, 0, 1, 2, 3]),
5 => simd_shuffle8(a, b, [13, 14, 15, 0, 1, 2, 3, 4]),
6 => simd_shuffle8(a, b, [14, 15, 0, 1, 2, 3, 4, 5]),
_ => simd_shuffle8(a, b, [15, 0, 1, 2, 3, 4, 5, 6]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm512_mask_alignr_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
imm8: i32,
) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x8(), src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm512_maskz_alignr_epi64(k: __mmask8, a: __m512i, b: __m512i, imm8: i32) -> __m512i {
macro_rules! call {
($imm8:expr) => {
_mm512_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r.as_i64x8(), zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm256_alignr_epi64(a: __m256i, b: __m256i, imm8: i32) -> __m256i {
assert!(imm8 >= 0 && imm8 <= 255);
// valignq with 256-bit operands uses only imm8[1:0] as the shift count.
let imm8: i32 = imm8 % 4;
let r: i64x4 = match imm8 {
0 => simd_shuffle4(a, b, [4, 5, 6, 7]),
1 => simd_shuffle4(a, b, [5, 6, 7, 0]),
2 => simd_shuffle4(a, b, [6, 7, 0, 1]),
_ => simd_shuffle4(a, b, [7, 0, 1, 2]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm256_mask_alignr_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
imm8: i32,
) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x4(), src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm256_maskz_alignr_epi64(k: __mmask8, a: __m256i, b: __m256i, imm8: i32) -> __m256i {
macro_rules! call {
($imm8:expr) => {
_mm256_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r.as_i64x4(), zero))
}
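/// Concatenate `a` and `b` into an intermediate result, shift the result
/// right by `imm8` 64-bit elements, and return the low 2 elements; only
/// `imm8[0]` is used as the shift count.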
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpalignr, imm8 = 1))] // should be valignq, but LLVM currently selects vpalignr
#[rustc_args_required_const(2)]
pub unsafe fn _mm_alignr_epi64(a: __m128i, b: __m128i, imm8: i32) -> __m128i {
assert!(imm8 >= 0 && imm8 <= 255);
// valignq with 128-bit operands uses only imm8[0] as the shift count.
let imm8: i32 = imm8 % 2;
let r: i64x2 = match imm8 {
0 => simd_shuffle2(a, b, [2, 3]),
_ => simd_shuffle2(a, b, [3, 0]),
};
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_alignr_epi64(
src: __m128i,
k: __mmask8,
a: __m128i,
b: __m128i,
imm8: i32,
) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(simd_select_bitmask(k, r.as_i64x2(), src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(valignq, imm8 = 1))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_alignr_epi64(k: __mmask8, a: __m128i, b: __m128i, imm8: i32) -> __m128i {
macro_rules! call {
($imm8:expr) => {
_mm_alignr_epi64(a, b, $imm8)
};
}
let r = constify_imm8_sae!(imm8, call);
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, r.as_i64x2(), zero))
}
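/// Compute the bitwise AND of packed 32-bit integers in `a` and `b`.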
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))] // should be vpandd, but LLVM emits the quadword form
pub unsafe fn _mm512_and_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
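/// Compute the bitwise AND of packed 32-bit integers in `a` and `b` using
/// writemask `k`; lanes whose mask bit is clear are copied from `src`.
///
/// A minimal usage sketch (it assumes the caller has already verified
/// AVX-512F support at runtime, e.g. with
/// `is_x86_feature_detected!("avx512f")`):
///
/// ```ignore
/// let a = _mm512_set1_epi32(0b1100);
/// let b = _mm512_set1_epi32(0b1010);
/// let src = _mm512_set1_epi32(-1);
/// // The low 8 lanes receive `a & b` (0b1000); the high 8 lanes keep `src`.
/// let r = _mm512_mask_and_epi32(src, 0b00000000_11111111, a, b);
/// ```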
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_mask_and_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, and, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm512_maskz_and_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm256_mask_and_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let and = simd_and(a.as_i32x8(), b.as_i32x8());
transmute(simd_select_bitmask(k, and, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm256_maskz_and_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let and = simd_and(a.as_i32x8(), b.as_i32x8());
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm_mask_and_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let and = simd_and(a.as_i32x4(), b.as_i32x4());
transmute(simd_select_bitmask(k, and, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandd))]
pub unsafe fn _mm_maskz_and_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let and = simd_and(a.as_i32x4(), b.as_i32x4());
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_mask_and_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, and, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_maskz_and_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let and = _mm512_and_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm256_mask_and_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let and = simd_and(a.as_i64x4(), b.as_i64x4());
transmute(simd_select_bitmask(k, and, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm256_maskz_and_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let and = simd_and(a.as_i64x4(), b.as_i64x4());
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm_mask_and_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let and = simd_and(a.as_i64x2(), b.as_i64x2());
transmute(simd_select_bitmask(k, and, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm_maskz_and_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let and = simd_and(a.as_i64x2(), b.as_i64x2());
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, and, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandq))]
pub unsafe fn _mm512_and_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_and(a.as_i32x16(), b.as_i32x16()))
}
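/// Compute the bitwise OR of packed 32-bit integers in `a` and `b`.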
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))] // should be vpord, but LLVM emits the quadword form
pub unsafe fn _mm512_or_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_mask_or_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, or, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm512_maskz_or_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vor))]
pub unsafe fn _mm256_or_epi32(a: __m256i, b: __m256i) -> __m256i {
transmute(simd_or(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm256_mask_or_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let or = _mm256_or_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, or, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm256_maskz_or_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let or = _mm256_or_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vor))]
pub unsafe fn _mm_or_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(simd_or(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm_mask_or_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let or = _mm_or_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, or, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpord))]
pub unsafe fn _mm_maskz_or_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let or = _mm_or_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_mask_or_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, or, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_maskz_or_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let or = _mm512_or_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vor))]
pub unsafe fn _mm256_or_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(simd_or(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm256_mask_or_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let or = _mm256_or_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, or, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm256_maskz_or_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let or = _mm256_or_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vor))]
pub unsafe fn _mm_or_epi64(a: __m128i, b: __m128i) -> __m128i {
transmute(simd_or(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm_mask_or_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let or = _mm_or_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, or, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm_maskz_or_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let or = _mm_or_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, or, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vporq))]
pub unsafe fn _mm512_or_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_or(a.as_i32x16(), b.as_i32x16()))
}
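/// Compute the bitwise XOR of packed 32-bit integers in `a` and `b`.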
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))] // should be vpxord, but LLVM emits the quadword form
pub unsafe fn _mm512_xor_epi32(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_mask_xor_epi32(src: __m512i, k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, xor, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm512_maskz_xor_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vxor))]
pub unsafe fn _mm256_xor_epi32(a: __m256i, b: __m256i) -> __m256i {
transmute(simd_xor(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm256_mask_xor_epi32(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let xor = _mm256_xor_epi32(a, b).as_i32x8();
transmute(simd_select_bitmask(k, xor, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm256_maskz_xor_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let xor = _mm256_xor_epi32(a, b).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vxor))]
pub unsafe fn _mm_xor_epi32(a: __m128i, b: __m128i) -> __m128i {
transmute(simd_xor(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm_mask_xor_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let xor = _mm_xor_epi32(a, b).as_i32x4();
transmute(simd_select_bitmask(k, xor, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxord))]
pub unsafe fn _mm_maskz_xor_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let xor = _mm_xor_epi32(a, b).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_epi64(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_mask_xor_epi64(src: __m512i, k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, xor, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_maskz_xor_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let xor = _mm512_xor_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vxor))]
pub unsafe fn _mm256_xor_epi64(a: __m256i, b: __m256i) -> __m256i {
transmute(simd_xor(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm256_mask_xor_epi64(src: __m256i, k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let xor = _mm256_xor_epi64(a, b).as_i64x4();
transmute(simd_select_bitmask(k, xor, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm256_maskz_xor_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let xor = _mm256_xor_epi64(a, b).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vxor))]
pub unsafe fn _mm_xor_epi64(a: __m128i, b: __m128i) -> __m128i {
transmute(simd_xor(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm_mask_xor_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let xor = _mm_xor_epi64(a, b).as_i64x2();
transmute(simd_select_bitmask(k, xor, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm_maskz_xor_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let xor = _mm_xor_epi64(a, b).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, xor, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpxorq))]
pub unsafe fn _mm512_xor_si512(a: __m512i, b: __m512i) -> __m512i {
transmute(simd_xor(a.as_i32x16(), b.as_i32x16()))
}
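/// Compute `(NOT a) AND b` for packed 32-bit integers.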
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))] // should be vpandnd, but LLVM emits the quadword form
pub unsafe fn _mm512_andnot_epi32(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi32(_mm512_xor_epi32(a, _mm512_set1_epi32(u32::MAX as i32)), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_mask_andnot_epi32(
src: __m512i,
k: __mmask16,
a: __m512i,
b: __m512i,
) -> __m512i {
let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
transmute(simd_select_bitmask(k, andnot, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm512_maskz_andnot_epi32(k: __mmask16, a: __m512i, b: __m512i) -> __m512i {
let andnot = _mm512_andnot_epi32(a, b).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm256_mask_andnot_epi32(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
transmute(simd_select_bitmask(k, andnot, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm256_maskz_andnot_epi32(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let not = _mm256_xor_epi32(a, _mm256_set1_epi32(u32::MAX as i32));
let andnot = simd_and(not.as_i32x8(), b.as_i32x8());
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm_mask_andnot_epi32(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
transmute(simd_select_bitmask(k, andnot, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnd))]
pub unsafe fn _mm_maskz_andnot_epi32(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let not = _mm_xor_epi32(a, _mm_set1_epi32(u32::MAX as i32));
let andnot = simd_and(not.as_i32x4(), b.as_i32x4());
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_epi64(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_mask_andnot_epi64(
src: __m512i,
k: __mmask8,
a: __m512i,
b: __m512i,
) -> __m512i {
let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
transmute(simd_select_bitmask(k, andnot, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_maskz_andnot_epi64(k: __mmask8, a: __m512i, b: __m512i) -> __m512i {
let andnot = _mm512_andnot_epi64(a, b).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm256_mask_andnot_epi64(
src: __m256i,
k: __mmask8,
a: __m256i,
b: __m256i,
) -> __m256i {
let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
transmute(simd_select_bitmask(k, andnot, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm256_maskz_andnot_epi64(k: __mmask8, a: __m256i, b: __m256i) -> __m256i {
let not = _mm256_xor_epi64(a, _mm256_set1_epi64x(u64::MAX as i64));
let andnot = simd_and(not.as_i64x4(), b.as_i64x4());
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm_mask_andnot_epi64(src: __m128i, k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
transmute(simd_select_bitmask(k, andnot, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm_maskz_andnot_epi64(k: __mmask8, a: __m128i, b: __m128i) -> __m128i {
let not = _mm_xor_epi64(a, _mm_set1_epi64x(u64::MAX as i64));
let andnot = simd_and(not.as_i64x2(), b.as_i64x2());
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, andnot, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpandnq))]
pub unsafe fn _mm512_andnot_si512(a: __m512i, b: __m512i) -> __m512i {
_mm512_and_epi64(_mm512_xor_epi64(a, _mm512_set1_epi64(u64::MAX as i64)), b)
}
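/// Compute the bitwise AND of 16-bit masks `a` and `b`.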
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))]
pub unsafe fn _kand_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a & b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(and))]
pub unsafe fn _mm512_kand(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a & b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))]
pub unsafe fn _kor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a | b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(or))]
pub unsafe fn _mm512_kor(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a | b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _kxor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a ^ b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _mm512_kxor(a: __mmask16, b: __mmask16) -> __mmask16 {
transmute(a ^ b)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _knot_mask16(a: __mmask16) -> __mmask16 {
transmute(a ^ 0b11111111_11111111)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_knot(a: __mmask16) -> __mmask16 {
transmute(a ^ 0b11111111_11111111)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))]
pub unsafe fn _kandn_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_kand(_mm512_knot(a), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(not))]
pub unsafe fn _mm512_kandn(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_kand(_mm512_knot(a), b)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _kxnor_mask16(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_knot(_mm512_kxor(a, b))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(xor))]
pub unsafe fn _mm512_kxnor(a: __mmask16, b: __mmask16) -> __mmask16 {
_mm512_knot(_mm512_kxor(a, b))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_kmov(a: __mmask16) -> __mmask16 {
let r: u16 = a;
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_int2mask(mask: i32) -> __mmask16 {
let r: u16 = mask as u16;
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_mask2int(k1: __mmask16) -> i32 {
let r: i32 = k1 as i32;
transmute(r)
}
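/// Unpack and interleave 8 bits from masks `a` and `b`: the low byte of the
/// result is the low byte of `b`, and the high byte is the low byte of `a`
/// (the behavior of `kunpckbw`).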
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(mov))]
pub unsafe fn _mm512_kunpackb(a: __mmask16, b: __mmask16) -> __mmask16 {
// kunpckbw: the low byte of the result comes from `b`, the high byte from
// the low byte of `a`.
let a = a & 0b00000000_11111111;
let b = b & 0b00000000_11111111;
transmute((a << 8) | b)
}
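/// Compute the bitwise OR of masks `a` and `b` and return 1 if the result
/// is all ones (the carry-flag condition of `kortestw`), otherwise 0.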
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(cmp))]
pub unsafe fn _mm512_kortestc(a: __mmask16, b: __mmask16) -> i32 {
let r = a | b;
if r == 0b11111111_11111111 {
1
} else {
0
}
}
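/// Compute the bitwise AND of packed 32-bit integers in `a` and `b` and set
/// each mask bit when the corresponding lane is non-zero.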
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_test_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpneq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm512_mask_test_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpneq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm256_test_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_cmpneq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm256_mask_test_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_mask_cmpneq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm_test_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_cmpneq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmd))]
pub unsafe fn _mm_mask_test_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_mask_cmpneq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_test_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpneq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm512_mask_test_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpneq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm256_test_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_cmpneq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm256_mask_test_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_mask_cmpneq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm_test_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_cmpneq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestmq))]
pub unsafe fn _mm_mask_test_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_mask_cmpneq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_testn_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpeq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm512_mask_testn_epi32_mask(k: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
let and = _mm512_and_epi32(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpeq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm256_testn_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_cmpeq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm256_mask_testn_epi32_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_mask_cmpeq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm_testn_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_cmpeq_epi32_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmd))]
pub unsafe fn _mm_mask_testn_epi32_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_mask_cmpeq_epi32_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_testn_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_cmpeq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm512_mask_testn_epi64_mask(k: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
let and = _mm512_and_epi64(a, b);
let zero = _mm512_setzero_si512();
_mm512_mask_cmpeq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm256_testn_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_cmpeq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm256_mask_testn_epi64_mask(k: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
let and = _mm256_and_si256(a, b);
let zero = _mm256_setzero_si256();
_mm256_mask_cmpeq_epi64_mask(k, and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm_testn_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_cmpeq_epi64_mask(and, zero)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vptestnmq))]
pub unsafe fn _mm_mask_testn_epi64_mask(k: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
let and = _mm_and_si128(a, b);
let zero = _mm_setzero_si128();
_mm_mask_cmpeq_epi64_mask(k, and, zero)
}
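/// Store `a` to memory using a non-temporal hint to minimize cache
/// pollution; `mem_addr` must be aligned on a 64-byte boundary.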
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_ps(mem_addr: *mut f32, a: __m512) {
intrinsics::nontemporal_store(mem_addr as *mut __m512, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_pd(mem_addr: *mut f64, a: __m512d) {
intrinsics::nontemporal_store(mem_addr as *mut __m512d, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovntps))]
#[allow(clippy::cast_ptr_alignment)]
pub unsafe fn _mm512_stream_si512(mem_addr: *mut i64, a: __m512i) {
intrinsics::nontemporal_store(mem_addr as *mut __m512i, a);
}
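/// Set packed single-precision elements with the supplied values; the
/// arguments are taken in reverse order, so the last argument lands in the
/// lowest lane.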
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_ps(
e0: f32,
e1: f32,
e2: f32,
e3: f32,
e4: f32,
e5: f32,
e6: f32,
e7: f32,
e8: f32,
e9: f32,
e10: f32,
e11: f32,
e12: f32,
e13: f32,
e14: f32,
e15: f32,
) -> __m512 {
_mm512_setr_ps(
e15, e14, e13, e12, e11, e10, e9, e8, e7, e6, e5, e4, e3, e2, e1, e0,
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_ps(
e0: f32,
e1: f32,
e2: f32,
e3: f32,
e4: f32,
e5: f32,
e6: f32,
e7: f32,
e8: f32,
e9: f32,
e10: f32,
e11: f32,
e12: f32,
e13: f32,
e14: f32,
e15: f32,
) -> __m512 {
let r = f32x16::new(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_pd(a: f64) -> __m512d {
transmute(f64x8::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_ps(a: f32) -> __m512 {
transmute(f32x16::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_epi32(
e15: i32,
e14: i32,
e13: i32,
e12: i32,
e11: i32,
e10: i32,
e9: i32,
e8: i32,
e7: i32,
e6: i32,
e5: i32,
e4: i32,
e3: i32,
e2: i32,
e1: i32,
e0: i32,
) -> __m512i {
_mm512_setr_epi32(
e0, e1, e2, e3, e4, e5, e6, e7, e8, e9, e10, e11, e12, e13, e14, e15,
)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi8(a: i8) -> __m512i {
transmute(i8x64::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi16(a: i16) -> __m512i {
transmute(i16x32::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi32(a: i32) -> __m512i {
transmute(i32x16::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_mask_set1_epi32(src: __m512i, k: __mmask16, a: i32) -> __m512i {
let r = _mm512_set1_epi32(a).as_i32x16();
transmute(simd_select_bitmask(k, r, src.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm512_maskz_set1_epi32(k: __mmask16, a: i32) -> __m512i {
let r = _mm512_set1_epi32(a).as_i32x16();
let zero = _mm512_setzero_si512().as_i32x16();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm256_mask_set1_epi32(src: __m256i, k: __mmask8, a: i32) -> __m256i {
let r = _mm256_set1_epi32(a).as_i32x8();
transmute(simd_select_bitmask(k, r, src.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm256_maskz_set1_epi32(k: __mmask8, a: i32) -> __m256i {
let r = _mm256_set1_epi32(a).as_i32x8();
let zero = _mm256_setzero_si256().as_i32x8();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm_mask_set1_epi32(src: __m128i, k: __mmask8, a: i32) -> __m128i {
let r = _mm_set1_epi32(a).as_i32x4();
transmute(simd_select_bitmask(k, r, src.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastd))]
pub unsafe fn _mm_maskz_set1_epi32(k: __mmask8, a: i32) -> __m128i {
let r = _mm_set1_epi32(a).as_i32x4();
let zero = _mm_setzero_si128().as_i32x4();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set1_epi64(a: i64) -> __m512i {
transmute(i64x8::splat(a))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_mask_set1_epi64(src: __m512i, k: __mmask8, a: i64) -> __m512i {
let r = _mm512_set1_epi64(a).as_i64x8();
transmute(simd_select_bitmask(k, r, src.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm512_maskz_set1_epi64(k: __mmask8, a: i64) -> __m512i {
let r = _mm512_set1_epi64(a).as_i64x8();
let zero = _mm512_setzero_si512().as_i64x8();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm256_mask_set1_epi64(src: __m256i, k: __mmask8, a: i64) -> __m256i {
let r = _mm256_set1_epi64x(a).as_i64x4();
transmute(simd_select_bitmask(k, r, src.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm256_maskz_set1_epi64(k: __mmask8, a: i64) -> __m256i {
let r = _mm256_set1_epi64x(a).as_i64x4();
let zero = _mm256_setzero_si256().as_i64x4();
transmute(simd_select_bitmask(k, r, zero))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm_mask_set1_epi64(src: __m128i, k: __mmask8, a: i64) -> __m128i {
let r = _mm_set1_epi64x(a).as_i64x2();
transmute(simd_select_bitmask(k, r, src.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpbroadcastq))]
pub unsafe fn _mm_maskz_set1_epi64(k: __mmask8, a: i64) -> __m128i {
let r = _mm_set1_epi64x(a).as_i64x2();
let zero = _mm_setzero_si128().as_i64x2();
transmute(simd_select_bitmask(k, r, zero))
}
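/// Set packed 64-bit integers to the repeated four-element sequence; the
/// last argument `a` lands in the lowest lane (`a, b, c, d, a, b, c, d`).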
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
let r = i64x8::new(a, b, c, d, a, b, c, d);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr4_epi64(d: i64, c: i64, b: i64, a: i64) -> __m512i {
let r = i64x8::new(d, c, b, a, d, c, b, a);
transmute(r)
}
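/// Compare packed single-precision elements in `a` and `b` for less-than
/// and return the results as a 16-bit mask.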
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_NEQ_UQ)
}
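/// Compare packed single-precision elements in `a` and `b` using the
/// comparison predicate `imm8` and return the results as a 16-bit mask.
///
/// A minimal predicate sketch (it assumes AVX-512F support has been
/// verified at runtime):
///
/// ```ignore
/// let a = _mm512_set1_ps(1.0);
/// let b = _mm512_set1_ps(2.0);
/// // 1.0 < 2.0 holds in every lane, so all 16 mask bits are set.
/// let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OS);
/// assert_eq!(m, 0xFFFF);
/// ```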
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_ps_mask(a: __m512, b: __m512, imm8: i32) -> __mmask16 {
let neg_one = -1;
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm5:expr) => {
vcmpps(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_ps_mask(k1: __mmask16, a: __m512, b: __m512, imm8: i32) -> __mmask16 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm5:expr) => {
vcmpps(a, b, $imm5, k1 as i16, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_ps_mask(a: __m256, b: __m256, imm8: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f32x8();
let b = b.as_f32x8();
macro_rules! call {
($imm5:expr) => {
vcmpps256(a, b, $imm5, neg_one)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_ps_mask(k1: __mmask8, a: __m256, b: __m256, imm8: i32) -> __mmask8 {
let a = a.as_f32x8();
let b = b.as_f32x8();
macro_rules! call {
($imm5:expr) => {
vcmpps256(a, b, $imm5, k1 as i8)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_ps_mask(a: __m128, b: __m128, imm8: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f32x4();
let b = b.as_f32x4();
macro_rules! call {
($imm5:expr) => {
vcmpps128(a, b, $imm5, neg_one)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_ps_mask(k1: __mmask8, a: __m128, b: __m128, imm8: i32) -> __mmask8 {
let a = a.as_f32x4();
let b = b.as_f32x4();
macro_rules! call {
($imm5:expr) => {
vcmpps128(a, b, $imm5, k1 as i8)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_ps_mask(a: __m512, b: __m512, imm8: i32, sae: i32) -> __mmask16 {
let neg_one = -1;
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpps(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_ps_mask(
m: __mmask16,
a: __m512,
b: __m512,
imm8: i32,
sae: i32,
) -> __mmask16 {
let a = a.as_f32x16();
let b = b.as_f32x16();
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpps(a, b, $imm5, m as i16, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_ps_mask(a: __m512, b: __m512) -> __mmask16 {
_mm512_cmp_ps_mask(a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_ps_mask(k1: __mmask16, a: __m512, b: __m512) -> __mmask16 {
_mm512_mask_cmp_ps_mask(k1, a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmplt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmplt_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LT_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnlt_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnlt_pd_mask(m: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(m, a, b, _CMP_NLT_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmple_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmple_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_LE_OS)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpnle_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpnle_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NLE_US)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpeq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpeq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_EQ_OQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpneq_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpneq_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_NEQ_UQ)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_pd_mask(a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm5:expr) => {
vcmppd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_pd_mask(k1: __mmask8, a: __m512d, b: __m512d, imm8: i32) -> __mmask8 {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm5:expr) => {
vcmppd(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_pd_mask(a: __m256d, b: __m256d, imm8: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f64x4();
let b = b.as_f64x4();
macro_rules! call {
($imm5:expr) => {
vcmppd256(a, b, $imm5, neg_one)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_pd_mask(k1: __mmask8, a: __m256d, b: __m256d, imm8: i32) -> __mmask8 {
let a = a.as_f64x4();
let b = b.as_f64x4();
macro_rules! call {
($imm5:expr) => {
vcmppd256(a, b, $imm5, k1 as i8)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_pd_mask(a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f64x2();
let b = b.as_f64x2();
macro_rules! call {
($imm5:expr) => {
vcmppd128(a, b, $imm5, neg_one)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_pd_mask(k1: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
let a = a.as_f64x2();
let b = b.as_f64x2();
macro_rules! call {
($imm5:expr) => {
vcmppd128(a, b, $imm5, k1 as i8)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_cmp_round_pd_mask(a: __m512d, b: __m512d, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmppd(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm512_mask_cmp_round_pd_mask(
k1: __mmask8,
a: __m512d,
b: __m512d,
imm8: i32,
sae: i32,
) -> __mmask8 {
let a = a.as_f64x8();
let b = b.as_f64x8();
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmppd(a, b, $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_ORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_cmpunord_pd_mask(a: __m512d, b: __m512d) -> __mmask8 {
_mm512_cmp_pd_mask(a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp))]
pub unsafe fn _mm512_mask_cmpunord_pd_mask(k1: __mmask8, a: __m512d, b: __m512d) -> __mmask8 {
_mm512_mask_cmp_pd_mask(k1, a, b, _CMP_UNORD_Q)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_ss_mask(a: __m128, b: __m128, imm8: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmpss(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_ss_mask(k1: __mmask8, a: __m128, b: __m128, imm8: i32) -> __mmask8 {
macro_rules! call {
($imm5:expr) => {
vcmpss(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_ss_mask(a: __m128, b: __m128, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpss(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_ss_mask(
k1: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __mmask8 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpss(a, b, $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_sd_mask(a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr) => {
vcmpsd(a, b, $imm5, neg_one, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_sd_mask(k1: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __mmask8 {
macro_rules! call {
($imm5:expr) => {
vcmpsd(a, b, $imm5, k1 as i8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm5!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2, 3)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_cmp_round_sd_mask(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __mmask8 {
let neg_one = -1;
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpsd(a, b, $imm5, neg_one, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3, 4)]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 0, sae = 4))]
pub unsafe fn _mm_mask_cmp_round_sd_mask(
k1: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __mmask8 {
macro_rules! call {
($imm5:expr, $imm4:expr) => {
vcmpsd(a, b, $imm5, k1 as i8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
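/// Compare packed unsigned 32-bit integers in `a` and `b` for less-than and
/// return the results as a mask; the masked variants below AND the result
/// with `k1`.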
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_lt(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmplt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_lt(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmplt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_lt(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmplt_epu32_mask(a, b) & k1
}
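// Illustrative: these compares treat lanes as unsigned, so an all-ones lane
// is u32::MAX, not -1, and the masked form only reports lanes set in `k1`:
//
//     let a = _mm512_set1_epi32(-1); // 0xFFFF_FFFF in every lane
//     let b = _mm512_set1_epi32(0);
//     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);      // u32::MAX < 0: never
//     assert_eq!(_mm512_cmplt_epu32_mask(b, a), 0xFFFF); // 0 < u32::MAX: all 16 lanes
//     assert_eq!(_mm512_mask_cmplt_epu32_mask(0x00FF, b, a), 0x00FF);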
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_gt(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpgt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_gt(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpgt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_gt(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpgt_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_le(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmple_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_le(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmple_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_le(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmple_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_ge(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpge_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_ge(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpge_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_ge(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpge_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_eq(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpeq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_eq(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpeq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_eq(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpeq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<u32x16, _>(simd_ne(a.as_u32x16(), b.as_u32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpneq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<u32x8, _>(simd_ne(a.as_u32x8(), b.as_u32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpneq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<u32x4, _>(simd_ne(a.as_u32x4(), b.as_u32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpneq_epu32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
let neg_one = -1;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm3:expr) => {
vpcmpud(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu32_mask(
k1: __mmask16,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm3:expr) => {
vpcmpud(a, b, $imm3, k1 as i16)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_epu32_mask(a: __m256i, b: __m256i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! call {
($imm3:expr) => {
vpcmpud256(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu32_mask(
k1: __mmask8,
a: __m256i,
b: __m256i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! call {
($imm3:expr) => {
vpcmpud256(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_epu32_mask(a: __m128i, b: __m128i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i32x4();
let b = b.as_i32x4();
macro_rules! call {
($imm3:expr) => {
vpcmpud128(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_epu32_mask(
k1: __mmask8,
a: __m128i,
b: __m128i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i32x4();
let b = b.as_i32x4();
macro_rules! call {
($imm3:expr) => {
vpcmpud128(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
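// The `imm8` predicate of the vpcmp/vpcmpu families uses the 3-bit _MM_CMPINT
// encoding: 0 = EQ, 1 = LT, 2 = LE, 3 = FALSE, 4 = NE, 5 = NLT (>=),
// 6 = NLE (>), 7 = TRUE. The fixed-predicate intrinsics above are shorthands
// for it, e.g. (illustrative):
//
//     let a = _mm512_set1_epi32(1);
//     let b = _mm512_set1_epi32(2);
//     assert_eq!(
//         _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT),
//         _mm512_cmplt_epu32_mask(a, b),
//     );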
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_lt(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmplt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_lt(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmplt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_lt(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmplt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_gt(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpgt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_gt(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpgt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_gt(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpgt_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_le(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmple_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_le(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmple_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_le(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmple_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_ge(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpge_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_ge(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpge_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_ge(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpge_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_eq(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpeq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_eq(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpeq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_eq(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpeq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi32_mask(a: __m512i, b: __m512i) -> __mmask16 {
simd_bitmask::<i32x16, _>(simd_ne(a.as_i32x16(), b.as_i32x16()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi32_mask(k1: __mmask16, a: __m512i, b: __m512i) -> __mmask16 {
_mm512_cmpneq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi32_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<i32x8, _>(simd_ne(a.as_i32x8(), b.as_i32x8()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpneq_epi32_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi32_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<i32x4, _>(simd_ne(a.as_i32x4(), b.as_i32x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi32_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpneq_epi32_mask(a, b) & k1
}
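// Illustrative: the signed and unsigned compares diverge exactly when the
// sign bit is set.
//
//     let a = _mm512_set1_epi32(i32::MIN); // 0x8000_0000 in every lane
//     let b = _mm512_set1_epi32(0);
//     assert_eq!(_mm512_cmplt_epi32_mask(a, b), 0xFFFF); // signed:   MIN < 0
//     assert_eq!(_mm512_cmplt_epu32_mask(a, b), 0);      // unsigned: 0x8000_0000 > 0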
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi32_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask16 {
let neg_one = -1;
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm3:expr) => {
vpcmpd(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi32_mask(
k1: __mmask16,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask16 {
let a = a.as_i32x16();
let b = b.as_i32x16();
macro_rules! call {
($imm3:expr) => {
vpcmpd(a, b, $imm3, k1 as i16)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_epi32_mask(a: __m256i, b: __m256i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! call {
($imm3:expr) => {
vpcmpd256(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi32_mask(
k1: __mmask8,
a: __m256i,
b: __m256i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i32x8();
let b = b.as_i32x8();
macro_rules! call {
($imm3:expr) => {
vpcmpd256(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_epi32_mask(a: __m128i, b: __m128i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i32x4();
let b = b.as_i32x4();
macro_rules! call {
($imm3:expr) => {
vpcmpd128(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_epi32_mask(
k1: __mmask8,
a: __m128i,
b: __m128i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i32x4();
let b = b.as_i32x4();
macro_rules! call {
($imm3:expr) => {
vpcmpd128(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_lt(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmplt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_lt(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmplt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_gt(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpgt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_gt(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpgt_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_le(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmple_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_le(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmple_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_le(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmple_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ge(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpge_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_ge(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpge_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_ge(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpge_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_eq(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpeq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_eq(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpeq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epu64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ne(a.as_u64x8(), b.as_u64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpneq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epu64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_ne(a.as_u64x4(), b.as_u64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpneq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epu64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_ne(a.as_u64x2(), b.as_u64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epu64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpneq_epu64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epu64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm3:expr) => {
vpcmpuq(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epu64_mask(
k1: __mmask8,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm3:expr) => {
vpcmpuq(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_epu64_mask(a: __m256i, b: __m256i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! call {
($imm3:expr) => {
vpcmpuq256(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_epu64_mask(
k1: __mmask8,
a: __m256i,
b: __m256i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! call {
($imm3:expr) => {
vpcmpuq256(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_epu64_mask(a: __m128i, b: __m128i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x2();
let b = b.as_i64x2();
macro_rules! call {
($imm3:expr) => {
vpcmpuq128(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_epu64_mask(
k1: __mmask8,
a: __m128i,
b: __m128i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x2();
let b = b.as_i64x2();
macro_rules! call {
($imm3:expr) => {
vpcmpuq128(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmplt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_lt(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmplt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmplt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmplt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_lt(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmplt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmplt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmplt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_lt(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmplt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmplt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpgt_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_gt(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpgt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpgt_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_gt(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpgt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpgt_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_gt(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpgt_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpgt_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmple_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_le(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmple_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmple_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmple_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_le(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmple_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmple_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmple_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_le(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmple_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmple_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpge_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ge(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpge_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpge_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpge_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_ge(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpge_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpge_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpge_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_ge(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpge_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpge_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpeq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_eq(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpeq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpeq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_eq(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpeq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpeq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_eq(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpeq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpeq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_cmpneq_epi64_mask(a: __m512i, b: __m512i) -> __mmask8 {
simd_bitmask::<__m512i, _>(simd_ne(a.as_i64x8(), b.as_i64x8()))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm512_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m512i, b: __m512i) -> __mmask8 {
_mm512_cmpneq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_cmpneq_epi64_mask(a: __m256i, b: __m256i) -> __mmask8 {
simd_bitmask::<__m256i, _>(simd_ne(a.as_i64x4(), b.as_i64x4()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm256_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m256i, b: __m256i) -> __mmask8 {
_mm256_cmpneq_epi64_mask(a, b) & k1
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_cmpneq_epi64_mask(a: __m128i, b: __m128i) -> __mmask8 {
simd_bitmask::<__m128i, _>(simd_ne(a.as_i64x2(), b.as_i64x2()))
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpcmp))]
pub unsafe fn _mm_mask_cmpneq_epi64_mask(k1: __mmask8, a: __m128i, b: __m128i) -> __mmask8 {
_mm_cmpneq_epi64_mask(a, b) & k1
}
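// For the 64-bit element compares, only the low 8 (512-bit), 4 (256-bit) or
// 2 (128-bit) bits of the returned __mmask8 are meaningful; the remaining
// bits are zero. Illustrative:
//
//     let a = _mm_set1_epi64x(7);
//     assert_eq!(_mm_cmpeq_epi64_mask(a, a), 0b11); // two lanes in a __m128i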
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_cmp_epi64_mask(a: __m512i, b: __m512i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm3:expr) => {
vpcmpq(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm512_mask_cmp_epi64_mask(
k1: __mmask8,
a: __m512i,
b: __m512i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x8();
let b = b.as_i64x8();
macro_rules! call {
($imm3:expr) => {
vpcmpq(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_cmp_epi64_mask(a: __m256i, b: __m256i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! call {
($imm3:expr) => {
vpcmpq256(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm256_mask_cmp_epi64_mask(
k1: __mmask8,
a: __m256i,
b: __m256i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x4();
let b = b.as_i64x4();
macro_rules! call {
($imm3:expr) => {
vpcmpq256(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(2)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_cmp_epi64_mask(a: __m128i, b: __m128i, imm8: _MM_CMPINT_ENUM) -> __mmask8 {
let neg_one = -1;
let a = a.as_i64x2();
let b = b.as_i64x2();
macro_rules! call {
($imm3:expr) => {
vpcmpq128(a, b, $imm3, neg_one)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[rustc_args_required_const(3)]
#[cfg_attr(test, assert_instr(vpcmp, imm8 = 0))]
pub unsafe fn _mm_mask_cmp_epi64_mask(
k1: __mmask8,
a: __m128i,
b: __m128i,
imm8: _MM_CMPINT_ENUM,
) -> __mmask8 {
let a = a.as_i64x2();
let b = b.as_i64x2();
macro_rules! call {
($imm3:expr) => {
vpcmpq128(a, b, $imm3, k1 as i8)
};
}
let r = constify_imm3!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi32(a: __m512i) -> i32 {
simd_reduce_add_unordered(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_epi64(a: __m512i) -> i64 {
simd_reduce_add_unordered(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_ps(a: __m512) -> f32 {
simd_reduce_add_unordered(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_setzero_ps().as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_add_pd(a: __m512d) -> f64 {
simd_reduce_add_unordered(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_add_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_add_unordered(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_setzero_pd().as_f64x8(),
))
}
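// The masked reductions here and below substitute the operation's identity
// for masked-off lanes: 0 for add/or, 1 for mul, all-ones for and, and the
// element type's minimum/maximum for max/min. Illustrative:
//
//     let a = _mm512_set1_epi32(3);
//     assert_eq!(_mm512_reduce_add_epi32(a), 48);              // 16 * 3
//     assert_eq!(_mm512_mask_reduce_add_epi32(0x00FF, a), 24); // 8 lanes counted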
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi32(a: __m512i) -> i32 {
simd_reduce_mul_unordered(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(1).as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_epi64(a: __m512i) -> i64 {
simd_reduce_mul_unordered(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(1).as_i64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_ps(a: __m512) -> f32 {
simd_reduce_mul_unordered(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_set1_ps(1.).as_f32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_mul_pd(a: __m512d) -> f64 {
simd_reduce_mul_unordered(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_mul_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_mul_unordered(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_set1_pd(1.).as_f64x8(),
))
}
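// Illustrative: the multiplicative identity keeps masked-off lanes neutral.
//
//     let a = _mm512_set1_epi32(2);
//     assert_eq!(_mm512_mask_reduce_mul_epi32(0x000F, a), 16); // 2 * 2 * 2 * 2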
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi32(a: __m512i) -> i32 {
simd_reduce_max(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(i32::MIN).as_i32x16(), // identity for signed max
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epi64(a: __m512i) -> i64 {
simd_reduce_max(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(i64::MIN).as_i64x8(), // identity for signed max
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu32(a: __m512i) -> u32 {
simd_reduce_max(a.as_u32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u32x16(),
_mm512_setzero_si512().as_u32x16(), // identity for unsigned max
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_epu64(a: __m512i) -> u64 {
simd_reduce_max(a.as_u64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_u64x8(),
_mm512_set1_epi64(0).as_u64x8(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_ps(a: __m512) -> f32 {
simd_reduce_max(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_set1_ps(f32::MIN).as_f32x16(), // identity for max
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_max_pd(a: __m512d) -> f64 {
simd_reduce_max(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_max_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_max(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_set1_pd(f64::MIN).as_f64x8(), // identity for max
))
}
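// Illustrative: with i32::MIN as the masked-lane identity, an all-negative
// input still reduces to the correct maximum.
//
//     let a = _mm512_set1_epi32(-5);
//     assert_eq!(_mm512_mask_reduce_max_epi32(0x0001, a), -5);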
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi32(a: __m512i) -> i32 {
simd_reduce_min(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(i32::MAX).as_i32x16(), // identity for signed min
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epi64(a: __m512i) -> i64 {
simd_reduce_min(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(i64::MAX).as_i64x8(), // identity for signed min
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu32(a: __m512i) -> u32 {
simd_reduce_min(a.as_u32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu32(k: __mmask16, a: __m512i) -> u32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u32x16(),
_mm512_set1_epi32(-1).as_u32x16(), // identity for unsigned min: u32::MAX
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_epu64(a: __m512i) -> u64 {
simd_reduce_min(a.as_u64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_epu64(k: __mmask8, a: __m512i) -> u64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_u64x8(),
_mm512_set1_epi64(-1).as_u64x8(), // identity for unsigned min: u64::MAX
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_ps(a: __m512) -> f32 {
simd_reduce_min(a.as_f32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_ps(k: __mmask16, a: __m512) -> f32 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_f32x16(),
_mm512_set1_ps(f32::MAX).as_f32x16(), // identity for min
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_min_pd(a: __m512d) -> f64 {
simd_reduce_min(a.as_f64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_min_pd(k: __mmask8, a: __m512d) -> f64 {
simd_reduce_min(simd_select_bitmask(
k,
a.as_f64x8(),
_mm512_set1_pd(f64::MAX).as_f64x8(), // identity for min
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi32(a: __m512i) -> i32 {
simd_reduce_and(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_and(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_set1_epi32(-1).as_i32x16(), // identity for AND: all bits set
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_and_epi64(a: __m512i) -> i64 {
simd_reduce_and(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_and_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_and(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_set1_epi64(-1).as_i64x8(), // identity for AND: all bits set
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi32(a: __m512i) -> i32 {
simd_reduce_or(a.as_i32x16())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi32(k: __mmask16, a: __m512i) -> i32 {
simd_reduce_or(simd_select_bitmask(
k,
a.as_i32x16(),
_mm512_setzero_si512().as_i32x16(),
))
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_reduce_or_epi64(a: __m512i) -> i64 {
simd_reduce_or(a.as_i64x8())
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_mask_reduce_or_epi64(k: __mmask8, a: __m512i) -> i64 {
simd_reduce_or(simd_select_bitmask(
k,
a.as_i64x8(),
_mm512_setzero_si512().as_i64x8(),
))
}
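// Illustrative: AND needs an all-ones identity so masked-off lanes do not
// clear bits, while OR's identity is zero.
//
//     let a = _mm512_set1_epi32(0b1010);
//     assert_eq!(_mm512_mask_reduce_and_epi32(0x0003, a), 0b1010);
//     assert_eq!(_mm512_mask_reduce_or_epi32(0x0003, a), 0b1010);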
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_pd() -> __m512d {
_mm512_set1_pd(0.0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_ps() -> __m512 {
_mm512_set1_ps(0.0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined_epi32() -> __m512i {
_mm512_set1_epi32(0)
}
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_undefined() -> __m512 {
_mm512_set1_ps(0.0)
}
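// NB: the _mm512_undefined* intrinsics above return zeroed vectors. Intel
// only guarantees "undefined" contents, so zero-initializing is a conforming
// choice that avoids exposing uninitialized memory.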
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_epi32(mem_addr: *const i32) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_loadu_epi32(mem_addr: *const i32) -> __m256i {
ptr::read_unaligned(mem_addr as *const __m256i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm_loadu_epi32(mem_addr: *const i32) -> __m128i {
ptr::read_unaligned(mem_addr as *const __m128i)
}
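// Illustrative: the *_loadu_* intrinsics accept arbitrarily aligned pointers.
//
//     let data = [1i32; 16];
//     let v = _mm512_loadu_epi32(data.as_ptr());
//     assert_eq!(_mm512_reduce_add_epi32(v), 16);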
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovdwmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovdwmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdw))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovsdwmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovsdwmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdw))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovsdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovusdwmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovusdwmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdw))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovusdwmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm512_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovdbmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm256_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovdbmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovdb))]
pub unsafe fn _mm_mask_cvtepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm512_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovsdbmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm256_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovsdbmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsdb))]
pub unsafe fn _mm_mask_cvtsepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovsdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm512_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask16, a: __m512i) {
vpmovusdbmem(mem_addr as *mut i8, a.as_i32x16(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm256_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovusdbmem256(mem_addr as *mut i8, a.as_i32x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusdb))]
pub unsafe fn _mm_mask_cvtusepi32_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovusdbmem128(mem_addr as *mut i8, a.as_i32x4(), k);
}
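// Illustrative: the down-converting stores write one narrowed element per set
// mask bit position; vpmovdw truncates, vpmovsdw saturates as signed, and
// vpmovusdw saturates as unsigned.
//
//     let a = _mm512_set1_epi32(0x0001_0000); // 65536: does not fit in 16 bits
//     let mut out = [0i16; 16];
//     _mm512_mask_cvtepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0xFFFF, a);
//     assert_eq!(out, [0i16; 16]);  // truncation keeps only the low half
//     _mm512_mask_cvtusepi32_storeu_epi16(out.as_mut_ptr() as *mut i8, 0xFFFF, a);
//     assert_eq!(out, [-1i16; 16]); // unsigned saturation clamps to 0xFFFF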
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovqwmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovqwmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqw))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovsqwmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovsqwmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqw))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovsqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovusqwmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovusqwmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqw))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi16(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovusqwmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovqbmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovqbmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqb))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovsqbmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovsqbmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqb))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovsqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovusqbmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovusqbmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqb))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi8(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovusqbmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
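/// Convert packed 64-bit integers in a to packed 32-bit integers with truncation, and store
/// the active results (those with their respective bit set in writemask k) to unaligned
/// memory at mem_addr.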
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm512_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovqdmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm256_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovqdmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovqd))]
pub unsafe fn _mm_mask_cvtepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm512_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovsqdmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm256_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovsqdmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovsqd))]
pub unsafe fn _mm_mask_cvtsepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovsqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm512_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m512i) {
vpmovusqdmem(mem_addr as *mut i8, a.as_i64x8(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm256_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m256i) {
vpmovusqdmem256(mem_addr as *mut i8, a.as_i64x4(), k);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vpmovusqd))]
pub unsafe fn _mm_mask_cvtusepi64_storeu_epi32(mem_addr: *mut i8, k: __mmask8, a: __m128i) {
vpmovusqdmem128(mem_addr as *mut i8, a.as_i64x2(), k);
}
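/// Store 512 bits (composed of 16 packed 32-bit integers) from a into memory.
/// mem_addr does not need to be aligned on any particular boundary.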
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_epi32(mem_addr: *mut i32, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_storeu_epi32(mem_addr: *mut i32, a: __m256i) {
ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm_storeu_epi32(mem_addr: *mut i32, a: __m128i) {
ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_epi64(mem_addr: *const i64) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_loadu_epi64(mem_addr: *const i64) -> __m256i {
ptr::read_unaligned(mem_addr as *const __m256i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm_loadu_epi64(mem_addr: *const i64) -> __m128i {
ptr::read_unaligned(mem_addr as *const __m128i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_epi64(mem_addr: *mut i64, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm256_storeu_epi64(mem_addr: *mut i64, a: __m256i) {
ptr::write_unaligned(mem_addr as *mut __m256i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm_storeu_epi64(mem_addr: *mut i64, a: __m128i) {
ptr::write_unaligned(mem_addr as *mut __m128i, a);
}
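/// Load 512 bits of integer data from memory into dst.
/// mem_addr does not need to be aligned on any particular boundary.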
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_si512(mem_addr: *const i32) -> __m512i {
ptr::read_unaligned(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_si512(mem_addr: *mut i32, a: __m512i) {
ptr::write_unaligned(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_pd(mem_addr: *const f64) -> __m512d {
ptr::read_unaligned(mem_addr as *const __m512d)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_storeu_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write_unaligned(mem_addr as *mut __m512d, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
pub unsafe fn _mm512_loadu_ps(mem_addr: *const f32) -> __m512 {
ptr::read_unaligned(mem_addr as *const __m512)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovups))]
#[stable(feature = "simd_x86", since = "1.27.0")]
pub unsafe fn _mm512_storeu_ps(mem_addr: *mut f32, a: __m512) {
ptr::write_unaligned(mem_addr as *mut __m512, a);
}
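/// Load 512 bits of integer data from memory into dst.
/// mem_addr must be aligned on a 64-byte boundary or a general-protection exception may be
/// generated.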
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_si512(mem_addr: *const i32) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_si512(mem_addr: *mut i32, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_epi32(mem_addr: *const i32) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_load_epi32(mem_addr: *const i32) -> __m256i {
ptr::read(mem_addr as *const __m256i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_load_epi32(mem_addr: *const i32) -> __m128i {
ptr::read(mem_addr as *const __m128i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_epi32(mem_addr: *mut i32, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_store_epi32(mem_addr: *mut i32, a: __m256i) {
ptr::write(mem_addr as *mut __m256i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_store_epi32(mem_addr: *mut i32, a: __m128i) {
ptr::write(mem_addr as *mut __m128i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_epi64(mem_addr: *const i64) -> __m512i {
ptr::read(mem_addr as *const __m512i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_load_epi64(mem_addr: *const i64) -> __m256i {
ptr::read(mem_addr as *const __m256i)
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_load_epi64(mem_addr: *const i64) -> __m128i {
ptr::read(mem_addr as *const __m128i)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_epi64(mem_addr: *mut i64, a: __m512i) {
ptr::write(mem_addr as *mut __m512i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm256_store_epi64(mem_addr: *mut i64, a: __m256i) {
ptr::write(mem_addr as *mut __m256i, a);
}
#[inline]
#[target_feature(enable = "avx512f,avx512vl")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm_store_epi64(mem_addr: *mut i64, a: __m128i) {
ptr::write(mem_addr as *mut __m128i, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_ps(mem_addr: *const f32) -> __m512 {
ptr::read(mem_addr as *const __m512)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_ps(mem_addr: *mut f32, a: __m512) {
ptr::write(mem_addr as *mut __m512, a);
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_load_pd(mem_addr: *const f64) -> __m512d {
ptr::read(mem_addr as *const __m512d)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovaps))]
pub unsafe fn _mm512_store_pd(mem_addr: *mut f64, a: __m512d) {
ptr::write(mem_addr as *mut __m512d, a);
}
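/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied
/// values in reverse order (e0 becomes the lowest element).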
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_setr_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
let r = f64x8::new(e0, e1, e2, e3, e4, e5, e6, e7);
transmute(r)
}
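/// Set packed double-precision (64-bit) floating-point elements in dst with the supplied
/// values; implemented by delegating to _mm512_setr_pd with the arguments reversed.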
#[inline]
#[target_feature(enable = "avx512f")]
pub unsafe fn _mm512_set_pd(
e0: f64,
e1: f64,
e2: f64,
e3: f64,
e4: f64,
e5: f64,
e6: f64,
e7: f64,
) -> __m512d {
_mm512_setr_pd(e7, e6, e5, e4, e3, e2, e1, e0)
}
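/// Move the lower single-precision (32-bit) floating-point element from b to the lower
/// element of dst using writemask k (the element is copied from src when mask bit 0 is not
/// set), and copy the upper 3 packed elements from a to the upper elements of dst.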
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_mask_move_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut mov: f32 = extractsrc;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovss))]
pub unsafe fn _mm_maskz_move_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut mov: f32 = 0.;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_mask_move_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut mov: f64 = extractsrc;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmovsd))]
pub unsafe fn _mm_maskz_move_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut mov: f64 = 0.;
if (k & 0b00000001) != 0 {
mov = simd_extract(b, 0);
}
let r = simd_insert(a, 0, mov);
transmute(r)
}
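/// Add the lower single-precision (32-bit) floating-point elements in a and b, store the
/// result in the lower element of dst using writemask k (the element is copied from src when
/// mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper elements
/// of dst. The masked sub/mul/div scalar intrinsics below follow the same pattern.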
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_mask_add_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut add: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss))]
pub unsafe fn _mm_maskz_add_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut add: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_mask_add_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut add: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd))]
pub unsafe fn _mm_maskz_add_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut add: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
add = extracta + extractb;
}
let r = simd_insert(a, 0, add);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_mask_sub_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut sub: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss))]
pub unsafe fn _mm_maskz_sub_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut sub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_mask_sub_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut sub: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd))]
pub unsafe fn _mm_maskz_sub_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut sub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
sub = extracta - extractb;
}
let r = simd_insert(a, 0, sub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_mask_mul_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut mul: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss))]
pub unsafe fn _mm_maskz_mul_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut mul: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_mask_mul_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut mul: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd))]
pub unsafe fn _mm_maskz_mul_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut mul: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
mul = extracta * extractb;
}
let r = simd_insert(a, 0, mul);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_mask_div_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let extractsrc: f32 = simd_extract(src, 0);
let mut div: f32 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss))]
pub unsafe fn _mm_maskz_div_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
let mut div: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_mask_div_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let extractsrc: f64 = simd_extract(src, 0);
let mut div: f64 = extractsrc;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd))]
pub unsafe fn _mm_maskz_div_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
let mut div: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
div = extracta / extractb;
}
let r = simd_insert(a, 0, div);
transmute(r)
}
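/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the
/// maximum value in the lower element of dst using writemask k (the element is copied from
/// src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the upper
/// elements of dst.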
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_mask_max_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vmaxss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss))]
pub unsafe fn _mm_maskz_max_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vmaxss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_mask_max_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd))]
pub unsafe fn _mm_maskz_max_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vmaxsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_mask_min_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vminss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss))]
pub unsafe fn _mm_maskz_min_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vminss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_mask_min_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vminsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd))]
pub unsafe fn _mm_maskz_min_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vminsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
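/// Compute the square root of the lower single-precision (32-bit) floating-point element in
/// b, store the result in the lower element of dst using writemask k (the element is copied
/// from src when mask bit 0 is not set), and copy the upper 3 packed elements from a to the
/// upper elements of dst.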
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_mask_sqrt_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss))]
pub unsafe fn _mm_maskz_sqrt_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vsqrtss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_mask_sqrt_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd))]
pub unsafe fn _mm_maskz_sqrt_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vsqrtsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
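/// Compute the approximate reciprocal square root of the lower single-precision (32-bit)
/// floating-point element in b, store the result in the lower element of dst, and copy the
/// upper 3 packed elements from a to the upper elements of dst. The maximum relative error
/// for this approximation is less than 2^-14.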
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_rsqrt14_ss(a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_mask_rsqrt14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14ss))]
pub unsafe fn _mm_maskz_rsqrt14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrsqrt14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_rsqrt14_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_mask_rsqrt14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrsqrt14sd))]
pub unsafe fn _mm_maskz_rsqrt14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrsqrt14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
))
}
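/// Compute the approximate reciprocal of the lower single-precision (32-bit) floating-point
/// element in b, store the result in the lower element of dst, and copy the upper 3 packed
/// elements from a to the upper elements of dst. The maximum relative error for this
/// approximation is less than 2^-14.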
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_rcp14_ss(a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_mask_rcp14_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(a.as_f32x4(), b.as_f32x4(), src.as_f32x4(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14ss))]
pub unsafe fn _mm_maskz_rcp14_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vrcp14ss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_rcp14_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_mask_rcp14_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(a.as_f64x2(), b.as_f64x2(), src.as_f64x2(), k))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrcp14sd))]
pub unsafe fn _mm_maskz_rcp14_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vrcp14sd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
))
}
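/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b
/// to a single-precision floating-point number representing the integer exponent, store the
/// result in the lower element of dst, and copy the upper 3 packed elements from a to the
/// upper elements of dst. This essentially calculates floor(log2(x)) for the lower element.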
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_getexp_ss(a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
0b1,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_mask_getexp_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
src.as_f32x4(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss))]
pub unsafe fn _mm_maskz_getexp_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vgetexpss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_getexp_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b1,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_mask_getexp_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_NO_EXC,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd))]
pub unsafe fn _mm_maskz_getexp_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vgetexpsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_NO_EXC,
))
}
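/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in
/// b, store the result in the lower element of dst, and copy the upper 3 packed elements
/// from a to the upper elements of dst. The normalization interval is selected by norm and
/// the sign of the result by sign; the call! macro packs the two immediates as
/// (sign << 2) | norm before invoking the underlying instruction.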
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_ss(
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(
a,
b,
$imm2 << 2 | $imm4_1,
zero,
0b1,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(a, b, $imm2 << 2 | $imm4_1, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_getmant_ss(
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantss(
a,
b,
$imm2 << 2 | $imm4_1,
zero,
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_getmant_sd(
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(
a,
b,
$imm2 << 2 | $imm4_1,
zero,
0b1,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_getmant_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(a, b, $imm2 << 2 | $imm4_1, src, k, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_getmant_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr) => {
vgetmantsd(
a,
b,
$imm2 << 2 | $imm4_1,
zero,
k,
_MM_FROUND_CUR_DIRECTION,
)
};
}
let r = constify_imm4_mantissas!(norm, sign, call);
transmute(r)
}
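/// Round the lower single-precision (32-bit) floating-point element in b to the number of
/// fraction bits specified by imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst.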
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_ss(a: __m128, b: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_ss(k: __mmask8, a: __m128, b: __m128, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr) => {
vrndscaless(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 255))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_roundscale_sd(a: __m128d, b: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, zero, 0b11111111, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_roundscale_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, src, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_roundscale_sd(k: __mmask8, a: __m128d, b: __m128d, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr) => {
vrndscalesd(a, b, zero, k, $imm8, _MM_FROUND_CUR_DIRECTION)
};
}
let r = constify_imm8_sae!(imm8, call);
transmute(r)
}
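/// Scale the lower single-precision (32-bit) floating-point element in a by 2 raised to the
/// power of floor(b), store the result in the lower element of dst, and copy the upper 3
/// packed elements from a to the upper elements of dst.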
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_scalef_ss(a: __m128, b: __m128) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
transmute(vscalefss(a, b, zero, 0b11111111, _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_mask_scalef_ss(src: __m128, k: __mmask8, a: __m128, b: __m128) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
transmute(vscalefss(a, b, src, k, _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss))]
pub unsafe fn _mm_maskz_scalef_ss(k: __mmask8, a: __m128, b: __m128) -> __m128 {
transmute(vscalefss(
a.as_f32x4(),
b.as_f32x4(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_scalef_sd(a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
0b11111111,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_mask_scalef_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd))]
pub unsafe fn _mm_maskz_scalef_sd(k: __mmask8, a: __m128d, b: __m128d) -> __m128d {
transmute(vscalefsd(
a.as_f64x2(),
b.as_f64x2(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
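/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and add
/// the intermediate result to the lower element in c. Store the result in the lower element
/// of dst using writemask k (the element is copied from a when mask bit 0 is not set), and
/// copy the upper 3 packed elements from a to the upper elements of dst.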
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask_fmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fmadd = vfmadd132ss(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_maskz_fmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss))]
pub unsafe fn _mm_mask3_fmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
fmadd = vfmadd132ss(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask_fmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fmadd = vfmadd132sd(fmadd, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_maskz_fmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd))]
pub unsafe fn _mm_mask3_fmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
fmadd = vfmadd132sd(extracta, extractb, fmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask_fmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132ss(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_maskz_fmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss))]
pub unsafe fn _mm_mask3_fmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc = -fmsub;
fmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask_fmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132sd(fmsub, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_maskz_fmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd))]
pub unsafe fn _mm_mask3_fmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc = -fmsub;
fmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask_fnmadd_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_maskz_fnmadd_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
fnmadd = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss))]
pub unsafe fn _mm_mask3_fnmadd_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
fnmadd = vfmadd132ss(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask_fnmadd_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_maskz_fnmadd_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
fnmadd = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd))]
pub unsafe fn _mm_mask3_fnmadd_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
fnmadd = vfmadd132sd(extracta, extractb, fnmadd, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask_fnmsub_ss(a: __m128, k: __mmask8, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_maskz_fnmsub_ss(k: __mmask8, a: __m128, b: __m128, c: __m128) -> __m128 {
let mut fnmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss))]
pub unsafe fn _mm_mask3_fnmsub_ss(a: __m128, b: __m128, c: __m128, k: __mmask8) -> __m128 {
let mut fnmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc = -fnmsub;
fnmsub = vfmadd132ss(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask_fnmsub_sd(a: __m128d, k: __mmask8, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_maskz_fnmsub_sd(k: __mmask8, a: __m128d, b: __m128d, c: __m128d) -> __m128d {
let mut fnmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd))]
pub unsafe fn _mm_mask3_fnmsub_sd(a: __m128d, b: __m128d, c: __m128d, k: __mmask8) -> __m128d {
let mut fnmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc = -fnmsub;
fnmsub = vfmadd132sd(extracta, extractb, extractc, _MM_FROUND_CUR_DIRECTION);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
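/// Add the lower single-precision (32-bit) floating-point elements in a and b, store the
/// result in the lower element of dst, and copy the upper 3 packed elements from a to the
/// upper elements of dst.
///
/// Rounding is done according to the rounding parameter, which must be a compile-time
/// constant (hence rustc_args_required_const and the constify macro) and can be one of:
/// (_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC) // round to nearest, suppress exceptions
/// (_MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC) // round down, suppress exceptions
/// (_MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC) // round up, suppress exceptions
/// (_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC) // truncate, suppress exceptions
/// _MM_FROUND_CUR_DIRECTION // use MXCSR.RC; see _MM_SET_ROUNDING_MODE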
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vaddss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vaddss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vaddss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_add_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vaddsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_add_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vaddsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vaddsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_add_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vaddsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsubss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsubss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsubss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sub_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsubsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sub_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsubsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsubsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsubsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_mul_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmulss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmulss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmulss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_mul_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmulsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_mul_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmulsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmulsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_mul_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmulsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
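/// Divide the lower single-precision (32-bit) floating-point element in a by the lower element
/// in b with explicit rounding, store the result in the lower element of dst, and copy the upper
/// 3 packed elements from a to the upper elements of dst.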
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vdivss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vdivss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vdivss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_div_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vdivsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_div_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vdivsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vdivsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_div_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vdivsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
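/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the
/// maximum value in the lower element of dst, and copy the upper 3 packed elements from a to the
/// upper elements of dst. Since max/min do not round, the sae parameter only controls exception
/// behavior: pass _MM_FROUND_NO_EXC to suppress exceptions, or _MM_FROUND_CUR_DIRECTION
/// otherwise.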
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmaxss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmaxss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vmaxss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_max_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmaxsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_max_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmaxsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vmaxsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_max_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vmaxsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
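/// Compare the lower single-precision (32-bit) floating-point elements in a and b, store the
/// minimum value in the lower element of dst, and copy the upper 3 packed elements from a to the
/// upper elements of dst. The sae parameter is handled as in _mm_max_round_ss above.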
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vminss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vminss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vminss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_min_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vminsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_min_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vminsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vminsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_min_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vminsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_sae!(sae, call))
}
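/// Compute the square root of the lower single-precision (32-bit) floating-point element in b
/// with explicit rounding, store the result in the lower element of dst, and copy the upper 3
/// packed elements from a to the upper elements of dst.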
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsqrtss(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsqrtss(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_ss(k: __mmask8, a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vsqrtss(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_sqrt_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsqrtsd(a, b, zero, 0b1, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_sqrt_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsqrtsd(a, b, src, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vsqrtsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_sqrt_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vsqrtsd(a, b, zero, k, $imm4)
};
}
transmute(constify_imm4_round!(rounding, call))
}
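/// Convert the exponent of the lower single-precision (32-bit) floating-point element in b to a
/// single-precision floating-point number representing the integer exponent, roughly
/// floor(log2(|b[0]|)), store the result in the lower element of dst, and copy the upper 3
/// packed elements from a to the upper elements of dst.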
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_ss(a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vgetexpss(a, b, zero, 0b1, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vgetexpss(a, b, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpss, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_ss(k: __mmask8, a: __m128, b: __m128, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vgetexpss(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_getexp_round_sd(a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vgetexpsd(a, b, zero, 0b1, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_getexp_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vgetexpsd(a, b, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetexpsd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_getexp_round_sd(k: __mmask8, a: __m128d, b: __m128d, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vgetexpsd(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
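/// Normalize the mantissa of the lower single-precision (32-bit) floating-point element in b,
/// store the result in the lower element of dst, and copy the upper 3 packed elements from a to
/// the upper elements of dst. The norm parameter selects the normalization interval and the sign
/// parameter the sign control; the two are packed into one immediate as (sign << 2) | norm, as
/// the macro bodies below show.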
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_ss(
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(a, b, $imm2 << 2 | $imm4_1, zero, 0b1, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(a, b, $imm2 << 2 | $imm4_1, src, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantss, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantss(a, b, $imm2 << 2 | $imm4_1, zero, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(2, 3, 4)]
pub unsafe fn _mm_getmant_round_sd(
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(a, b, $imm2 << 2 | $imm4_1, zero, 0b1, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(4, 5, 6)]
pub unsafe fn _mm_mask_getmant_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(a, b, $imm2 << 2 | $imm4_1, src, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vgetmantsd, norm = 0, sign = 0, sae = 4))]
#[rustc_args_required_const(3, 4, 5)]
pub unsafe fn _mm_maskz_getmant_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
norm: _MM_MANTISSA_NORM_ENUM,
sign: _MM_MANTISSA_SIGN_ENUM,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4_1:expr, $imm2:expr, $imm4_2:expr) => {
vgetmantsd(a, b, $imm2 << 2 | $imm4_1, zero, k, $imm4_2)
};
}
let r = constify_imm4_mantissas_sae!(norm, sign, sae, call);
transmute(r)
}
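/// Round the lower single-precision (32-bit) floating-point element in b to the number of
/// fraction bits given in the upper four bits of imm8 (the low bits select the rounding
/// behavior), store the result in the lower element of dst, and copy the upper 3 packed elements
/// from a to the upper elements of dst.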
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, zero, 0b11111111, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, src, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscaless, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscaless(a, b, zero, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_roundscale_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, zero, 0b11111111, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_roundscale_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, src, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vrndscalesd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_maskz_roundscale_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vrndscalesd(a, b, zero, k, $imm8, $imm4)
};
}
let r = constify_imm8_roundscale!(imm8, sae, call);
transmute(r)
}
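/// Scale the lower single-precision (32-bit) floating-point element in a by the lower element in
/// b, computing dst[0] = a[0] * 2^floor(b[0]) with explicit rounding, and copy the upper 3
/// packed elements from a to the upper elements of dst.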
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_ss(a: __m128, b: __m128, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vscalefss(a, b, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vscalefss(a, b, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vscalefss(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_scalef_round_sd(a: __m128d, b: __m128d, rounding: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vscalefsd(a, b, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_scalef_round_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vscalefsd(a, b, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vscalefsd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_scalef_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
rounding: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vscalefsd(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
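/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, add the
/// lower element in c to the intermediate result (a[0] * b[0] + c[0]) with explicit rounding,
/// and store the result in the lower element of dst. In the _mask, _maskz, and _mask3 variants
/// that follow, a clear mask bit 0 makes the result element fall back to a[0], zero, or c[0]
/// respectively, as the bodies below show.
///
/// A minimal usage sketch (not compiled here; assumes runtime avx512f support):
///
/// ```ignore
/// let (a, b, c) = (_mm_set_ss(2.0), _mm_set_ss(3.0), _mm_set_ss(4.0));
/// let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
/// // lower lane of r is 2.0 * 3.0 + 4.0 == 10.0
/// ```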
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(fmadd, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, fmadd, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmadd_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(fmadd, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmadd_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmadd_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, fmadd, $imm4)
};
}
fmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmadd);
transmute(r)
}
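/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, and subtract
/// the lower element in c from the intermediate result (a[0] * b[0] - c[0]) with explicit
/// rounding. This is expressed through the fused-multiply-add primitive by negating c first.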
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(fmsub, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extractb: f32 = simd_extract(b, 0);
let extractc = -fmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fmsub_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(fmsub, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fmsub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fmsub_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extractb: f64 = simd_extract(b, 0);
let extractc = -fmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fmsub);
transmute(r)
}
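/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// product, and add the lower element in c (-(a[0] * b[0]) + c[0]) with explicit rounding,
/// implemented here by negating a before the fused-multiply-add primitive.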
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fnmadd: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, fnmadd, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmadd_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fnmadd = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmadd_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmadd;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmadd_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmadd);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmadd213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmadd_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fnmadd: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, fnmadd, $imm4)
};
}
fnmadd = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmadd);
transmute(r)
}
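/// Multiply the lower single-precision (32-bit) floating-point elements in a and b, negate the
/// product, and subtract the lower element in c (-(a[0] * b[0]) - c[0]) with explicit rounding,
/// implemented here by negating both a and c before the fused-multiply-add primitive.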
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_ss(a: __m128, b: __m128, c: __m128, rounding: i32) -> __m128 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc: f32 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmsub_round_ss(
a: __m128,
b: __m128,
c: __m128,
k: __mmask8,
rounding: i32,
) -> __m128 {
let mut fnmsub: f32 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f32 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f32 = simd_extract(b, 0);
let extractc = -fnmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132ss(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fnmsub_round_sd(a: __m128d, b: __m128d, c: __m128d, rounding: i32) -> __m128d {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
let fnmsub = constify_imm4_round!(rounding, call);
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fnmsub_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = simd_extract(a, 0);
if (k & 0b00000001) != 0 {
let extracta = -fnmsub;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fnmsub_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128d,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = 0.;
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc: f64 = simd_extract(c, 0);
let extractc = -extractc;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(a, 0, fnmsub);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfnmsub213sd, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask3_fnmsub_round_sd(
a: __m128d,
b: __m128d,
c: __m128d,
k: __mmask8,
rounding: i32,
) -> __m128d {
let mut fnmsub: f64 = simd_extract(c, 0);
if (k & 0b00000001) != 0 {
let extracta: f64 = simd_extract(a, 0);
let extracta = -extracta;
let extractb: f64 = simd_extract(b, 0);
let extractc = -fnmsub;
macro_rules! call {
($imm4:expr) => {
vfmadd132sd(extracta, extractb, extractc, $imm4)
};
}
fnmsub = constify_imm4_round!(rounding, call);
}
let r = simd_insert(c, 0, fnmsub);
transmute(r)
}
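/// Fix up special values (NaN, +/-Inf, +/-0, denormal, and so on) in the lower single-precision
/// (32-bit) floating-point elements of a and b using the per-class response table encoded in the
/// lower element of c together with imm8, store the result in the lower element of dst, and copy
/// the upper 3 packed elements from a to the upper elements of dst. The _maskz variants call the
/// separate zeroing bindings (vfixupimmssz/vfixupimmsdz).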
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_ss(a: __m128, b: __m128, c: __m128i, imm8: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmss(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmss(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr) => {
vfixupimmssz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_fixupimm_sd(a: __m128d, b: __m128d, c: __m128i, imm8: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsd(a, b, c, $imm8, 0b11111111, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_fixupimm_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsd(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_maskz_fixupimm_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr) => {
vfixupimmsdz(a, b, c, $imm8, k, _MM_FROUND_CUR_DIRECTION)
};
}
let fixupimm = constify_imm8_sae!(imm8, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
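/// The _round variants of fixupimm below behave like the functions above but take an explicit
/// sae parameter (_MM_FROUND_NO_EXC to suppress exceptions, or _MM_FROUND_CUR_DIRECTION)
/// instead of always using the current direction.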
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_ss(
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmss(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_ss(
a: __m128,
k: __mmask8,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmss(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmss, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_ss(
k: __mmask8,
a: __m128,
b: __m128,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f32x4();
let c = c.as_i32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmssz(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f32 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(3, 4)]
pub unsafe fn _mm_fixupimm_round_sd(
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsd(a, b, c, $imm8, 0b11111111, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_mask_fixupimm_round_sd(
a: __m128d,
k: __mmask8,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsd(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vfixupimmsd, imm8 = 0, sae = 8))]
#[rustc_args_required_const(4, 5)]
pub unsafe fn _mm_maskz_fixupimm_round_sd(
k: __mmask8,
a: __m128d,
b: __m128d,
c: __m128i,
imm8: i32,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f64x2();
let c = c.as_i64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vfixupimmsdz(a, b, c, $imm8, k, $imm4)
};
}
let fixupimm = constify_imm8_roundscale!(imm8, sae, call);
let fixupimm: f64 = simd_extract(fixupimm, 0);
let r = simd_insert(a, 0, fixupimm);
transmute(r)
}
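/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision
/// (64-bit) floating-point element, store the result in the lower element of dst using writemask
/// k (the element is copied from src when mask bit 0 is not set), and copy the upper element
/// from a to the upper element of dst. The cvtsd_ss pair below performs the narrowing conversion
/// in the other direction.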
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_mask_cvtss_sd(src: __m128d, k: __mmask8, a: __m128d, b: __m128) -> __m128d {
transmute(vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
src.as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd))]
pub unsafe fn _mm_maskz_cvtss_sd(k: __mmask8, a: __m128d, b: __m128) -> __m128d {
transmute(vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
_mm_setzero_pd().as_f64x2(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_mask_cvtsd_ss(src: __m128, k: __mmask8, a: __m128, b: __m128d) -> __m128 {
transmute(vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
src.as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss))]
pub unsafe fn _mm_maskz_cvtsd_ss(k: __mmask8, a: __m128, b: __m128d) -> __m128 {
transmute(vcvtsd2ss(
a.as_f32x4(),
b.as_f64x2(),
_mm_setzero_ps().as_f32x4(),
k,
_MM_FROUND_CUR_DIRECTION,
))
}
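/// Convert the lower single-precision (32-bit) floating-point element in b to a double-precision
/// (64-bit) floating-point element with the given sae, store the result in the lower element of
/// dst, and copy the upper element from a to the upper element of dst. The reverse
/// (double-to-single) conversions below take a full rounding parameter instead, since narrowing
/// can lose precision.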
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundss_sd(a: __m128d, b: __m128, sae: i32) -> __m128d {
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(
a.as_f64x2(),
b.as_f32x4(),
_mm_setzero_pd().as_f64x2(),
0b11111111,
$imm4,
)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundss_sd(
src: __m128d,
k: __mmask8,
a: __m128d,
b: __m128,
sae: i32,
) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f32x4();
let src = src.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(a, b, src, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2sd, sae = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundss_sd(k: __mmask8, a: __m128d, b: __m128, sae: i32) -> __m128d {
let a = a.as_f64x2();
let b = b.as_f32x4();
let zero = _mm_setzero_pd().as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvtss2sd(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsd_ss(a: __m128, b: __m128d, rounding: i32) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f64x2();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(a, b, zero, 0b11111111, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(4)]
pub unsafe fn _mm_mask_cvt_roundsd_ss(
src: __m128,
k: __mmask8,
a: __m128,
b: __m128d,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f64x2();
let src = src.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(a, b, src, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2ss, rounding = 8))]
#[rustc_args_required_const(3)]
pub unsafe fn _mm_maskz_cvt_roundsd_ss(
k: __mmask8,
a: __m128,
b: __m128d,
rounding: i32,
) -> __m128 {
let a = a.as_f32x4();
let b = b.as_f64x2();
let zero = _mm_setzero_ps().as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtsd2ss(a, b, zero, k, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
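/// Convert the lower floating-point element in a to a 32-bit integer with explicit rounding.
/// The _si32 and _i32 spellings are two names for the same signed conversion, the _u32 variants
/// convert to an unsigned 32-bit integer, and the plain (non-_round) forms use the current
/// MXCSR rounding direction.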
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_si32(a: __m128, rounding: i32) -> i32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtss2si(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_i32(a: __m128, rounding: i32) -> i32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtss2si(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundss_u32(a: __m128, rounding: i32) -> u32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtss2usi(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2si))]
pub unsafe fn _mm_cvtss_i32(a: __m128) -> i32 {
transmute(vcvtss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtss2usi))]
pub unsafe fn _mm_cvtss_u32(a: __m128) -> u32 {
transmute(vcvtss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_si32(a: __m128d, rounding: i32) -> i32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvtsd2si(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_i32(a: __m128d, rounding: i32) -> i32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvtsd2si(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi, rounding = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvt_roundsd_u32(a: __m128d, rounding: i32) -> u32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvtsd2usi(a, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2si))]
pub unsafe fn _mm_cvtsd_i32(a: __m128d) -> i32 {
transmute(vcvtsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsd2usi))]
pub unsafe fn _mm_cvtsd_u32(a: __m128d) -> u32 {
transmute(vcvtsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
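/// Convert the 32-bit integer b to a single-precision (32-bit) floating-point element with
/// explicit rounding, store the result in the lower element of dst, and copy the upper 3 packed
/// elements from a to the upper elements of dst. The plain _mm_cvti32_ss/_mm_cvti32_sd further
/// below perform the conversion with an ordinary `as` cast instead.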
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtsi2ss(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundsi32_ss(a: __m128, b: i32, rounding: i32) -> __m128 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtsi2ss(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss, rounding = 8))]
#[rustc_args_required_const(2)]
pub unsafe fn _mm_cvt_roundu32_ss(a: __m128, b: u32, rounding: i32) -> __m128 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvtusi2ss(a, b, $imm4)
};
}
let r = constify_imm4_round!(rounding, call);
transmute(r)
}
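/// Convert the signed 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_ss)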
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2ss))]
pub unsafe fn _mm_cvti32_ss(a: __m128, b: i32) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
}
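/// Convert the signed 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvti32_sd)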
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtsi2sd))]
pub unsafe fn _mm_cvti32_sd(a: __m128d, b: i32) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
}
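/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_si32)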
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_si32(a: __m128, sae: i32) -> i32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvttss2si(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
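/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_i32)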
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_i32(a: __m128, sae: i32) -> i32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvttss2si(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
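/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundss_u32)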
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundss_u32(a: __m128, sae: i32) -> u32 {
let a = a.as_f32x4();
macro_rules! call {
($imm4:expr) => {
vcvttss2usi(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
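/// Convert the lower single-precision (32-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_i32)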
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2si))]
pub unsafe fn _mm_cvttss_i32(a: __m128) -> i32 {
transmute(vcvttss2si(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
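/// Convert the lower single-precision (32-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttss_u32)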
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttss2usi))]
pub unsafe fn _mm_cvttss_u32(a: __m128) -> u32 {
transmute(vcvttss2usi(a.as_f32x4(), _MM_FROUND_CUR_DIRECTION))
}
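/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_si32)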
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_si32(a: __m128d, sae: i32) -> i32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvttsd2si(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
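/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_i32)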
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_i32(a: __m128d, sae: i32) -> i32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvttsd2si(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
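/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtt_roundsd_u32)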
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2usi, sae = 8))]
#[rustc_args_required_const(1)]
pub unsafe fn _mm_cvtt_roundsd_u32(a: __m128d, sae: i32) -> u32 {
let a = a.as_f64x2();
macro_rules! call {
($imm4:expr) => {
vcvttsd2usi(a, $imm4)
};
}
let r = constify_imm4_sae!(sae, call);
transmute(r)
}
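/// Convert the lower double-precision (64-bit) floating-point element in a to a 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_i32)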
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2si))]
pub unsafe fn _mm_cvttsd_i32(a: __m128d) -> i32 {
transmute(vcvttsd2si(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
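/// Convert the lower double-precision (64-bit) floating-point element in a to an unsigned 32-bit integer with truncation, and store the result in dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvttsd_u32)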
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvttsd2usi))]
pub unsafe fn _mm_cvttsd_u32(a: __m128d) -> u32 {
transmute(vcvttsd2usi(a.as_f64x2(), _MM_FROUND_CUR_DIRECTION))
}
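/// Convert the unsigned 32-bit integer b to a single-precision (32-bit) floating-point element, store the result in the lower element of dst, and copy the upper 3 packed elements from a to the upper elements of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_ss)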
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2ss))]
pub unsafe fn _mm_cvtu32_ss(a: __m128, b: u32) -> __m128 {
let b = b as f32;
let r = simd_insert(a, 0, b);
transmute(r)
}
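/// Convert the unsigned 32-bit integer b to a double-precision (64-bit) floating-point element, store the result in the lower element of dst, and copy the upper element from a to the upper element of dst.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_cvtu32_sd)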
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcvtusi2sd))]
pub unsafe fn _mm_cvtu32_sd(a: __m128d, b: u32) -> __m128d {
let b = b as f64;
let r = simd_insert(a, 0, b);
transmute(r)
}
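/// Compare the lower single-precision (32-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_ss)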
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_ss(a: __m128, b: __m128, imm8: i32, sae: i32) -> i32 {
let a = a.as_f32x4();
let b = b.as_f32x4();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vcomiss(a, b, $imm8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
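/// Compare the lower double-precision (64-bit) floating-point element in a and b based on the comparison operand specified by imm8, and return the boolean result (0 or 1).
///
/// Exceptions can be suppressed by passing _MM_FROUND_NO_EXC in the sae parameter.
///
/// [Intel's documentation](https://software.intel.com/sites/landingpage/IntrinsicsGuide/#text=_mm_comi_round_sd)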
#[inline]
#[target_feature(enable = "avx512f")]
#[cfg_attr(test, assert_instr(vcmp, imm8 = 5, sae = 4))]
#[rustc_args_required_const(2, 3)]
pub unsafe fn _mm_comi_round_sd(a: __m128d, b: __m128d, imm8: i32, sae: i32) -> i32 {
let a = a.as_f64x2();
let b = b.as_f64x2();
macro_rules! call {
($imm8:expr, $imm4:expr) => {
vcomisd(a, b, $imm8, $imm4)
};
}
let r = constify_imm5_sae!(imm8, sae, call);
transmute(r)
}
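// Comparison predicate constants (_MM_CMPINT_ENUM) for the _mm512_cmp_ep*_mask family of intrinsics.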
pub const _MM_CMPINT_EQ: _MM_CMPINT_ENUM = 0x00;
pub const _MM_CMPINT_LT: _MM_CMPINT_ENUM = 0x01;
pub const _MM_CMPINT_LE: _MM_CMPINT_ENUM = 0x02;
pub const _MM_CMPINT_FALSE: _MM_CMPINT_ENUM = 0x03;
pub const _MM_CMPINT_NE: _MM_CMPINT_ENUM = 0x04;
pub const _MM_CMPINT_NLT: _MM_CMPINT_ENUM = 0x05;
pub const _MM_CMPINT_NLE: _MM_CMPINT_ENUM = 0x06;
pub const _MM_CMPINT_TRUE: _MM_CMPINT_ENUM = 0x07;
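// Mantissa normalization (_MM_MANTISSA_NORM_ENUM) and sign (_MM_MANTISSA_SIGN_ENUM) controls for the getmant intrinsics.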
pub const _MM_MANT_NORM_1_2: _MM_MANTISSA_NORM_ENUM = 0x00;
pub const _MM_MANT_NORM_P5_2: _MM_MANTISSA_NORM_ENUM = 0x01;
pub const _MM_MANT_NORM_P5_1: _MM_MANTISSA_NORM_ENUM = 0x02;
pub const _MM_MANT_NORM_P75_1P5: _MM_MANTISSA_NORM_ENUM = 0x03;
pub const _MM_MANT_SIGN_SRC: _MM_MANTISSA_SIGN_ENUM = 0x00;
pub const _MM_MANT_SIGN_ZERO: _MM_MANTISSA_SIGN_ENUM = 0x01;
pub const _MM_MANT_SIGN_NAN: _MM_MANTISSA_SIGN_ENUM = 0x02;
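// Shuffle controls (_MM_PERM_ENUM) for _mm512_shuffle_epi32 and related intrinsics; each letter selects one 32-bit lane (A = lowest).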
pub const _MM_PERM_AAAA: _MM_PERM_ENUM = 0x00;
pub const _MM_PERM_AAAB: _MM_PERM_ENUM = 0x01;
pub const _MM_PERM_AAAC: _MM_PERM_ENUM = 0x02;
pub const _MM_PERM_AAAD: _MM_PERM_ENUM = 0x03;
pub const _MM_PERM_AABA: _MM_PERM_ENUM = 0x04;
pub const _MM_PERM_AABB: _MM_PERM_ENUM = 0x05;
pub const _MM_PERM_AABC: _MM_PERM_ENUM = 0x06;
pub const _MM_PERM_AABD: _MM_PERM_ENUM = 0x07;
pub const _MM_PERM_AACA: _MM_PERM_ENUM = 0x08;
pub const _MM_PERM_AACB: _MM_PERM_ENUM = 0x09;
pub const _MM_PERM_AACC: _MM_PERM_ENUM = 0x0A;
pub const _MM_PERM_AACD: _MM_PERM_ENUM = 0x0B;
pub const _MM_PERM_AADA: _MM_PERM_ENUM = 0x0C;
pub const _MM_PERM_AADB: _MM_PERM_ENUM = 0x0D;
pub const _MM_PERM_AADC: _MM_PERM_ENUM = 0x0E;
pub const _MM_PERM_AADD: _MM_PERM_ENUM = 0x0F;
pub const _MM_PERM_ABAA: _MM_PERM_ENUM = 0x10;
pub const _MM_PERM_ABAB: _MM_PERM_ENUM = 0x11;
pub const _MM_PERM_ABAC: _MM_PERM_ENUM = 0x12;
pub const _MM_PERM_ABAD: _MM_PERM_ENUM = 0x13;
pub const _MM_PERM_ABBA: _MM_PERM_ENUM = 0x14;
pub const _MM_PERM_ABBB: _MM_PERM_ENUM = 0x15;
pub const _MM_PERM_ABBC: _MM_PERM_ENUM = 0x16;
pub const _MM_PERM_ABBD: _MM_PERM_ENUM = 0x17;
pub const _MM_PERM_ABCA: _MM_PERM_ENUM = 0x18;
pub const _MM_PERM_ABCB: _MM_PERM_ENUM = 0x19;
pub const _MM_PERM_ABCC: _MM_PERM_ENUM = 0x1A;
pub const _MM_PERM_ABCD: _MM_PERM_ENUM = 0x1B;
pub const _MM_PERM_ABDA: _MM_PERM_ENUM = 0x1C;
pub const _MM_PERM_ABDB: _MM_PERM_ENUM = 0x1D;
pub const _MM_PERM_ABDC: _MM_PERM_ENUM = 0x1E;
pub const _MM_PERM_ABDD: _MM_PERM_ENUM = 0x1F;
pub const _MM_PERM_ACAA: _MM_PERM_ENUM = 0x20;
pub const _MM_PERM_ACAB: _MM_PERM_ENUM = 0x21;
pub const _MM_PERM_ACAC: _MM_PERM_ENUM = 0x22;
pub const _MM_PERM_ACAD: _MM_PERM_ENUM = 0x23;
pub const _MM_PERM_ACBA: _MM_PERM_ENUM = 0x24;
pub const _MM_PERM_ACBB: _MM_PERM_ENUM = 0x25;
pub const _MM_PERM_ACBC: _MM_PERM_ENUM = 0x26;
pub const _MM_PERM_ACBD: _MM_PERM_ENUM = 0x27;
pub const _MM_PERM_ACCA: _MM_PERM_ENUM = 0x28;
pub const _MM_PERM_ACCB: _MM_PERM_ENUM = 0x29;
pub const _MM_PERM_ACCC: _MM_PERM_ENUM = 0x2A;
pub const _MM_PERM_ACCD: _MM_PERM_ENUM = 0x2B;
pub const _MM_PERM_ACDA: _MM_PERM_ENUM = 0x2C;
pub const _MM_PERM_ACDB: _MM_PERM_ENUM = 0x2D;
pub const _MM_PERM_ACDC: _MM_PERM_ENUM = 0x2E;
pub const _MM_PERM_ACDD: _MM_PERM_ENUM = 0x2F;
pub const _MM_PERM_ADAA: _MM_PERM_ENUM = 0x30;
pub const _MM_PERM_ADAB: _MM_PERM_ENUM = 0x31;
pub const _MM_PERM_ADAC: _MM_PERM_ENUM = 0x32;
pub const _MM_PERM_ADAD: _MM_PERM_ENUM = 0x33;
pub const _MM_PERM_ADBA: _MM_PERM_ENUM = 0x34;
pub const _MM_PERM_ADBB: _MM_PERM_ENUM = 0x35;
pub const _MM_PERM_ADBC: _MM_PERM_ENUM = 0x36;
pub const _MM_PERM_ADBD: _MM_PERM_ENUM = 0x37;
pub const _MM_PERM_ADCA: _MM_PERM_ENUM = 0x38;
pub const _MM_PERM_ADCB: _MM_PERM_ENUM = 0x39;
pub const _MM_PERM_ADCC: _MM_PERM_ENUM = 0x3A;
pub const _MM_PERM_ADCD: _MM_PERM_ENUM = 0x3B;
pub const _MM_PERM_ADDA: _MM_PERM_ENUM = 0x3C;
pub const _MM_PERM_ADDB: _MM_PERM_ENUM = 0x3D;
pub const _MM_PERM_ADDC: _MM_PERM_ENUM = 0x3E;
pub const _MM_PERM_ADDD: _MM_PERM_ENUM = 0x3F;
pub const _MM_PERM_BAAA: _MM_PERM_ENUM = 0x40;
pub const _MM_PERM_BAAB: _MM_PERM_ENUM = 0x41;
pub const _MM_PERM_BAAC: _MM_PERM_ENUM = 0x42;
pub const _MM_PERM_BAAD: _MM_PERM_ENUM = 0x43;
pub const _MM_PERM_BABA: _MM_PERM_ENUM = 0x44;
pub const _MM_PERM_BABB: _MM_PERM_ENUM = 0x45;
pub const _MM_PERM_BABC: _MM_PERM_ENUM = 0x46;
pub const _MM_PERM_BABD: _MM_PERM_ENUM = 0x47;
pub const _MM_PERM_BACA: _MM_PERM_ENUM = 0x48;
pub const _MM_PERM_BACB: _MM_PERM_ENUM = 0x49;
pub const _MM_PERM_BACC: _MM_PERM_ENUM = 0x4A;
pub const _MM_PERM_BACD: _MM_PERM_ENUM = 0x4B;
pub const _MM_PERM_BADA: _MM_PERM_ENUM = 0x4C;
pub const _MM_PERM_BADB: _MM_PERM_ENUM = 0x4D;
pub const _MM_PERM_BADC: _MM_PERM_ENUM = 0x4E;
pub const _MM_PERM_BADD: _MM_PERM_ENUM = 0x4F;
pub const _MM_PERM_BBAA: _MM_PERM_ENUM = 0x50;
pub const _MM_PERM_BBAB: _MM_PERM_ENUM = 0x51;
pub const _MM_PERM_BBAC: _MM_PERM_ENUM = 0x52;
pub const _MM_PERM_BBAD: _MM_PERM_ENUM = 0x53;
pub const _MM_PERM_BBBA: _MM_PERM_ENUM = 0x54;
pub const _MM_PERM_BBBB: _MM_PERM_ENUM = 0x55;
pub const _MM_PERM_BBBC: _MM_PERM_ENUM = 0x56;
pub const _MM_PERM_BBBD: _MM_PERM_ENUM = 0x57;
pub const _MM_PERM_BBCA: _MM_PERM_ENUM = 0x58;
pub const _MM_PERM_BBCB: _MM_PERM_ENUM = 0x59;
pub const _MM_PERM_BBCC: _MM_PERM_ENUM = 0x5A;
pub const _MM_PERM_BBCD: _MM_PERM_ENUM = 0x5B;
pub const _MM_PERM_BBDA: _MM_PERM_ENUM = 0x5C;
pub const _MM_PERM_BBDB: _MM_PERM_ENUM = 0x5D;
pub const _MM_PERM_BBDC: _MM_PERM_ENUM = 0x5E;
pub const _MM_PERM_BBDD: _MM_PERM_ENUM = 0x5F;
pub const _MM_PERM_BCAA: _MM_PERM_ENUM = 0x60;
pub const _MM_PERM_BCAB: _MM_PERM_ENUM = 0x61;
pub const _MM_PERM_BCAC: _MM_PERM_ENUM = 0x62;
pub const _MM_PERM_BCAD: _MM_PERM_ENUM = 0x63;
pub const _MM_PERM_BCBA: _MM_PERM_ENUM = 0x64;
pub const _MM_PERM_BCBB: _MM_PERM_ENUM = 0x65;
pub const _MM_PERM_BCBC: _MM_PERM_ENUM = 0x66;
pub const _MM_PERM_BCBD: _MM_PERM_ENUM = 0x67;
pub const _MM_PERM_BCCA: _MM_PERM_ENUM = 0x68;
pub const _MM_PERM_BCCB: _MM_PERM_ENUM = 0x69;
pub const _MM_PERM_BCCC: _MM_PERM_ENUM = 0x6A;
pub const _MM_PERM_BCCD: _MM_PERM_ENUM = 0x6B;
pub const _MM_PERM_BCDA: _MM_PERM_ENUM = 0x6C;
pub const _MM_PERM_BCDB: _MM_PERM_ENUM = 0x6D;
pub const _MM_PERM_BCDC: _MM_PERM_ENUM = 0x6E;
pub const _MM_PERM_BCDD: _MM_PERM_ENUM = 0x6F;
pub const _MM_PERM_BDAA: _MM_PERM_ENUM = 0x70;
pub const _MM_PERM_BDAB: _MM_PERM_ENUM = 0x71;
pub const _MM_PERM_BDAC: _MM_PERM_ENUM = 0x72;
pub const _MM_PERM_BDAD: _MM_PERM_ENUM = 0x73;
pub const _MM_PERM_BDBA: _MM_PERM_ENUM = 0x74;
pub const _MM_PERM_BDBB: _MM_PERM_ENUM = 0x75;
pub const _MM_PERM_BDBC: _MM_PERM_ENUM = 0x76;
pub const _MM_PERM_BDBD: _MM_PERM_ENUM = 0x77;
pub const _MM_PERM_BDCA: _MM_PERM_ENUM = 0x78;
pub const _MM_PERM_BDCB: _MM_PERM_ENUM = 0x79;
pub const _MM_PERM_BDCC: _MM_PERM_ENUM = 0x7A;
pub const _MM_PERM_BDCD: _MM_PERM_ENUM = 0x7B;
pub const _MM_PERM_BDDA: _MM_PERM_ENUM = 0x7C;
pub const _MM_PERM_BDDB: _MM_PERM_ENUM = 0x7D;
pub const _MM_PERM_BDDC: _MM_PERM_ENUM = 0x7E;
pub const _MM_PERM_BDDD: _MM_PERM_ENUM = 0x7F;
pub const _MM_PERM_CAAA: _MM_PERM_ENUM = 0x80;
pub const _MM_PERM_CAAB: _MM_PERM_ENUM = 0x81;
pub const _MM_PERM_CAAC: _MM_PERM_ENUM = 0x82;
pub const _MM_PERM_CAAD: _MM_PERM_ENUM = 0x83;
pub const _MM_PERM_CABA: _MM_PERM_ENUM = 0x84;
pub const _MM_PERM_CABB: _MM_PERM_ENUM = 0x85;
pub const _MM_PERM_CABC: _MM_PERM_ENUM = 0x86;
pub const _MM_PERM_CABD: _MM_PERM_ENUM = 0x87;
pub const _MM_PERM_CACA: _MM_PERM_ENUM = 0x88;
pub const _MM_PERM_CACB: _MM_PERM_ENUM = 0x89;
pub const _MM_PERM_CACC: _MM_PERM_ENUM = 0x8A;
pub const _MM_PERM_CACD: _MM_PERM_ENUM = 0x8B;
pub const _MM_PERM_CADA: _MM_PERM_ENUM = 0x8C;
pub const _MM_PERM_CADB: _MM_PERM_ENUM = 0x8D;
pub const _MM_PERM_CADC: _MM_PERM_ENUM = 0x8E;
pub const _MM_PERM_CADD: _MM_PERM_ENUM = 0x8F;
pub const _MM_PERM_CBAA: _MM_PERM_ENUM = 0x90;
pub const _MM_PERM_CBAB: _MM_PERM_ENUM = 0x91;
pub const _MM_PERM_CBAC: _MM_PERM_ENUM = 0x92;
pub const _MM_PERM_CBAD: _MM_PERM_ENUM = 0x93;
pub const _MM_PERM_CBBA: _MM_PERM_ENUM = 0x94;
pub const _MM_PERM_CBBB: _MM_PERM_ENUM = 0x95;
pub const _MM_PERM_CBBC: _MM_PERM_ENUM = 0x96;
pub const _MM_PERM_CBBD: _MM_PERM_ENUM = 0x97;
pub const _MM_PERM_CBCA: _MM_PERM_ENUM = 0x98;
pub const _MM_PERM_CBCB: _MM_PERM_ENUM = 0x99;
pub const _MM_PERM_CBCC: _MM_PERM_ENUM = 0x9A;
pub const _MM_PERM_CBCD: _MM_PERM_ENUM = 0x9B;
pub const _MM_PERM_CBDA: _MM_PERM_ENUM = 0x9C;
pub const _MM_PERM_CBDB: _MM_PERM_ENUM = 0x9D;
pub const _MM_PERM_CBDC: _MM_PERM_ENUM = 0x9E;
pub const _MM_PERM_CBDD: _MM_PERM_ENUM = 0x9F;
pub const _MM_PERM_CCAA: _MM_PERM_ENUM = 0xA0;
pub const _MM_PERM_CCAB: _MM_PERM_ENUM = 0xA1;
pub const _MM_PERM_CCAC: _MM_PERM_ENUM = 0xA2;
pub const _MM_PERM_CCAD: _MM_PERM_ENUM = 0xA3;
pub const _MM_PERM_CCBA: _MM_PERM_ENUM = 0xA4;
pub const _MM_PERM_CCBB: _MM_PERM_ENUM = 0xA5;
pub const _MM_PERM_CCBC: _MM_PERM_ENUM = 0xA6;
pub const _MM_PERM_CCBD: _MM_PERM_ENUM = 0xA7;
pub const _MM_PERM_CCCA: _MM_PERM_ENUM = 0xA8;
pub const _MM_PERM_CCCB: _MM_PERM_ENUM = 0xA9;
pub const _MM_PERM_CCCC: _MM_PERM_ENUM = 0xAA;
pub const _MM_PERM_CCCD: _MM_PERM_ENUM = 0xAB;
pub const _MM_PERM_CCDA: _MM_PERM_ENUM = 0xAC;
pub const _MM_PERM_CCDB: _MM_PERM_ENUM = 0xAD;
pub const _MM_PERM_CCDC: _MM_PERM_ENUM = 0xAE;
pub const _MM_PERM_CCDD: _MM_PERM_ENUM = 0xAF;
pub const _MM_PERM_CDAA: _MM_PERM_ENUM = 0xB0;
pub const _MM_PERM_CDAB: _MM_PERM_ENUM = 0xB1;
pub const _MM_PERM_CDAC: _MM_PERM_ENUM = 0xB2;
pub const _MM_PERM_CDAD: _MM_PERM_ENUM = 0xB3;
pub const _MM_PERM_CDBA: _MM_PERM_ENUM = 0xB4;
pub const _MM_PERM_CDBB: _MM_PERM_ENUM = 0xB5;
pub const _MM_PERM_CDBC: _MM_PERM_ENUM = 0xB6;
pub const _MM_PERM_CDBD: _MM_PERM_ENUM = 0xB7;
pub const _MM_PERM_CDCA: _MM_PERM_ENUM = 0xB8;
pub const _MM_PERM_CDCB: _MM_PERM_ENUM = 0xB9;
pub const _MM_PERM_CDCC: _MM_PERM_ENUM = 0xBA;
pub const _MM_PERM_CDCD: _MM_PERM_ENUM = 0xBB;
pub const _MM_PERM_CDDA: _MM_PERM_ENUM = 0xBC;
pub const _MM_PERM_CDDB: _MM_PERM_ENUM = 0xBD;
pub const _MM_PERM_CDDC: _MM_PERM_ENUM = 0xBE;
pub const _MM_PERM_CDDD: _MM_PERM_ENUM = 0xBF;
pub const _MM_PERM_DAAA: _MM_PERM_ENUM = 0xC0;
pub const _MM_PERM_DAAB: _MM_PERM_ENUM = 0xC1;
pub const _MM_PERM_DAAC: _MM_PERM_ENUM = 0xC2;
pub const _MM_PERM_DAAD: _MM_PERM_ENUM = 0xC3;
pub const _MM_PERM_DABA: _MM_PERM_ENUM = 0xC4;
pub const _MM_PERM_DABB: _MM_PERM_ENUM = 0xC5;
pub const _MM_PERM_DABC: _MM_PERM_ENUM = 0xC6;
pub const _MM_PERM_DABD: _MM_PERM_ENUM = 0xC7;
pub const _MM_PERM_DACA: _MM_PERM_ENUM = 0xC8;
pub const _MM_PERM_DACB: _MM_PERM_ENUM = 0xC9;
pub const _MM_PERM_DACC: _MM_PERM_ENUM = 0xCA;
pub const _MM_PERM_DACD: _MM_PERM_ENUM = 0xCB;
pub const _MM_PERM_DADA: _MM_PERM_ENUM = 0xCC;
pub const _MM_PERM_DADB: _MM_PERM_ENUM = 0xCD;
pub const _MM_PERM_DADC: _MM_PERM_ENUM = 0xCE;
pub const _MM_PERM_DADD: _MM_PERM_ENUM = 0xCF;
pub const _MM_PERM_DBAA: _MM_PERM_ENUM = 0xD0;
pub const _MM_PERM_DBAB: _MM_PERM_ENUM = 0xD1;
pub const _MM_PERM_DBAC: _MM_PERM_ENUM = 0xD2;
pub const _MM_PERM_DBAD: _MM_PERM_ENUM = 0xD3;
pub const _MM_PERM_DBBA: _MM_PERM_ENUM = 0xD4;
pub const _MM_PERM_DBBB: _MM_PERM_ENUM = 0xD5;
pub const _MM_PERM_DBBC: _MM_PERM_ENUM = 0xD6;
pub const _MM_PERM_DBBD: _MM_PERM_ENUM = 0xD7;
pub const _MM_PERM_DBCA: _MM_PERM_ENUM = 0xD8;
pub const _MM_PERM_DBCB: _MM_PERM_ENUM = 0xD9;
pub const _MM_PERM_DBCC: _MM_PERM_ENUM = 0xDA;
pub const _MM_PERM_DBCD: _MM_PERM_ENUM = 0xDB;
pub const _MM_PERM_DBDA: _MM_PERM_ENUM = 0xDC;
pub const _MM_PERM_DBDB: _MM_PERM_ENUM = 0xDD;
pub const _MM_PERM_DBDC: _MM_PERM_ENUM = 0xDE;
pub const _MM_PERM_DBDD: _MM_PERM_ENUM = 0xDF;
pub const _MM_PERM_DCAA: _MM_PERM_ENUM = 0xE0;
pub const _MM_PERM_DCAB: _MM_PERM_ENUM = 0xE1;
pub const _MM_PERM_DCAC: _MM_PERM_ENUM = 0xE2;
pub const _MM_PERM_DCAD: _MM_PERM_ENUM = 0xE3;
pub const _MM_PERM_DCBA: _MM_PERM_ENUM = 0xE4;
pub const _MM_PERM_DCBB: _MM_PERM_ENUM = 0xE5;
pub const _MM_PERM_DCBC: _MM_PERM_ENUM = 0xE6;
pub const _MM_PERM_DCBD: _MM_PERM_ENUM = 0xE7;
pub const _MM_PERM_DCCA: _MM_PERM_ENUM = 0xE8;
pub const _MM_PERM_DCCB: _MM_PERM_ENUM = 0xE9;
pub const _MM_PERM_DCCC: _MM_PERM_ENUM = 0xEA;
pub const _MM_PERM_DCCD: _MM_PERM_ENUM = 0xEB;
pub const _MM_PERM_DCDA: _MM_PERM_ENUM = 0xEC;
pub const _MM_PERM_DCDB: _MM_PERM_ENUM = 0xED;
pub const _MM_PERM_DCDC: _MM_PERM_ENUM = 0xEE;
pub const _MM_PERM_DCDD: _MM_PERM_ENUM = 0xEF;
pub const _MM_PERM_DDAA: _MM_PERM_ENUM = 0xF0;
pub const _MM_PERM_DDAB: _MM_PERM_ENUM = 0xF1;
pub const _MM_PERM_DDAC: _MM_PERM_ENUM = 0xF2;
pub const _MM_PERM_DDAD: _MM_PERM_ENUM = 0xF3;
pub const _MM_PERM_DDBA: _MM_PERM_ENUM = 0xF4;
pub const _MM_PERM_DDBB: _MM_PERM_ENUM = 0xF5;
pub const _MM_PERM_DDBC: _MM_PERM_ENUM = 0xF6;
pub const _MM_PERM_DDBD: _MM_PERM_ENUM = 0xF7;
pub const _MM_PERM_DDCA: _MM_PERM_ENUM = 0xF8;
pub const _MM_PERM_DDCB: _MM_PERM_ENUM = 0xF9;
pub const _MM_PERM_DDCC: _MM_PERM_ENUM = 0xFA;
pub const _MM_PERM_DDCD: _MM_PERM_ENUM = 0xFB;
pub const _MM_PERM_DDDA: _MM_PERM_ENUM = 0xFC;
pub const _MM_PERM_DDDB: _MM_PERM_ENUM = 0xFD;
pub const _MM_PERM_DDDC: _MM_PERM_ENUM = 0xFE;
pub const _MM_PERM_DDDD: _MM_PERM_ENUM = 0xFF;
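// FFI bindings to the LLVM intrinsics that back the AVX-512 functions defined above.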
#[allow(improper_ctypes)]
extern "C" {
#[link_name = "llvm.x86.avx512.pmul.dq.512"]
fn vpmuldq(a: i32x16, b: i32x16) -> i64x8;
#[link_name = "llvm.x86.avx512.pmulu.dq.512"]
fn vpmuludq(a: u32x16, b: u32x16) -> u64x8;
#[link_name = "llvm.x86.avx512.mask.pmaxs.d.512"]
fn vpmaxsd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.512"]
fn vpmaxsq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.256"]
fn vpmaxsq256(a: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.pmaxs.q.128"]
fn vpmaxsq128(a: i64x2, b: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.pmins.d.512"]
fn vpminsd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pmins.q.512"]
fn vpminsq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pmins.q.256"]
fn vpminsq256(a: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.pmins.q.128"]
fn vpminsq128(a: i64x2, b: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.pmaxu.d.512"]
fn vpmaxud(a: u32x16, b: u32x16) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.512"]
fn vpmaxuq(a: u64x8, b: u64x8) -> u64x8;
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.256"]
fn vpmaxuq256(a: u64x4, b: u64x4) -> u64x4;
#[link_name = "llvm.x86.avx512.mask.pmaxu.q.128"]
fn vpmaxuq128(a: u64x2, b: u64x2) -> u64x2;
#[link_name = "llvm.x86.avx512.mask.pminu.d.512"]
fn vpminud(a: u32x16, b: u32x16) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.pminu.q.512"]
fn vpminuq(a: u64x8, b: u64x8) -> u64x8;
#[link_name = "llvm.x86.avx512.mask.pminu.q.256"]
fn vpminuq256(a: u64x4, b: u64x4) -> u64x4;
#[link_name = "llvm.x86.avx512.mask.pminu.q.128"]
fn vpminuq128(a: u64x2, b: u64x2) -> u64x2;
#[link_name = "llvm.x86.avx512.sqrt.ps.512"]
fn vsqrtps(a: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.sqrt.pd.512"]
fn vsqrtpd(a: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.fma.v16f32"]
fn vfmadd132ps(a: f32x16, b: f32x16, c: f32x16) -> f32x16;
#[link_name = "llvm.fma.v8f64"]
fn vfmadd132pd(a: f64x8, b: f64x8, c: f64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.vfmadd.ps.512"]
fn vfmadd132psround(a: f32x16, b: f32x16, c: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.vfmadd.pd.512"]
fn vfmadd132pdround(a: f64x8, b: f64x8, c: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.vfmaddsub.ps.512"]
fn vfmaddsub213ps(a: f32x16, b: f32x16, c: f32x16, d: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.vfmaddsub.pd.512"]
fn vfmaddsub213pd(a: f64x8, b: f64x8, c: f64x8, d: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.add.ps.512"]
fn vaddps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.add.pd.512"]
fn vaddpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.sub.ps.512"]
fn vsubps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.sub.pd.512"]
fn vsubpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mul.ps.512"]
fn vmulps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mul.pd.512"]
fn vmulpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.div.ps.512"]
fn vdivps(a: f32x16, b: f32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.div.pd.512"]
fn vdivpd(a: f64x8, b: f64x8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.max.ps.512"]
fn vmaxps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.max.pd.512"]
fn vmaxpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.min.ps.512"]
fn vminps(a: f32x16, b: f32x16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.min.pd.512"]
fn vminpd(a: f64x8, b: f64x8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.getexp.ps.512"]
fn vgetexpps(a: f32x16, src: f32x16, m: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.getexp.ps.256"]
fn vgetexpps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.getexp.ps.128"]
fn vgetexpps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getexp.pd.512"]
fn vgetexppd(a: f64x8, src: f64x8, m: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.getexp.pd.256"]
fn vgetexppd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.getexp.pd.128"]
fn vgetexppd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.rndscale.ps.512"]
fn vrndscaleps(a: f32x16, imm8: i32, src: f32x16, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.rndscale.ps.256"]
fn vrndscaleps256(a: f32x8, imm8: i32, src: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.rndscale.ps.128"]
fn vrndscaleps128(a: f32x4, imm8: i32, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.rndscale.pd.512"]
fn vrndscalepd(a: f64x8, imm8: i32, src: f64x8, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.rndscale.pd.256"]
fn vrndscalepd256(a: f64x4, imm8: i32, src: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.rndscale.pd.128"]
fn vrndscalepd128(a: f64x2, imm8: i32, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.scalef.ps.512"]
fn vscalefps(a: f32x16, b: f32x16, src: f32x16, mask: u16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.scalef.ps.256"]
fn vscalefps256(a: f32x8, b: f32x8, src: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.scalef.ps.128"]
fn vscalefps128(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.scalef.pd.512"]
fn vscalefpd(a: f64x8, b: f64x8, src: f64x8, mask: u8, rounding: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.scalef.pd.256"]
fn vscalefpd256(a: f64x4, b: f64x4, src: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.scalef.pd.128"]
fn vscalefpd128(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ps.512"]
fn vfixupimmps(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ps.256"]
fn vfixupimmps256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ps.128"]
fn vfixupimmps128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.fixupimm.pd.512"]
fn vfixupimmpd(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.fixupimm.pd.256"]
fn vfixupimmpd256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.fixupimm.pd.128"]
fn vfixupimmpd128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.512"]
fn vfixupimmpsz(a: f32x16, b: f32x16, c: i32x16, imm8: i32, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.256"]
fn vfixupimmpsz256(a: f32x8, b: f32x8, c: i32x8, imm8: i32, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ps.128"]
fn vfixupimmpsz128(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.512"]
fn vfixupimmpdz(a: f64x8, b: f64x8, c: i64x8, imm8: i32, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.256"]
fn vfixupimmpdz256(a: f64x4, b: f64x4, c: i64x4, imm8: i32, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.pd.128"]
fn vfixupimmpdz128(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.pternlog.d.512"]
fn vpternlogd(a: i32x16, b: i32x16, c: i32x16, imm8: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.pternlog.d.256"]
fn vpternlogd256(a: i32x8, b: i32x8, c: i32x8, imm8: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.pternlog.d.128"]
fn vpternlogd128(a: i32x4, b: i32x4, c: i32x4, imm8: i32) -> i32x4;
#[link_name = "llvm.x86.avx512.pternlog.q.512"]
fn vpternlogq(a: i64x8, b: i64x8, c: i64x8, imm8: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.pternlog.q.256"]
fn vpternlogq256(a: i64x4, b: i64x4, c: i64x4, imm8: i32) -> i64x4;
#[link_name = "llvm.x86.avx512.pternlog.q.128"]
fn vpternlogq128(a: i64x2, b: i64x2, c: i64x2, imm8: i32) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.getmant.ps.512"]
fn vgetmantps(a: f32x16, mantissas: i32, src: f32x16, m: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.getmant.ps.256"]
fn vgetmantps256(a: f32x8, mantissas: i32, src: f32x8, m: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.getmant.ps.128"]
fn vgetmantps128(a: f32x4, mantissas: i32, src: f32x4, m: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getmant.pd.512"]
fn vgetmantpd(a: f64x8, mantissas: i32, src: f64x8, m: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.getmant.pd.256"]
fn vgetmantpd256(a: f64x4, mantissas: i32, src: f64x4, m: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.getmant.pd.128"]
fn vgetmantpd128(a: f64x2, mantissas: i32, src: f64x2, m: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.rcp14.ps.512"]
fn vrcp14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.rcp14.ps.256"]
fn vrcp14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.rcp14.ps.128"]
fn vrcp14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rcp14.pd.512"]
fn vrcp14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.rcp14.pd.256"]
fn vrcp14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.rcp14.pd.128"]
fn vrcp14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.rsqrt14.ps.512"]
fn vrsqrt14ps(a: f32x16, src: f32x16, m: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.rsqrt14.ps.256"]
fn vrsqrt14ps256(a: f32x8, src: f32x8, m: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.rsqrt14.ps.128"]
fn vrsqrt14ps128(a: f32x4, src: f32x4, m: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rsqrt14.pd.512"]
fn vrsqrt14pd(a: f64x8, src: f64x8, m: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.rsqrt14.pd.256"]
fn vrsqrt14pd256(a: f64x4, src: f64x4, m: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.rsqrt14.pd.128"]
fn vrsqrt14pd128(a: f64x2, src: f64x2, m: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.cvtps2dq.512"]
fn vcvtps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.cvtps2udq.512"]
fn vcvtps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.cvtps2udq.256"]
fn vcvtps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.cvtps2udq.128"]
fn vcvtps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.cvtps2pd.512"]
fn vcvtps2pd(a: f32x8, src: f64x8, mask: u8, sae: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2ps.512"]
fn vcvtpd2ps(a: f64x8, src: f32x8, mask: u8, rounding: i32) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2dq.512"]
fn vcvtpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2udq.512"]
fn vcvtpd2udq(a: f64x8, src: u32x8, mask: u8, rounding: i32) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.cvtpd2udq.256"]
fn vcvtpd2udq256(a: f64x4, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.cvtpd2udq.128"]
fn vcvtpd2udq128(a: f64x2, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.sitofp.round.v16f32.v16i32"]
fn vcvtdq2ps(a: i32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.uitofp.round.v16f32.v16i32"]
fn vcvtudq2ps(a: u32x16, rounding: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.512"]
fn vcvtps2ph(a: f32x16, sae: i32, src: i16x16, mask: u16) -> i16x16;
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.256"]
fn vcvtps2ph256(a: f32x8, sae: i32, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.vcvtps2ph.128"]
fn vcvtps2ph128(a: f32x4, sae: i32, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.vcvtph2ps.512"]
fn vcvtph2ps(a: i16x16, src: f32x16, mask: u16, sae: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.cvttps2dq.512"]
fn vcvttps2dq(a: f32x16, src: i32x16, mask: u16, rounding: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.cvttps2dq.256"]
fn vcvttps2dq256(a: f32x8, src: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.cvttps2dq.128"]
fn vcvttps2dq128(a: f32x4, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.cvttps2udq.512"]
fn vcvttps2udq(a: f32x16, src: u32x16, mask: u16, rounding: i32) -> u32x16;
#[link_name = "llvm.x86.avx512.mask.cvttps2udq.256"]
fn vcvttps2udq256(a: f32x8, src: u32x8, mask: u8) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.cvttps2udq.128"]
fn vcvttps2udq128(a: f32x4, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.cvttpd2dq.512"]
fn vcvttpd2dq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.cvttpd2dq.256"]
fn vcvttpd2dq256(a: f64x4, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.cvttpd2dq.128"]
fn vcvttpd2dq128(a: f64x2, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.cvttpd2udq.512"]
fn vcvttpd2udq(a: f64x8, src: i32x8, mask: u8, rounding: i32) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.cvttpd2udq.256"]
fn vcvttpd2udq256(a: f64x4, src: i32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.cvttpd2udq.128"]
fn vcvttpd2udq128(a: f64x2, src: i32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.pmov.dw.128"]
fn vpmovdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmov.db.256"]
fn vpmovdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmov.db.128"]
fn vpmovdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmov.qw.256"]
fn vpmovqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmov.qw.128"]
fn vpmovqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmov.qb.256"]
fn vpmovqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmov.qb.128"]
fn vpmovqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmov.qd.128"]
fn vpmovqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.512"]
fn vpmovdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.256"]
fn vpmovdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.dw.mem.128"]
fn vpmovdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.512"]
fn vpmovsdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.256"]
fn vpmovsdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.mem.128"]
fn vpmovsdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.512"]
fn vpmovusdwmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.256"]
fn vpmovusdwmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.mem.128"]
fn vpmovusdwmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.512"]
fn vpmovdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.256"]
fn vpmovdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.db.mem.128"]
fn vpmovdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.512"]
fn vpmovsdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.256"]
fn vpmovsdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.db.mem.128"]
fn vpmovsdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.512"]
fn vpmovusdbmem(mem_addr: *mut i8, a: i32x16, mask: u16);
#[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.256"]
fn vpmovusdbmem256(mem_addr: *mut i8, a: i32x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.db.mem.128"]
fn vpmovusdbmem128(mem_addr: *mut i8, a: i32x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.512"]
fn vpmovqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.256"]
fn vpmovqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qw.mem.128"]
fn vpmovqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.512"]
fn vpmovsqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.256"]
fn vpmovsqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.mem.128"]
fn vpmovsqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.512"]
fn vpmovusqwmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.256"]
fn vpmovusqwmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.mem.128"]
fn vpmovusqwmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.512"]
fn vpmovqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.256"]
fn vpmovqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.mem.128"]
fn vpmovqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.512"]
fn vpmovsqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.256"]
fn vpmovsqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.mem.128"]
fn vpmovsqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.512"]
fn vpmovusqbmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.256"]
fn vpmovusqbmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.mem.128"]
fn vpmovusqbmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.512"]
fn vpmovqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.256"]
fn vpmovqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qd.mem.128"]
fn vpmovqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.512"]
fn vpmovsqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.256"]
fn vpmovsqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.mem.128"]
fn vpmovsqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.512"]
fn vpmovusqdmem(mem_addr: *mut i8, a: i64x8, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.256"]
fn vpmovusqdmem256(mem_addr: *mut i8, a: i64x4, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.mem.128"]
fn vpmovusqdmem128(mem_addr: *mut i8, a: i64x2, mask: u8);
#[link_name = "llvm.x86.avx512.mask.pmov.qb.512"]
fn vpmovqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.512"]
fn vpmovsdw(a: i32x16, src: i16x16, mask: u16) -> i16x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.256"]
fn vpmovsdw256(a: i32x8, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.dw.128"]
fn vpmovsdw128(a: i32x4, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.db.512"]
fn vpmovsdb(a: i32x16, src: i8x16, mask: u16) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.db.256"]
fn vpmovsdb256(a: i32x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.db.128"]
fn vpmovsdb128(a: i32x4, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.512"]
fn vpmovsqd(a: i64x8, src: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.256"]
fn vpmovsqd256(a: i64x4, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.pmovs.qd.128"]
fn vpmovsqd128(a: i64x2, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.512"]
fn vpmovsqw(a: i64x8, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.256"]
fn vpmovsqw256(a: i64x4, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qw.128"]
fn vpmovsqw128(a: i64x2, src: i16x8, mask: u8) -> i16x8;
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.512"]
fn vpmovsqb(a: i64x8, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.256"]
fn vpmovsqb256(a: i64x4, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovs.qb.128"]
fn vpmovsqb128(a: i64x2, src: i8x16, mask: u8) -> i8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.512"]
fn vpmovusdw(a: u32x16, src: u16x16, mask: u16) -> u16x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.256"]
fn vpmovusdw256(a: u32x8, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.dw.128"]
fn vpmovusdw128(a: u32x4, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.db.512"]
fn vpmovusdb(a: u32x16, src: u8x16, mask: u16) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.db.256"]
fn vpmovusdb256(a: u32x8, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.db.128"]
fn vpmovusdb128(a: u32x4, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.512"]
fn vpmovusqd(a: u64x8, src: u32x8, mask: u8) -> u32x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.256"]
fn vpmovusqd256(a: u64x4, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.pmovus.qd.128"]
fn vpmovusqd128(a: u64x2, src: u32x4, mask: u8) -> u32x4;
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.512"]
fn vpmovusqw(a: u64x8, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.256"]
fn vpmovusqw256(a: u64x4, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qw.128"]
fn vpmovusqw128(a: u64x2, src: u16x8, mask: u8) -> u16x8;
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.512"]
fn vpmovusqb(a: u64x8, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.256"]
fn vpmovusqb256(a: u64x4, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.mask.pmovus.qb.128"]
fn vpmovusqb128(a: u64x2, src: u8x16, mask: u8) -> u8x16;
#[link_name = "llvm.x86.avx512.gather.dpd.512"]
fn vgatherdpd(src: f64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.gather.dps.512"]
fn vgatherdps(src: f32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> f32x16;
#[link_name = "llvm.x86.avx512.gather.qpd.512"]
fn vgatherqpd(src: f64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f64x8;
#[link_name = "llvm.x86.avx512.gather.qps.512"]
fn vgatherqps(src: f32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> f32x8;
#[link_name = "llvm.x86.avx512.gather.dpq.512"]
fn vpgatherdq(src: i64x8, slice: *const i8, offsets: i32x8, mask: i8, scale: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.gather.dpi.512"]
fn vpgatherdd(src: i32x16, slice: *const i8, offsets: i32x16, mask: i16, scale: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.gather.qpq.512"]
fn vpgatherqq(src: i64x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.gather.qpi.512"]
fn vpgatherqd(src: i32x8, slice: *const i8, offsets: i64x8, mask: i8, scale: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.scatter.dpd.512"]
fn vscatterdpd(slice: *mut i8, mask: i8, offsets: i32x8, src: f64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dps.512"]
fn vscatterdps(slice: *mut i8, mask: i16, offsets: i32x16, src: f32x16, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpd.512"]
fn vscatterqpd(slice: *mut i8, mask: i8, offsets: i64x8, src: f64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qps.512"]
fn vscatterqps(slice: *mut i8, mask: i8, offsets: i64x8, src: f32x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpq.512"]
fn vpscatterdq(slice: *mut i8, mask: i8, offsets: i32x8, src: i64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.dpi.512"]
fn vpscatterdd(slice: *mut i8, mask: i16, offsets: i32x16, src: i32x16, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpq.512"]
fn vpscatterqq(slice: *mut i8, mask: i8, offsets: i64x8, src: i64x8, scale: i32);
#[link_name = "llvm.x86.avx512.scatter.qpi.512"]
fn vpscatterqd(slice: *mut i8, mask: i8, offsets: i64x8, src: i32x8, scale: i32);
#[link_name = "llvm.x86.avx512.mask.cmp.ss"]
fn vcmpss(a: __m128, b: __m128, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.sd"]
fn vcmpsd(a: __m128d, b: __m128d, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.ps.512"]
fn vcmpps(a: f32x16, b: f32x16, op: i32, m: i16, sae: i32) -> i16;
#[link_name = "llvm.x86.avx512.mask.cmp.ps.256"]
fn vcmpps256(a: f32x8, b: f32x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.ps.128"]
fn vcmpps128(a: f32x4, b: f32x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.pd.512"]
fn vcmppd(a: f64x8, b: f64x8, op: i32, m: i8, sae: i32) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.pd.256"]
fn vcmppd256(a: f64x4, b: f64x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.pd.128"]
fn vcmppd128(a: f64x2, b: f64x2, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.q.512"]
fn vpcmpuq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.q.256"]
fn vpcmpuq256(a: i64x4, b: i64x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.q.128"]
fn vpcmpuq128(a: i64x2, b: i64x2, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.q.512"]
fn vpcmpq(a: i64x8, b: i64x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.q.256"]
fn vpcmpq256(a: i64x4, b: i64x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.q.128"]
fn vpcmpq128(a: i64x2, b: i64x2, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.d.512"]
fn vpcmpud(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
#[link_name = "llvm.x86.avx512.mask.ucmp.d.256"]
fn vpcmpud256(a: i32x8, b: i32x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.ucmp.d.128"]
fn vpcmpud128(a: i32x4, b: i32x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.d.512"]
fn vpcmpd(a: i32x16, b: i32x16, op: i32, m: i16) -> i16;
#[link_name = "llvm.x86.avx512.mask.cmp.d.256"]
fn vpcmpd256(a: i32x8, b: i32x8, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.cmp.d.128"]
fn vpcmpd128(a: i32x4, b: i32x4, op: i32, m: i8) -> i8;
#[link_name = "llvm.x86.avx512.mask.prol.d.512"]
fn vprold(a: i32x16, i8: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prol.d.256"]
fn vprold256(a: i32x8, i8: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.prol.d.128"]
fn vprold128(a: i32x4, i8: i32) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.pror.d.512"]
fn vprord(a: i32x16, i8: i32) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.pror.d.256"]
fn vprord256(a: i32x8, i8: i32) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.pror.d.128"]
fn vprord128(a: i32x4, i8: i32) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.prol.q.512"]
fn vprolq(a: i64x8, i8: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.prol.q.256"]
fn vprolq256(a: i64x4, i8: i32) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.prol.q.128"]
fn vprolq128(a: i64x2, i8: i32) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.pror.q.512"]
fn vprorq(a: i64x8, i8: i32) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.pror.q.256"]
fn vprorq256(a: i64x4, i8: i32) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.pror.q.128"]
fn vprorq128(a: i64x2, i8: i32) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.prolv.d.512"]
fn vprolvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prolv.d.256"]
fn vprolvd256(a: i32x8, b: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.prolv.d.128"]
fn vprolvd128(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.prorv.d.512"]
fn vprorvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.prorv.d.256"]
fn vprorvd256(a: i32x8, b: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.prorv.d.128"]
fn vprorvd128(a: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.prolv.q.512"]
fn vprolvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.prolv.q.256"]
fn vprolvq256(a: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.prolv.q.128"]
fn vprolvq128(a: i64x2, b: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.prorv.q.512"]
fn vprorvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.prorv.q.256"]
fn vprorvq256(a: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.prorv.q.128"]
fn vprorvq128(a: i64x2, b: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.psllv.d.512"]
fn vpsllvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrlv.d.512"]
fn vpsrlvd(a: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psllv.q.512"]
fn vpsllvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psrlv.q.512"]
fn vpsrlvq(a: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.pslli.d.512"]
fn vpsllid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.psrli.d.512"]
fn vpsrlid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.pslli.q.512"]
fn vpslliq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psrli.q.512"]
fn vpsrliq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psll.d.512"]
fn vpslld(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psrl.d.512"]
fn vpsrld(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psll.q.512"]
fn vpsllq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psrl.q.512"]
fn vpsrlq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psra.d.512"]
fn vpsrad(a: i32x16, count: i32x4) -> i32x16;
#[link_name = "llvm.x86.avx512.psra.q.512"]
fn vpsraq(a: i64x8, count: i64x2) -> i64x8;
#[link_name = "llvm.x86.avx512.psra.q.256"]
fn vpsraq256(a: i64x4, count: i64x2) -> i64x4;
#[link_name = "llvm.x86.avx512.psra.q.128"]
fn vpsraq128(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.psrai.d.512"]
fn vpsraid(a: i32x16, imm8: u32) -> i32x16;
#[link_name = "llvm.x86.avx512.psrai.q.512"]
fn vpsraiq(a: i64x8, imm8: u32) -> i64x8;
#[link_name = "llvm.x86.avx512.psrai.q.256"]
fn vpsraiq256(a: i64x4, imm8: u32) -> i64x4;
#[link_name = "llvm.x86.avx512.psrai.q.128"]
fn vpsraiq128(a: i64x2, imm8: u32) -> i64x2;
#[link_name = "llvm.x86.avx512.psrav.d.512"]
fn vpsravd(a: i32x16, count: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.psrav.q.512"]
fn vpsravq(a: i64x8, count: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.psrav.q.256"]
fn vpsravq256(a: i64x4, count: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.psrav.q.128"]
fn vpsravq128(a: i64x2, count: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.vpermilvar.ps.512"]
fn vpermilps(a: f32x16, b: i32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.vpermilvar.pd.512"]
fn vpermilpd(a: f64x8, b: i64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.permvar.si.512"]
fn vpermd(a: i32x16, idx: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.permvar.di.512"]
fn vpermq(a: i64x8, idx: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.permvar.di.256"]
fn vpermq256(a: i64x4, idx: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.permvar.sf.512"]
fn vpermps(a: f32x16, idx: i32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.permvar.df.512"]
fn vpermpd(a: f64x8, idx: i64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.permvar.df.256"]
fn vpermpd256(a: f64x4, idx: i64x4) -> f64x4;
#[link_name = "llvm.x86.avx512.vpermi2var.d.512"]
fn vpermi2d(a: i32x16, idx: i32x16, b: i32x16) -> i32x16;
#[link_name = "llvm.x86.avx512.vpermi2var.d.256"]
fn vpermi2d256(a: i32x8, idx: i32x8, b: i32x8) -> i32x8;
#[link_name = "llvm.x86.avx512.vpermi2var.d.128"]
fn vpermi2d128(a: i32x4, idx: i32x4, b: i32x4) -> i32x4;
#[link_name = "llvm.x86.avx512.vpermi2var.q.512"]
fn vpermi2q(a: i64x8, idx: i64x8, b: i64x8) -> i64x8;
#[link_name = "llvm.x86.avx512.vpermi2var.q.256"]
fn vpermi2q256(a: i64x4, idx: i64x4, b: i64x4) -> i64x4;
#[link_name = "llvm.x86.avx512.vpermi2var.q.128"]
fn vpermi2q128(a: i64x2, idx: i64x2, b: i64x2) -> i64x2;
#[link_name = "llvm.x86.avx512.vpermi2var.ps.512"]
fn vpermi2ps(a: f32x16, idx: i32x16, b: f32x16) -> f32x16;
#[link_name = "llvm.x86.avx512.vpermi2var.ps.256"]
fn vpermi2ps256(a: f32x8, idx: i32x8, b: f32x8) -> f32x8;
#[link_name = "llvm.x86.avx512.vpermi2var.ps.128"]
fn vpermi2ps128(a: f32x4, idx: i32x4, b: f32x4) -> f32x4;
#[link_name = "llvm.x86.avx512.vpermi2var.pd.512"]
fn vpermi2pd(a: f64x8, idx: i64x8, b: f64x8) -> f64x8;
#[link_name = "llvm.x86.avx512.vpermi2var.pd.256"]
fn vpermi2pd256(a: f64x4, idx: i64x4, b: f64x4) -> f64x4;
#[link_name = "llvm.x86.avx512.vpermi2var.pd.128"]
fn vpermi2pd128(a: f64x2, idx: i64x2, b: f64x2) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.compress.d.512"]
fn vpcompressd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.compress.d.256"]
fn vpcompressd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.compress.d.128"]
fn vpcompressd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.compress.q.512"]
fn vpcompressq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.compress.q.256"]
fn vpcompressq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.compress.q.128"]
fn vpcompressq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.compress.ps.512"]
fn vcompressps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.compress.ps.256"]
fn vcompressps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.compress.ps.128"]
fn vcompressps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.compress.pd.512"]
fn vcompresspd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.compress.pd.256"]
fn vcompresspd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.compress.pd.128"]
fn vcompresspd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.expand.d.512"]
fn vpexpandd(a: i32x16, src: i32x16, mask: u16) -> i32x16;
#[link_name = "llvm.x86.avx512.mask.expand.d.256"]
fn vpexpandd256(a: i32x8, src: i32x8, mask: u8) -> i32x8;
#[link_name = "llvm.x86.avx512.mask.expand.d.128"]
fn vpexpandd128(a: i32x4, src: i32x4, mask: u8) -> i32x4;
#[link_name = "llvm.x86.avx512.mask.expand.q.512"]
fn vpexpandq(a: i64x8, src: i64x8, mask: u8) -> i64x8;
#[link_name = "llvm.x86.avx512.mask.expand.q.256"]
fn vpexpandq256(a: i64x4, src: i64x4, mask: u8) -> i64x4;
#[link_name = "llvm.x86.avx512.mask.expand.q.128"]
fn vpexpandq128(a: i64x2, src: i64x2, mask: u8) -> i64x2;
#[link_name = "llvm.x86.avx512.mask.expand.ps.512"]
fn vexpandps(a: f32x16, src: f32x16, mask: u16) -> f32x16;
#[link_name = "llvm.x86.avx512.mask.expand.ps.256"]
fn vexpandps256(a: f32x8, src: f32x8, mask: u8) -> f32x8;
#[link_name = "llvm.x86.avx512.mask.expand.ps.128"]
fn vexpandps128(a: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.expand.pd.512"]
fn vexpandpd(a: f64x8, src: f64x8, mask: u8) -> f64x8;
#[link_name = "llvm.x86.avx512.mask.expand.pd.256"]
fn vexpandpd256(a: f64x4, src: f64x4, mask: u8) -> f64x4;
#[link_name = "llvm.x86.avx512.mask.expand.pd.128"]
fn vexpandpd128(a: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.add.ss.round"]
fn vaddss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.add.sd.round"]
fn vaddsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.sub.ss.round"]
fn vsubss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.sub.sd.round"]
fn vsubsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.mul.ss.round"]
fn vmulss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.mul.sd.round"]
fn vmulsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.div.ss.round"]
fn vdivss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.div.sd.round"]
fn vdivsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.max.ss.round"]
fn vmaxss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.max.sd.round"]
fn vmaxsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.min.ss.round"]
fn vminss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.min.sd.round"]
fn vminsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.sqrt.ss"]
fn vsqrtss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.sqrt.sd"]
fn vsqrtsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.getexp.ss"]
fn vgetexpss(a: f32x4, b: f32x4, src: f32x4, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getexp.sd"]
fn vgetexpsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.getmant.ss"]
fn vgetmantss(a: f32x4, b: f32x4, mantissas: i32, src: f32x4, m: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.getmant.sd"]
fn vgetmantsd(a: f64x2, b: f64x2, mantissas: i32, src: f64x2, m: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.rsqrt14.ss"]
fn vrsqrt14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rsqrt14.sd"]
fn vrsqrt14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.rcp14.ss"]
fn vrcp14ss(a: f32x4, b: f32x4, src: f32x4, mask: u8) -> f32x4;
#[link_name = "llvm.x86.avx512.rcp14.sd"]
fn vrcp14sd(a: f64x2, b: f64x2, src: f64x2, mask: u8) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.rndscale.ss"]
fn vrndscaless(a: f32x4, b: f32x4, src: f32x4, mask: u8, imm8: i32, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.rndscale.sd"]
fn vrndscalesd(a: f64x2, b: f64x2, src: f64x2, mask: u8, imm8: i32, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.scalef.ss"]
fn vscalefss(a: f32x4, b: f32x4, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.scalef.sd"]
fn vscalefsd(a: f64x2, b: f64x2, src: f64x2, mask: u8, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.vfmadd.f32"]
fn vfmadd132ss(a: f32, b: f32, c: f32, rounding: i32) -> f32;
#[link_name = "llvm.x86.avx512.vfmadd.f64"]
fn vfmadd132sd(a: f64, b: f64, c: f64, rounding: i32) -> f64;
#[link_name = "llvm.x86.avx512.mask.fixupimm.ss"]
fn vfixupimmss(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.mask.fixupimm.sd"]
fn vfixupimmsd(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.ss"]
fn vfixupimmssz(a: f32x4, b: f32x4, c: i32x4, imm8: i32, mask: u8, sae: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.maskz.fixupimm.sd"]
fn vfixupimmsdz(a: f64x2, b: f64x2, c: i64x2, imm8: i32, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.cvtss2sd.round"]
fn vcvtss2sd(a: f64x2, b: f32x4, src: f64x2, mask: u8, sae: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.mask.cvtsd2ss.round"]
fn vcvtsd2ss(a: f32x4, b: f64x2, src: f32x4, mask: u8, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.vcvtss2si32"]
fn vcvtss2si(a: f32x4, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcvtss2usi32"]
fn vcvtss2usi(a: f32x4, rounding: i32) -> u32;
#[link_name = "llvm.x86.avx512.vcvtsd2si32"]
fn vcvtsd2si(a: f64x2, rounding: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcvtsd2usi32"]
fn vcvtsd2usi(a: f64x2, rounding: i32) -> u32;
#[link_name = "llvm.x86.avx512.cvtsi2ss32"]
fn vcvtsi2ss(a: f32x4, b: i32, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtsi2sd64"]
fn vcvtsi2sd(a: f64x2, b: i64, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.cvtusi2ss"]
fn vcvtusi2ss(a: f32x4, b: u32, rounding: i32) -> f32x4;
#[link_name = "llvm.x86.avx512.cvtusi642sd"]
fn vcvtusi2sd(a: f64x2, b: u64, rounding: i32) -> f64x2;
#[link_name = "llvm.x86.avx512.vcomi.ss"]
fn vcomiss(a: f32x4, b: f32x4, imm8: i32, sae: i32) -> i32;
#[link_name = "llvm.x86.avx512.vcomi.sd"]
fn vcomisd(a: f64x2, b: f64x2, imm8: i32, sae: i32) -> i32;
}
#[cfg(test)]
mod tests {
use stdarch_test::simd_test;
use crate::core_arch::x86::*;
use crate::hint::black_box;
use crate::mem;
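// Convention for the masked tests below: each `mask_`/`maskz_` intrinsic is
// called twice, first with an all-zero mask (the result must be the `src`
// passthrough or all zeros, respectively), then with a partial mask to check
// per-lane merging.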
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_abs_epi32(a);
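// `abs` of `i32::MIN` has no i32 representation and wraps; the expected
// lane is spelled `i32::MAX.wrapping_add(1)` (== `i32::MIN`) to make the
// two's-complement wraparound explicit.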
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_mask_abs_epi32(a, 0, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_abs_epi32(a, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_abs_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm512_maskz_abs_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_abs_epi32(0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 1, 1, i32::MAX,
i32::MAX.wrapping_add(1), 100, 100, 32,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_abs_epi32() {
#[rustfmt::skip]
let a = _mm256_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm256_mask_abs_epi32(a, 0, a);
assert_eq_m256i(r, a);
let r = _mm256_mask_abs_epi32(a, 0b00001111, a);
#[rustfmt::skip]
let e = _mm256_setr_epi32(
0, 1, 1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_abs_epi32() {
#[rustfmt::skip]
let a = _mm256_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let r = _mm256_maskz_abs_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_abs_epi32(0b00001111, a);
#[rustfmt::skip]
let e = _mm256_setr_epi32(
0, 1, 1, i32::MAX,
0, 0, 0, 0,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_abs_epi32() {
let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
let r = _mm_mask_abs_epi32(a, 0, a);
assert_eq_m128i(r, a);
let r = _mm_mask_abs_epi32(a, 0b00001111, a);
let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_abs_epi32() {
let a = _mm_setr_epi32(i32::MIN, 100, -100, -32);
let r = _mm_maskz_abs_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_abs_epi32(0b00001111, a);
let e = _mm_setr_epi32(i32::MAX.wrapping_add(1), 100, 100, 32);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_abs_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let r = _mm512_abs_ps(a);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_abs_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let r = _mm512_mask_abs_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_abs_ps(a, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1., 1., f32::MAX,
f32::MAX, 100., 100., 32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mov_epi32() {
let src = _mm512_set1_epi32(1);
let a = _mm512_set1_epi32(2);
let r = _mm512_mask_mov_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_mov_epi32(src, 0b11111111_11111111, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mov_epi32() {
let a = _mm512_set1_epi32(2);
let r = _mm512_maskz_mov_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_mov_epi32(0b11111111_11111111, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_mov_epi32() {
let src = _mm256_set1_epi32(1);
let a = _mm256_set1_epi32(2);
let r = _mm256_mask_mov_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_mov_epi32(src, 0b11111111, a);
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_mov_epi32() {
let a = _mm256_set1_epi32(2);
let r = _mm256_maskz_mov_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_mov_epi32(0b11111111, a);
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_mov_epi32() {
let src = _mm_set1_epi32(1);
let a = _mm_set1_epi32(2);
let r = _mm_mask_mov_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_mov_epi32(src, 0b00001111, a);
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_mov_epi32() {
let a = _mm_set1_epi32(2);
let r = _mm_maskz_mov_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_mov_epi32(0b00001111, a);
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mov_ps() {
let src = _mm512_set1_ps(1.);
let a = _mm512_set1_ps(2.);
let r = _mm512_mask_mov_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_mov_ps(src, 0b11111111_11111111, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mov_ps() {
let a = _mm512_set1_ps(2.);
let r = _mm512_maskz_mov_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mov_ps(0b11111111_11111111, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_mov_ps() {
let src = _mm256_set1_ps(1.);
let a = _mm256_set1_ps(2.);
let r = _mm256_mask_mov_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_mov_ps(src, 0b11111111, a);
assert_eq_m256(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_mov_ps() {
let a = _mm256_set1_ps(2.);
let r = _mm256_maskz_mov_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_mov_ps(0b11111111, a);
assert_eq_m256(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_mov_ps() {
let src = _mm_set1_ps(1.);
let a = _mm_set1_ps(2.);
let r = _mm_mask_mov_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_mov_ps(src, 0b00001111, a);
assert_eq_m128(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_mov_ps() {
let a = _mm_set1_ps(2.);
let r = _mm_maskz_mov_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_mov_ps(0b00001111, a);
assert_eq_m128(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_add_epi32(a, b);
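// Packed integer addition wraps on overflow: `i32::MAX + 1 == i32::MIN`.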
#[rustfmt::skip]
let e = _mm512_setr_epi32(
1, 2, 0, i32::MIN,
i32::MIN + 1, 101, -99, -31,
1, 2, 0, i32::MIN,
i32::MIN + 1, 101, -99, -31,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_add_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_add_epi32(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
1, 2, 0, i32::MIN,
i32::MIN + 1, 101, -99, -31,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_add_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_add_epi32(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
1, 2, 0, i32::MIN,
i32::MIN + 1, 101, -99, -31,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_add_epi32() {
let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(1);
let r = _mm256_mask_add_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_add_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_add_epi32() {
let a = _mm256_setr_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(1);
let r = _mm256_maskz_add_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_add_epi32(0b11111111, a, b);
let e = _mm256_setr_epi32(1, 2, 0, i32::MIN, i32::MIN + 1, 101, -99, -31);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_add_epi32() {
let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(1);
let r = _mm_mask_add_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_add_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(2, 0, i32::MIN, i32::MIN + 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_add_epi32() {
let a = _mm_setr_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(1);
let r = _mm_maskz_add_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_add_epi32(0b00001111, a, b);
let e = _mm_setr_epi32(2, 0, i32::MIN, i32::MIN + 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_add_ps(a, b);
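// Adding `1.` to `f32::MAX` is absorbed (`f32::MAX + 1. == f32::MAX`):
// one ulp at that magnitude far exceeds 1.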
#[rustfmt::skip]
let e = _mm512_setr_ps(
1., 2., 0., f32::MAX,
f32::MIN + 1., 101., -99., -31.,
1., 2., 0., f32::MAX,
f32::MIN + 1., 101., -99., -31.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_add_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_add_ps(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1., 2., 0., f32::MAX,
f32::MIN + 1., 101., -99., -31.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_add_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_add_ps(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1., 2., 0., f32::MAX,
f32::MIN + 1., 101., -99., -31.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_add_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(1.);
let r = _mm256_mask_add_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_add_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_add_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(1.);
let r = _mm256_maskz_add_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_add_ps(0b11111111, a, b);
let e = _mm256_set_ps(1., 2., 0., f32::MAX, f32::MIN + 1., 101., -99., -31.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_add_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(1.);
let r = _mm_mask_add_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_add_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_add_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(1.);
let r = _mm_maskz_add_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_add_ps(0b00001111, a, b);
let e = _mm_set_ps(2., 0., f32::MAX, f32::MIN + 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_sub_epi32(a, b);
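// Packed integer subtraction wraps as well: `i32::MIN - 1 == i32::MAX`.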
#[rustfmt::skip]
let e = _mm512_setr_epi32(
-1, 0, -2, i32::MAX - 1,
i32::MAX, 99, -101, -33,
-1, 0, -2, i32::MAX - 1,
i32::MAX, 99, -101, -33,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_sub_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_sub_epi32(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
-1, 0, -2, i32::MAX - 1,
i32::MAX, 99, -101, -33,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_sub_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sub_epi32(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
-1, 0, -2, i32::MAX - 1,
i32::MAX, 99, -101, -33,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sub_epi32() {
let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(1);
let r = _mm256_mask_sub_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_sub_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sub_epi32() {
let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(1);
let r = _mm256_maskz_sub_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_sub_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(-1, 0, -2, i32::MAX - 1, i32::MAX, 99, -101, -33);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sub_epi32() {
let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(1);
let r = _mm_mask_sub_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_sub_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sub_epi32() {
let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(1);
let r = _mm_maskz_sub_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_sub_epi32(0b00001111, a, b);
let e = _mm_set_epi32(0, -2, i32::MAX - 1, i32::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_sub_ps(a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-1., 0., -2., f32::MAX - 1.,
f32::MIN, 99., -101., -33.,
-1., 0., -2., f32::MAX - 1.,
f32::MIN, 99., -101., -33.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_sub_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_sub_ps(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-1., 0., -2., f32::MAX - 1.,
f32::MIN, 99., -101., -33.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_sub_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sub_ps(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-1., 0., -2., f32::MAX - 1.,
f32::MIN, 99., -101., -33.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sub_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(1.);
let r = _mm256_mask_sub_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_sub_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sub_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(1.);
let r = _mm256_maskz_sub_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_sub_ps(0b11111111, a, b);
let e = _mm256_set_ps(-1., 0., -2., f32::MAX - 1., f32::MIN, 99., -101., -33.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sub_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(1.);
let r = _mm_mask_sub_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_sub_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sub_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(1.);
let r = _mm_maskz_sub_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_sub_ps(0b00001111, a, b);
let e = _mm_set_ps(0., -2., f32::MAX - 1., f32::MIN);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mullo_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_mullo_epi32(a, b);
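// `mullo` keeps the low 32 bits of each product: `i32::MAX * 2` wraps to
// `-2` and `i32::MIN * 2` to `0`.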
let e = _mm512_setr_epi32(
0, 2, -2, -2, 0, 200, -200, -64, 0, 2, -2, -2, 0, 200, -200, -64,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mullo_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_mask_mullo_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_mullo_epi32(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_epi32(
0, 2, -2, -2,
0, 200, -200, -64,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mullo_epi32() {
#[rustfmt::skip]
let a = _mm512_setr_epi32(
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
0, 1, -1, i32::MAX,
i32::MIN, 100, -100, -32,
);
let b = _mm512_set1_epi32(2);
let r = _mm512_maskz_mullo_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_mullo_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 2, -2, -2, 0, 200, -200, -64, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_mullo_epi32() {
let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(2);
let r = _mm256_mask_mullo_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_mullo_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_mullo_epi32() {
let a = _mm256_set_epi32(0, 1, -1, i32::MAX, i32::MIN, 100, -100, -32);
let b = _mm256_set1_epi32(2);
let r = _mm256_maskz_mullo_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_mullo_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(0, 2, -2, -2, 0, 200, -200, -64);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_mullo_epi32() {
let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(2);
let r = _mm_mask_mullo_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_mullo_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(2, -2, -2, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_mullo_epi32() {
let a = _mm_set_epi32(1, -1, i32::MAX, i32::MIN);
let b = _mm_set1_epi32(2);
let r = _mm_maskz_mullo_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_mullo_epi32(0b00001111, a, b);
let e = _mm_set_epi32(2, -2, -2, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mul_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_mul_ps(a, b);
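// `f32::MAX * 2.` overflows to `f32::INFINITY` and `f32::MIN * 2.` to
// `f32::NEG_INFINITY`.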
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mul_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_mask_mul_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_mul_ps(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mul_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
0., 1., -1., f32::MAX,
f32::MIN, 100., -100., -32.,
);
let b = _mm512_set1_ps(2.);
let r = _mm512_maskz_mul_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mul_ps(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_mul_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(2.);
let r = _mm256_mask_mul_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_mul_ps(a, 0b11111111, a, b);
#[rustfmt::skip]
let e = _mm256_set_ps(
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_mul_ps() {
let a = _mm256_set_ps(0., 1., -1., f32::MAX, f32::MIN, 100., -100., -32.);
let b = _mm256_set1_ps(2.);
let r = _mm256_maskz_mul_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_mul_ps(0b11111111, a, b);
#[rustfmt::skip]
let e = _mm256_set_ps(
0., 2., -2., f32::INFINITY,
f32::NEG_INFINITY, 200., -200., -64.,
);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_mul_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(2.);
let r = _mm_mask_mul_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_mul_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_mul_ps() {
let a = _mm_set_ps(1., -1., f32::MAX, f32::MIN);
let b = _mm_set1_ps(2.);
let r = _mm_maskz_mul_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_mul_ps(0b00001111, a, b);
let e = _mm_set_ps(2., -2., f32::INFINITY, f32::NEG_INFINITY);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_div_ps(a, b);
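// Lanes divided by `0.` produce signed infinities: `100. / 0.` is
// `f32::INFINITY`, `-131. / 0.` is `f32::NEG_INFINITY`.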
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0.5, -0.5, -1.,
50., f32::INFINITY, -50., -16.,
0., 0.5, -0.5, 500.,
f32::NEG_INFINITY, 50., -50., -16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_mask_div_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_div_ps(a, 0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0.5, -0.5, -1.,
50., f32::INFINITY, -50., -16.,
0., 1., -1., 1000.,
-131., 100., -100., -32.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_div_ps() {
let a = _mm512_setr_ps(
0., 1., -1., -2., 100., 100., -100., -32., 0., 1., -1., 1000., -131., 100., -100., -32.,
);
let b = _mm512_setr_ps(
2., 2., 2., 2., 2., 0., 2., 2., 2., 2., 2., 2., 0., 2., 2., 2.,
);
let r = _mm512_maskz_div_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_div_ps(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0.5, -0.5, -1.,
50., f32::INFINITY, -50., -16.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_div_ps() {
let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
let r = _mm256_mask_div_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_div_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_div_ps() {
let a = _mm256_set_ps(0., 1., -1., -2., 100., 100., -100., -32.);
let b = _mm256_set_ps(2., 2., 2., 2., 2., 0., 2., 2.);
let r = _mm256_maskz_div_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_div_ps(0b11111111, a, b);
let e = _mm256_set_ps(0., 0.5, -0.5, -1., 50., f32::INFINITY, -50., -16.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_div_ps() {
let a = _mm_set_ps(100., 100., -100., -32.);
let b = _mm_set_ps(2., 0., 2., 2.);
let r = _mm_mask_div_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_div_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_div_ps() {
let a = _mm_set_ps(100., 100., -100., -32.);
let b = _mm_set_ps(2., 0., 2., 2.);
let r = _mm_maskz_div_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_div_ps(0b00001111, a, b);
let e = _mm_set_ps(50., f32::INFINITY, -50., -16.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_max_epi32(a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_max_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_max_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_max_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_max_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_max_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_mask_max_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_max_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_max_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_maskz_max_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_max_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_max_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_mask_max_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_max_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(3, 2, 2, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_max_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_maskz_max_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_max_epi32(0b00001111, a, b);
let e = _mm_set_epi32(3, 2, 2, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_max_ps(a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_max_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_max_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_max_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_max_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_max_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
let r = _mm256_mask_max_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_max_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_max_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
let r = _mm256_maskz_max_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_max_ps(0b11111111, a, b);
let e = _mm256_set_ps(7., 6., 5., 4., 4., 5., 6., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_max_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(3., 2., 1., 0.);
let r = _mm_mask_max_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_max_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(3., 2., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_max_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(3., 2., 1., 0.);
let r = _mm_maskz_max_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_max_ps(0b00001111, a, b);
let e = _mm_set_ps(3., 2., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_max_epu32(a, b);
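// Unsigned comparison; with these non-negative inputs the result matches
// the signed `max_epi32` case above.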
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_max_epu32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_max_epu32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_max_epu32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_max_epu32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_max_epu32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_mask_max_epu32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_max_epu32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_max_epu32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_maskz_max_epu32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_max_epu32(0b11111111, a, b);
let e = _mm256_set_epi32(7, 6, 5, 4, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_max_epu32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_mask_max_epu32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_max_epu32(a, 0b00001111, a, b);
let e = _mm_set_epi32(3, 2, 2, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_max_epu32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_maskz_max_epu32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_max_epu32(0b00001111, a, b);
let e = _mm_set_epi32(3, 2, 2, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_min_epi32(a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_min_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_min_epi32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_epi32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_min_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_min_epi32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_min_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_mask_min_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_min_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_min_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_maskz_min_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_min_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_min_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_mask_min_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_min_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(0, 1, 1, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_min_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_maskz_min_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_min_epi32(0b00001111, a, b);
let e = _mm_set_epi32(0, 1, 1, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_min_ps(a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_min_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_min_ps(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_min_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_min_ps(0b00000000_11111111, a, b);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_min_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
let r = _mm256_mask_min_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_min_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_min_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set_ps(7., 6., 5., 4., 3., 2., 1., 0.);
let r = _mm256_maskz_min_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_min_ps(0b11111111, a, b);
let e = _mm256_set_ps(0., 1., 2., 3., 3., 2., 1., 0.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_min_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(3., 2., 1., 0.);
let r = _mm_mask_min_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_min_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(0., 1., 1., 0.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_min_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(3., 2., 1., 0.);
let r = _mm_maskz_min_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_min_ps(0b00001111, a, b);
let e = _mm_set_ps(0., 1., 1., 0.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_min_epu32(a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 7, 6, 5, 4, 3, 2, 1, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_mask_min_epu32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_min_epu32(a, 0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_epu32() {
let a = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let b = _mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm512_maskz_min_epu32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_min_epu32(0b00000000_11111111, a, b);
let e = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_min_epu32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_mask_min_epu32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_min_epu32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_min_epu32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let b = _mm256_set_epi32(7, 6, 5, 4, 3, 2, 1, 0);
let r = _mm256_maskz_min_epu32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_min_epu32(0b11111111, a, b);
let e = _mm256_set_epi32(0, 1, 2, 3, 3, 2, 1, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_min_epu32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_mask_min_epu32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_min_epu32(a, 0b00001111, a, b);
let e = _mm_set_epi32(0, 1, 1, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_min_epu32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let b = _mm_set_epi32(3, 2, 1, 0);
let r = _mm_maskz_min_epu32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_min_epu32(0b00001111, a, b);
let e = _mm_set_epi32(0, 1, 1, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_sqrt_ps(a);
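// The inputs are perfect squares, so every lane of the square root is exact.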
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_mask_sqrt_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_sqrt_ps(a, 0b00000000_11111111, a);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 64., 81., 100., 121., 144., 169., 196., 225.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sqrt_ps() {
let a = _mm512_setr_ps(
0., 1., 4., 9., 16., 25., 36., 49., 64., 81., 100., 121., 144., 169., 196., 225.,
);
let r = _mm512_maskz_sqrt_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sqrt_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sqrt_ps() {
let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
let r = _mm256_mask_sqrt_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_sqrt_ps(a, 0b11111111, a);
let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sqrt_ps() {
let a = _mm256_set_ps(0., 1., 4., 9., 16., 25., 36., 49.);
let r = _mm256_maskz_sqrt_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_sqrt_ps(0b11111111, a);
let e = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sqrt_ps() {
let a = _mm_set_ps(0., 1., 4., 9.);
let r = _mm_mask_sqrt_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_sqrt_ps(a, 0b00001111, a);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sqrt_ps() {
let a = _mm_set_ps(0., 1., 4., 9.);
let r = _mm_maskz_sqrt_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_sqrt_ps(0b00001111, a);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmadd_ps(a, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fmadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fmadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
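// `mask3` variants merge into `c` rather than `a`: lanes with a clear mask
// bit keep the corresponding value from `c`.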
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(2.);
let r = _mm512_mask3_fmadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
2., 3., 4., 5., 6., 7., 8., 9., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fmadd_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fmadd_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fmadd_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fmadd_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fmadd_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fmadd_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fmadd_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmadd_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fmadd_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fmadd_ps(0b00001111, a, b, c);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fmadd_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmadd_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmsub_ps(a, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fmsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fmsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., 0., 1., 2., 3., 4., 5., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fmsub_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fmsub_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fmsub_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fmsub_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fmsub_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fmsub_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(-1., 0., 1., 2., 3., 4., 5., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fmsub_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmsub_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(-1., 0., 1., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fmsub_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fmsub_ps(0b00001111, a, b, c);
let e = _mm_set_ps(-1., 0., 1., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fmsub_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsub_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(-1., 0., 1., 2.);
assert_eq_m128(r, e);
}
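// fmaddsub alternates the sign of c per lane: even lanes compute a * b - c,
// odd lanes a * b + c.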
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmaddsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmaddsub_ps(a, b, c);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 7., 10., 9., 12., 11., 14., 13., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmaddsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fmaddsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmaddsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmaddsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fmaddsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmaddsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmaddsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmaddsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., 2., 1., 4., 3., 6., 5., 8., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fmaddsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fmaddsub_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fmaddsub_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fmaddsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fmaddsub_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fmaddsub_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fmaddsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fmaddsub_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(1., 0., 3., 2., 5., 4., 7., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fmaddsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fmaddsub_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmaddsub_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(1., 0., 3., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fmaddsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fmaddsub_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fmaddsub_ps(0b00001111, a, b, c);
let e = _mm_set_ps(1., 0., 3., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fmaddsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fmaddsub_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmaddsub_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(1., 0., 3., 2.);
assert_eq_m128(r, e);
}
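// fmsubadd is the mirror of fmaddsub: even lanes compute a * b + c,
// odd lanes a * b - c.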
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsubadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmsubadd_ps(a, b, c);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 9., 8., 11., 10., 13., 12., 15., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsubadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fmsubadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsubadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsubadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fmsubadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsubadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsubadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsubadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
1., 0., 3., 2., 5., 4., 7., 6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fmsubadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fmsubadd_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fmsubadd_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fmsubadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fmsubadd_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fmsubadd_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fmsubadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fmsubadd_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(-1., 2., 1., 4., 3., 6., 5., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fmsubadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fmsubadd_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmsubadd_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(-1., 2., 1., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fmsubadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fmsubadd_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fmsubadd_ps(0b00001111, a, b, c);
let e = _mm_set_ps(-1., 2., 1., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fmsubadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fmsubadd_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsubadd_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(-1., 2., 1., 4.);
assert_eq_m128(r, e);
}
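// fnmadd negates the product: each lane computes -(a * b) + c.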
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fnmadd_ps(a, b, c);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fnmadd_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmadd_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fnmadd_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmadd_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmadd_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fnmadd_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmadd_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
1., 0., -1., -2., -3., -4., -5., -6., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fnmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fnmadd_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fnmadd_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fnmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fnmadd_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fnmadd_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fnmadd_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fnmadd_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fnmadd_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(1., 0., -1., -2., -3., -4., -5., -6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fnmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fnmadd_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmadd_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(1., 0., -1., -2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fnmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fnmadd_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fnmadd_ps(0b00001111, a, b, c);
let e = _mm_set_ps(1., 0., -1., -2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fnmadd_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fnmadd_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmadd_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(1., 0., -1., -2.);
assert_eq_m128(r, e);
}
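// fnmsub negates both terms: each lane computes -(a * b) - c.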
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_fnmsub_ps(a, b, c);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., -9., -10., -11., -12., -13., -14., -15., -16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_mask_fnmsub_ps(a, 0, b, c);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmsub_ps(a, 0b00000000_11111111, b, c);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_set1_ps(1.);
let r = _mm512_maskz_fnmsub_ps(0, a, b, c);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmsub_ps(0b00000000_11111111, a, b, c);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmsub_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let c = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 2., 2., 2., 2., 2., 2., 2., 2.,
);
let r = _mm512_mask3_fnmsub_ps(a, b, c, 0);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmsub_ps(a, b, c, 0b00000000_11111111);
let e = _mm512_setr_ps(
-1., -2., -3., -4., -5., -6., -7., -8., 2., 2., 2., 2., 2., 2., 2., 2.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fnmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask_fnmsub_ps(a, 0, b, c);
assert_eq_m256(r, a);
let r = _mm256_mask_fnmsub_ps(a, 0b11111111, b, c);
let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fnmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_maskz_fnmsub_ps(0, a, b, c);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_fnmsub_ps(0b11111111, a, b, c);
let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask3_fnmsub_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let c = _mm256_set1_ps(1.);
let r = _mm256_mask3_fnmsub_ps(a, b, c, 0);
assert_eq_m256(r, c);
let r = _mm256_mask3_fnmsub_ps(a, b, c, 0b11111111);
let e = _mm256_set_ps(-1., -2., -3., -4., -5., -6., -7., -8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fnmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask_fnmsub_ps(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmsub_ps(a, 0b00001111, b, c);
let e = _mm_set_ps(-1., -2., -3., -4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fnmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_maskz_fnmsub_ps(0, a, b, c);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_fnmsub_ps(0b00001111, a, b, c);
let e = _mm_set_ps(-1., -2., -3., -4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask3_fnmsub_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set_ps(0., 1., 2., 3.);
let c = _mm_set1_ps(1.);
let r = _mm_mask3_fnmsub_ps(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmsub_ps(a, b, c, 0b00001111);
let e = _mm_set_ps(-1., -2., -3., -4.);
assert_eq_m128(r, e);
}
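// rcp14 is the hardware reciprocal approximation with relative error at most
// 2^-14, so 1/3 comes out as 0.33333206 rather than the correctly rounded
// 0.33333334.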
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_rcp14_ps(a);
let e = _mm512_set1_ps(0.33333206);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_rcp14_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_rcp14_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
0.33333206, 0.33333206, 0.33333206, 0.33333206,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rcp14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_rcp14_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_rcp14_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.33333206, 0.33333206, 0.33333206, 0.33333206,
0.33333206, 0.33333206, 0.33333206, 0.33333206,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_rcp14_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_rcp14_ps(a);
let e = _mm256_set1_ps(0.33333206);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_rcp14_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_mask_rcp14_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_rcp14_ps(a, 0b11111111, a);
let e = _mm256_set1_ps(0.33333206);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_rcp14_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_maskz_rcp14_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_rcp14_ps(0b11111111, a);
let e = _mm256_set1_ps(0.33333206);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_rcp14_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_rcp14_ps(a);
let e = _mm_set1_ps(0.33333206);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_rcp14_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_mask_rcp14_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_rcp14_ps(a, 0b00001111, a);
let e = _mm_set1_ps(0.33333206);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_rcp14_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_maskz_rcp14_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_rcp14_ps(0b00001111, a);
let e = _mm_set1_ps(0.33333206);
assert_eq_m128(r, e);
}
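// rsqrt14 approximates 1/sqrt(x) with relative error at most 2^-14;
// 1/sqrt(3) ~= 0.57735027 comes out as 0.5773392.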
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_rsqrt14_ps(a);
let e = _mm512_set1_ps(0.5773392);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_rsqrt14_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_rsqrt14_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
0.5773392, 0.5773392, 0.5773392,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rsqrt14_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_rsqrt14_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_rsqrt14_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.5773392, 0.5773392, 0.5773392, 0.5773392, 0.5773392,
0.5773392, 0.5773392, 0.5773392,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_rsqrt14_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_mask_rsqrt14_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_rsqrt14_ps(a, 0b11111111, a);
let e = _mm256_set1_ps(0.5773392);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_rsqrt14_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_maskz_rsqrt14_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_rsqrt14_ps(0b11111111, a);
let e = _mm256_set1_ps(0.5773392);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_rsqrt14_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_mask_rsqrt14_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_rsqrt14_ps(a, 0b00001111, a);
let e = _mm_set1_ps(0.5773392);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_rsqrt14_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_maskz_rsqrt14_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_rsqrt14_ps(0b00001111, a);
let e = _mm_set1_ps(0.5773392);
assert_eq_m128(r, e);
}
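// getexp returns floor(log2(|x|)) as a float (the unbiased exponent),
// so getexp(3.0) == 1.0.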
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_getexp_ps(a);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_getexp_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_getexp_ps(a, 0b11111111_00000000, a);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getexp_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_getexp_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getexp_ps(0b11111111_00000000, a);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_getexp_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_getexp_ps(a);
let e = _mm256_set1_ps(1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_getexp_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_mask_getexp_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_getexp_ps(a, 0b11111111, a);
let e = _mm256_set1_ps(1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_getexp_ps() {
let a = _mm256_set1_ps(3.);
let r = _mm256_maskz_getexp_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_getexp_ps(0b11111111, a);
let e = _mm256_set1_ps(1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_getexp_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_getexp_ps(a);
let e = _mm_set1_ps(1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_getexp_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_mask_getexp_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_getexp_ps(a, 0b00001111, a);
let e = _mm_set1_ps(1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_getexp_ps() {
let a = _mm_set1_ps(3.);
let r = _mm_maskz_getexp_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_getexp_ps(0b00001111, a);
let e = _mm_set1_ps(1.);
assert_eq_m128(r, e);
}
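// roundscale rounds to IMM8[7:4] fraction bits using the rounding mode in the
// low bits of IMM8; IMM8 == 0 rounds to the nearest integer, so 1.1 becomes 1.0.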
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_roundscale_ps(a, 0);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_mask_roundscale_ps(a, 0, a, 0);
let e = _mm512_set1_ps(1.1);
assert_eq_m512(r, e);
let r = _mm512_mask_roundscale_ps(a, 0b11111111_11111111, a, 0);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_roundscale_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_maskz_roundscale_ps(0, a, 0);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_roundscale_ps(0b11111111_11111111, a, 0);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_roundscale_ps() {
let a = _mm256_set1_ps(1.1);
let r = _mm256_roundscale_ps(a, 0);
let e = _mm256_set1_ps(1.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_roundscale_ps() {
let a = _mm256_set1_ps(1.1);
let r = _mm256_mask_roundscale_ps(a, 0, a, 0);
let e = _mm256_set1_ps(1.1);
assert_eq_m256(r, e);
let r = _mm256_mask_roundscale_ps(a, 0b11111111, a, 0);
let e = _mm256_set1_ps(1.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_roundscale_ps() {
let a = _mm256_set1_ps(1.1);
let r = _mm256_maskz_roundscale_ps(0, a, 0);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_roundscale_ps(0b11111111, a, 0);
let e = _mm256_set1_ps(1.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_roundscale_ps() {
let a = _mm_set1_ps(1.1);
let r = _mm_roundscale_ps(a, 0);
let e = _mm_set1_ps(1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_roundscale_ps() {
let a = _mm_set1_ps(1.1);
let r = _mm_mask_roundscale_ps(a, 0, a, 0);
let e = _mm_set1_ps(1.1);
assert_eq_m128(r, e);
let r = _mm_mask_roundscale_ps(a, 0b00001111, a, 0);
let e = _mm_set1_ps(1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_roundscale_ps() {
let a = _mm_set1_ps(1.1);
let r = _mm_maskz_roundscale_ps(0, a, 0);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_roundscale_ps(0b00001111, a, 0);
let e = _mm_set1_ps(1.0);
assert_eq_m128(r, e);
}
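// scalef computes a * 2^floor(b) per lane: 1.0 * 2^3 == 8.0.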
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_scalef_ps(a, b);
let e = _mm512_set1_ps(8.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_mask_scalef_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_scalef_ps(a, 0b11111111_00000000, a, b);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_scalef_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_maskz_scalef_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_scalef_ps(0b11111111_00000000, a, b);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_scalef_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set1_ps(3.);
let r = _mm256_scalef_ps(a, b);
let e = _mm256_set1_ps(8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_scalef_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set1_ps(3.);
let r = _mm256_mask_scalef_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_scalef_ps(a, 0b11111111, a, b);
let e = _mm256_set1_ps(8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_scalef_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set1_ps(3.);
let r = _mm256_maskz_scalef_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_scalef_ps(0b11111111, a, b);
let e = _mm256_set1_ps(8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_scalef_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_scalef_ps(a, b);
let e = _mm_set1_ps(8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_scalef_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_scalef_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_scalef_ps(a, 0b00001111, a, b);
let e = _mm_set1_ps(8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_scalef_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_scalef_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_scalef_ps(0b00001111, a, b);
let e = _mm_set1_ps(8.);
assert_eq_m128(r, e);
}
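// fixupimm patches special-value inputs (NaN, zeros, infinities) via the
// per-lane 4-bit response table packed in c; with this table (i32::MAX) and
// IMM8 == 5 the NaN lanes are fixed up to 0.0. See the Intel SDM entry for
// VFIXUPIMMPS for the full table encoding.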
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fixupimm_ps() {
let a = _mm512_set1_ps(f32::NAN);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_fixupimm_ps(a, b, c, 5);
let e = _mm512_set1_ps(0.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fixupimm_ps() {
#[rustfmt::skip]
let a = _mm512_set_ps(
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
1., 1., 1., 1.,
1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_mask_fixupimm_ps(a, 0b11111111_00000000, b, c, 5);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fixupimm_ps() {
#[rustfmt::skip]
let a = _mm512_set_ps(
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
1., 1., 1., 1.,
1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_maskz_fixupimm_ps(0b11111111_00000000, a, b, c, 5);
let e = _mm512_setzero_ps();
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_fixupimm_ps() {
let a = _mm256_set1_ps(f32::NAN);
let b = _mm256_set1_ps(f32::MAX);
let c = _mm256_set1_epi32(i32::MAX);
let r = _mm256_fixupimm_ps(a, b, c, 5);
let e = _mm256_set1_ps(0.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_fixupimm_ps() {
let a = _mm256_set1_ps(f32::NAN);
let b = _mm256_set1_ps(f32::MAX);
let c = _mm256_set1_epi32(i32::MAX);
let r = _mm256_mask_fixupimm_ps(a, 0b11111111, b, c, 5);
let e = _mm256_set1_ps(0.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_fixupimm_ps() {
let a = _mm256_set1_ps(f32::NAN);
let b = _mm256_set1_ps(f32::MAX);
let c = _mm256_set1_epi32(i32::MAX);
let r = _mm256_maskz_fixupimm_ps(0b11111111, a, b, c, 5);
let e = _mm256_set1_ps(0.0);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_fixupimm_ps() {
let a = _mm_set1_ps(f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_fixupimm_ps(a, b, c, 5);
let e = _mm_set1_ps(0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_fixupimm_ps() {
let a = _mm_set1_ps(f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_mask_fixupimm_ps(a, 0b00001111, b, c, 5);
let e = _mm_set1_ps(0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_fixupimm_ps() {
let a = _mm_set1_ps(f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_maskz_fixupimm_ps(0b00001111, a, b, c, 5);
let e = _mm_set1_ps(0.0);
assert_eq_m128(r, e);
}
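// ternarylogic treats IMM8 as the truth table of a three-input bitwise
// function: each result bit is IMM8[(a_bit << 2) | (b_bit << 1) | c_bit].
// The inputs below have disjoint set bits, so no bit position ever sees more
// than one input set, and IMM8 == 8 (true only for the input pattern 0b011)
// therefore yields all zeros.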
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ternarylogic_epi32() {
let a = _mm512_set1_epi32(1 << 2);
let b = _mm512_set1_epi32(1 << 1);
let c = _mm512_set1_epi32(1 << 0);
let r = _mm512_ternarylogic_epi32(a, b, c, 8);
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_ternarylogic_epi32() {
let src = _mm512_set1_epi32(1 << 2);
let a = _mm512_set1_epi32(1 << 1);
let b = _mm512_set1_epi32(1 << 0);
let r = _mm512_mask_ternarylogic_epi32(src, 0, a, b, 8);
assert_eq_m512i(r, src);
let r = _mm512_mask_ternarylogic_epi32(src, 0b11111111_11111111, a, b, 8);
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_ternarylogic_epi32() {
let a = _mm512_set1_epi32(1 << 2);
let b = _mm512_set1_epi32(1 << 1);
let c = _mm512_set1_epi32(1 << 0);
let r = _mm512_maskz_ternarylogic_epi32(0, a, b, c, 9);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_ternarylogic_epi32(0b11111111_11111111, a, b, c, 8);
let e = _mm512_set1_epi32(0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_ternarylogic_epi32() {
let a = _mm256_set1_epi32(1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let c = _mm256_set1_epi32(1 << 0);
let r = _mm256_ternarylogic_epi32(a, b, c, 8);
let e = _mm256_set1_epi32(0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_ternarylogic_epi32() {
let src = _mm256_set1_epi32(1 << 2);
let a = _mm256_set1_epi32(1 << 1);
let b = _mm256_set1_epi32(1 << 0);
let r = _mm256_mask_ternarylogic_epi32(src, 0, a, b, 8);
assert_eq_m256i(r, src);
let r = _mm256_mask_ternarylogic_epi32(src, 0b11111111, a, b, 8);
let e = _mm256_set1_epi32(0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_ternarylogic_epi32() {
let a = _mm256_set1_epi32(1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let c = _mm256_set1_epi32(1 << 0);
let r = _mm256_maskz_ternarylogic_epi32(0, a, b, c, 9);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_ternarylogic_epi32(0b11111111, a, b, c, 8);
let e = _mm256_set1_epi32(0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_ternarylogic_epi32() {
let a = _mm_set1_epi32(1 << 2);
let b = _mm_set1_epi32(1 << 1);
let c = _mm_set1_epi32(1 << 0);
let r = _mm_ternarylogic_epi32(a, b, c, 8);
let e = _mm_set1_epi32(0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_ternarylogic_epi32() {
let src = _mm_set1_epi32(1 << 2);
let a = _mm_set1_epi32(1 << 1);
let b = _mm_set1_epi32(1 << 0);
let r = _mm_mask_ternarylogic_epi32(src, 0, a, b, 8);
assert_eq_m128i(r, src);
let r = _mm_mask_ternarylogic_epi32(src, 0b00001111, a, b, 8);
let e = _mm_set1_epi32(0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_ternarylogic_epi32() {
let a = _mm_set1_epi32(1 << 2);
let b = _mm_set1_epi32(1 << 1);
let c = _mm_set1_epi32(1 << 0);
let r = _mm_maskz_ternarylogic_epi32(0, a, b, c, 9);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_ternarylogic_epi32(0b00001111, a, b, c, 8);
let e = _mm_set1_epi32(0);
assert_eq_m128i(r, e);
}
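// getmant extracts the normalized mantissa: the interval constant selects the
// range (_MM_MANT_NORM_1_2 -> [1, 2), _MM_MANT_NORM_P75_1P5 -> [0.75, 1.5))
// and the sign constant controls the sign. 10.0 == 1.25 * 2^3, so the
// mantissa is 1.25 under both intervals used here.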
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
let e = _mm512_set1_ps(1.25);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m512(r, a);
let r = _mm512_mask_getmant_ps(
a,
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
);
let e = _mm512_setr_ps(
10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getmant_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m512(r, _mm512_setzero_ps());
let r =
_mm512_maskz_getmant_ps(0b11111111_00000000, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_getmant_ps() {
let a = _mm256_set1_ps(10.);
let r = _mm256_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
let e = _mm256_set1_ps(1.25);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_getmant_ps() {
let a = _mm256_set1_ps(10.);
let r = _mm256_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m256(r, a);
let r = _mm256_mask_getmant_ps(a, 0b11111111, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm256_set1_ps(1.25);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_getmant_ps() {
let a = _mm256_set1_ps(10.);
let r = _mm256_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_getmant_ps(0b11111111, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm256_set1_ps(1.25);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_getmant_ps() {
let a = _mm_set1_ps(10.);
let r = _mm_getmant_ps(a, _MM_MANT_NORM_P75_1P5, _MM_MANT_SIGN_NAN);
let e = _mm_set1_ps(1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_getmant_ps() {
let a = _mm_set1_ps(10.);
let r = _mm_mask_getmant_ps(a, 0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m128(r, a);
let r = _mm_mask_getmant_ps(a, 0b00001111, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set1_ps(1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_getmant_ps() {
let a = _mm_set1_ps(10.);
let r = _mm_maskz_getmant_ps(0, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_getmant_ps(0b00001111, a, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set1_ps(1.25);
assert_eq_m128(r, e);
}
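// The *_round_* variants take an explicit rounding control instead of using
// MXCSR: _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC selects
// round-to-nearest-even with exceptions suppressed, while _MM_FROUND_TO_ZERO
// truncates. Operands like 0.00000007 - 1.0 fall between representable f32
// values, so the two modes differ by one ULP (-0.99999994 vs -0.9999999).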
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5,
3., 4.5, 5., 6.5,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_add_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_mask_add_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_add_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_add_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(-1.);
let r = _mm512_maskz_add_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_add_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0., 0., 0.,
0., 0., 0., 0.,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5,
3., 4.5, 5., 6.5,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_sub_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-1., 0.5, 1., 2.5, 3., 4.5, 5., 6.5, 7., 8.5, 9., 10.5, 11., 12.5, 13., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_mask_sub_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_sub_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sub_round_ps() {
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5, 4., 5.5, 6., 7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 0.00000007,
);
let b = _mm512_set1_ps(1.);
let r = _mm512_maskz_sub_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sub_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0., 0., 0.,
0., 0., 0., 0.,
7., 8.5, 9., 10.5,
11., 12.5, 13., -0.99999994,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mul_round_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
8., 9.5, 10., 11.5,
12., 13.5, 14., 0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0.15, 0.2, 0.35,
0.4, 0.55, 0.6, 0.75,
0.8, 0.95, 1.0, 1.15,
1.2, 1.35, 1.4, 0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
let r = _mm512_mul_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0.14999999, 0.2, 0.35,
0.4, 0.54999995, 0.59999996, 0.75,
0.8, 0.95, 1.0, 1.15,
1.1999999, 1.3499999, 1.4, 0.000000000000000000000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_mul_round_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
8., 9.5, 10., 11.5,
12., 13.5, 14., 0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_mask_mul_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_mul_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
0.8, 0.95, 1.0, 1.15,
1.2, 1.35, 1.4, 0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_mul_round_ps() {
#[rustfmt::skip]
let a = _mm512_setr_ps(
0., 1.5, 2., 3.5,
4., 5.5, 6., 7.5,
8., 9.5, 10., 11.5,
12., 13.5, 14., 0.00000000000000000000007,
);
let b = _mm512_set1_ps(0.1);
let r = _mm512_maskz_mul_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_mul_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 0., 0., 0.,
0., 0., 0., 0.,
0.8, 0.95, 1.0, 1.15,
1.2, 1.35, 1.4, 0.000000000000000000000007000001,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.33333334);
assert_eq_m512(r, e);
let r = _mm512_div_round_ps(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.3333333);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_mask_div_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_div_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
0.33333334, 0.33333334, 0.33333334, 0.33333334,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_div_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r = _mm512_maskz_div_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_div_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0.33333334, 0.33333334, 0.33333334, 0.33333334,
0.33333334, 0.33333334, 0.33333334, 0.33333334,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.7320508);
assert_eq_m512(r, e);
let r = _mm512_sqrt_round_ps(a, _MM_FROUND_TO_POS_INF | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.7320509);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_sqrt_round_ps(a, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_sqrt_round_ps(
a,
0b11111111_00000000,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
1.7320508, 1.7320508, 1.7320508,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sqrt_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_sqrt_round_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_sqrt_round_ps(
0b11111111_00000000,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.7320508, 1.7320508, 1.7320508, 1.7320508, 1.7320508,
1.7320508, 1.7320508, 1.7320508,
);
assert_eq_m512(r, e);
}
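// The fused forms round only once: a * b + c is computed exactly and the
// selected rounding is applied a single time to the result, which is why the
// tiny addend 0.00000007 is still visible in the rounded output.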
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask_fmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fmadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_maskz_fmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask3_fmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-1., -1., -1., -1.,
-1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(-0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask_fmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_maskz_fmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask3_fmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
-0.99999994, -0.99999994, -0.99999994, -0.99999994,
1., 1., 1., 1.,
1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
);
assert_eq_m512(r, e);
let r = _mm512_fmaddsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
-0.9999999, 1., -0.9999999, 1., -0.9999999,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask_fmaddsub_round_ps(
a,
0,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, a);
let r = _mm512_mask_fmaddsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_maskz_fmaddsub_round_ps(
0,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmaddsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmaddsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask3_fmaddsub_round_ps(
a,
b,
c,
0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmaddsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
1.0000001, -0.99999994, 1.0000001, -0.99999994,
1.0000001, -0.99999994, 1.0000001, -0.99999994,
-1., -1., -1., -1.,
-1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
);
assert_eq_m512(r, e);
let r = _mm512_fmsubadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
-0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
-0.9999999, 1., -0.9999999, 1., -0.9999999, 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask_fmsubadd_round_ps(
a,
0,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, a);
let r = _mm512_mask_fmsubadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007, 0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_maskz_fmsubadd_round_ps(
0,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fmsubadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fmsubadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r = _mm512_mask3_fmsubadd_round_ps(
a,
b,
c,
0,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
assert_eq_m512(r, c);
let r = _mm512_mask3_fmsubadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-0.99999994, 1.0000001, -0.99999994, 1.0000001,
-1., -1., -1., -1.,
-1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
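// fnmadd computes -(a * b) + c; with TO_ZERO the exact result just below 1.0
// truncates to 0.9999999 instead of rounding up to 0.99999994.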
let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fnmadd_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask_fnmadd_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmadd_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_maskz_fnmadd_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmadd_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmadd_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(1.);
let r =
_mm512_mask3_fnmadd_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmadd_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
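// fnmsub computes -(a * b) - c, so with c = -1 the results match fnmadd above.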
let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.99999994);
assert_eq_m512(r, e);
let r = _mm512_fnmsub_round_ps(a, b, c, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(0.9999999);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask_fnmsub_round_ps(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_fnmsub_round_ps(
a,
0b00000000_11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007, 0.00000007,
0.00000007, 0.00000007,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_maskz_fnmsub_round_ps(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_fnmsub_round_ps(
0b00000000_11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask3_fnmsub_round_ps() {
let a = _mm512_set1_ps(0.00000007);
let b = _mm512_set1_ps(1.);
let c = _mm512_set1_ps(-1.);
let r =
_mm512_mask3_fnmsub_round_ps(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, c);
let r = _mm512_mask3_fnmsub_round_ps(
a,
b,
c,
0b00000000_11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994, 0.99999994,
0.99999994, -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_max_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_max_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_max_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_max_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_max_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_max_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_min_round_ps(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 7., 6., 5., 4., 3., 2., 1., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_mask_min_round_ps(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_min_round_ps(a, 0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_min_round_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
);
let r = _mm512_maskz_min_round_ps(0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_min_round_ps(0b00000000_11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
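// getexp returns floor(log2(|a|)) as a float: 3.0 = 1.5 * 2^1, so the exponent is 1.0.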
let r = _mm512_getexp_round_ps(a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
let r = _mm512_getexp_round_ps(a, _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_mask_getexp_round_ps(a, 0, a, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, a);
let r = _mm512_mask_getexp_round_ps(a, 0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
3., 3., 3., 3., 3., 3., 3., 3., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getexp_round_ps() {
let a = _mm512_set1_ps(3.);
let r = _mm512_maskz_getexp_round_ps(0, a, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getexp_round_ps(0b11111111_00000000, a, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
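// imm8 = 0 keeps zero fraction bits, i.e. rounds to the nearest integer: 1.1 -> 1.0.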
let r = _mm512_roundscale_round_ps(a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_mask_roundscale_round_ps(a, 0, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.1);
assert_eq_m512(r, e);
let r =
_mm512_mask_roundscale_round_ps(a, 0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_roundscale_round_ps() {
let a = _mm512_set1_ps(1.1);
let r = _mm512_maskz_roundscale_round_ps(0, a, 0, _MM_FROUND_CUR_DIRECTION);
assert_eq_m512(r, _mm512_setzero_ps());
let r =
_mm512_maskz_roundscale_round_ps(0b11111111_11111111, a, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm512_set1_ps(1.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
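// scalef computes a * 2^floor(b) = 1.0 * 2^3 = 8.0.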
let r = _mm512_scalef_round_ps(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(8.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r =
_mm512_mask_scalef_round_ps(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, a);
let r = _mm512_mask_scalef_round_ps(
a,
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_scalef_round_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(3.);
let r =
_mm512_maskz_scalef_round_ps(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_scalef_round_ps(
0b11111111_00000000,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_set_ps(
8., 8., 8., 8., 8., 8., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_fixupimm_round_ps() {
let a = _mm512_set1_ps(f32::NAN);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_fixupimm_round_ps(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
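// fixupimm classifies the source (f32::MAX here) and picks the matching nibble
// of the table in `c`; with c = i32::MAX every lane is fixed up to (-)0.0,
// which compares equal to the expected 0.0.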
let e = _mm512_set1_ps(0.0);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_fixupimm_round_ps() {
#[rustfmt::skip]
let a = _mm512_set_ps(
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
1., 1., 1., 1.,
1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_mask_fixupimm_round_ps(
a,
0b11111111_00000000,
b,
c,
5,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_fixupimm_round_ps() {
#[rustfmt::skip]
let a = _mm512_set_ps(
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
f32::NAN, f32::NAN, f32::NAN, f32::NAN,
1., 1., 1., 1.,
1., 1., 1., 1.,
);
let b = _mm512_set1_ps(f32::MAX);
let c = _mm512_set1_epi32(i32::MAX);
let r = _mm512_maskz_fixupimm_round_ps(
0b11111111_00000000,
a,
b,
c,
5,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
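// _MM_MANT_NORM_1_2 normalizes the mantissa into [1, 2): 10.0 = 1.25 * 2^3,
// and _MM_MANT_SIGN_SRC keeps the source sign.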
let r = _mm512_getmant_round_ps(
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_set1_ps(1.25);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_mask_getmant_round_ps(
a,
0,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
assert_eq_m512(r, a);
let r = _mm512_mask_getmant_round_ps(
a,
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_setr_ps(
10., 10., 10., 10., 10., 10., 10., 10., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_getmant_round_ps() {
let a = _mm512_set1_ps(10.);
let r = _mm512_maskz_getmant_round_ps(
0,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_getmant_round_ps(
0b11111111_00000000,
a,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm512_setr_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25, 1.25,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtps_epi32(a);
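// The default rounding mode rounds halfway cases to even: -3.5 -> -4, 9.5 -> 10.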
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtps_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtps_epi32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_epi32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtps_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtps_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtps_epi32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let src = _mm256_set1_epi32(0);
let r = _mm256_mask_cvtps_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtps_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtps_epi32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_maskz_cvtps_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtps_epi32(0b11111111, a);
let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtps_epi32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let src = _mm_set1_epi32(0);
let r = _mm_mask_cvtps_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtps_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 14, 14, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtps_epi32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_maskz_cvtps_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtps_epi32(0b00001111, a);
let e = _mm_set_epi32(12, 14, 14, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtps_epu32(a);
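// Negative inputs are out of range for an unsigned conversion; the CPU returns
// the integer indefinite value 0xFFFF_FFFF, which prints as -1 in an i32 lane.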
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtps_epu32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtps_epu32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_epu32() {
let a = _mm512_setr_ps(
0., -1.4, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtps_epu32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtps_epu32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_cvtps_epu32(a);
let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let src = _mm256_set1_epi32(0);
let r = _mm256_mask_cvtps_epu32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtps_epu32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_maskz_cvtps_epu32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtps_epu32(0b11111111, a);
let e = _mm256_set_epi32(8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_cvtps_epu32(a);
let e = _mm_set_epi32(12, 14, 14, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let src = _mm_set1_epi32(0);
let r = _mm_mask_cvtps_epu32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtps_epu32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 14, 14, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_maskz_cvtps_epu32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtps_epu32(0b00001111, a);
let e = _mm_set_epi32(12, 14, 14, 16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi8_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepi8_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepi8_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi8_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepi8_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm256_set1_epi32(-1);
let r = _mm256_mask_cvtepi8_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtepi8_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm256_maskz_cvtepi8_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtepi8_epi32(0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm_set1_epi32(-1);
let r = _mm_mask_cvtepi8_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepi8_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepi8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm_maskz_cvtepi8_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepi8_epi32(0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu8_epi32(a);
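// All inputs are non-negative, so zero extension matches the sign-extending
// epi8 variant above.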
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepu8_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepu8_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu8_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepu8_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm256_set1_epi32(-1);
let r = _mm256_mask_cvtepu8_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtepu8_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm256_maskz_cvtepu8_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtepu8_epi32(0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm_set1_epi32(-1);
let r = _mm_mask_cvtepu8_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepu8_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepu8_epi32() {
let a = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm_maskz_cvtepu8_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepu8_epi32(0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi16_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepi16_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepi16_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi16_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepi16_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi16_epi32() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let src = _mm256_set1_epi32(-1);
let r = _mm256_mask_cvtepi16_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtepi16_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi16_epi32() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_cvtepi16_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtepi16_epi32(0b11111111, a);
let e = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi16_epi32() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let src = _mm_set1_epi32(-1);
let r = _mm_mask_cvtepi16_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepi16_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepi16_epi32() {
let a = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm_maskz_cvtepi16_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepi16_epi32(0b00001111, a);
let e = _mm_set_epi32(4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu16_epi32(a);
let e = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_epi32(-1);
let r = _mm512_mask_cvtepu16_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtepu16_epi32(src, 0b00000000_11111111, a);
let e = _mm512_set_epi32(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu16_epi32() {
let a = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu16_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtepu16_epi32(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepu16_epi32() {
let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm256_set1_epi32(-1);
let r = _mm256_mask_cvtepu16_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvtepu16_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepu16_epi32() {
let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm256_maskz_cvtepu16_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvtepu16_epi32(0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepu16_epi32() {
let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm_set1_epi32(-1);
let r = _mm_mask_cvtepu16_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepu16_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepu16_epi32() {
let a = _mm_set_epi16(8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm_maskz_cvtepu16_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepu16_epi32(0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_ps(a);
let e = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_ps(-1.);
let r = _mm512_mask_cvtepi32_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtepi32_ps(src, 0b00000000_11111111, a);
let e = _mm512_set_ps(
-1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtepi32_ps(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi32_ps() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let src = _mm256_set1_ps(-1.);
let r = _mm256_mask_cvtepi32_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_cvtepi32_ps(src, 0b11111111, a);
let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi32_ps() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_maskz_cvtepi32_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_cvtepi32_ps(0b11111111, a);
let e = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi32_ps() {
let a = _mm_set_epi32(1, 2, 3, 4);
let src = _mm_set1_ps(-1.);
let r = _mm_mask_cvtepi32_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_cvtepi32_ps(src, 0b00001111, a);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepi32_ps() {
let a = _mm_set_epi32(1, 2, 3, 4);
let r = _mm_maskz_cvtepi32_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_cvtepi32_ps(0b00001111, a);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepu32_ps(a);
let e = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm512_set1_ps(-1.);
let r = _mm512_mask_cvtepu32_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtepu32_ps(src, 0b00000000_11111111, a);
let e = _mm512_set_ps(
-1., -1., -1., -1., -1., -1., -1., -1., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepu32_ps() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepu32_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtepu32_ps(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_epi16(a);
let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtepi32_epi16(src, 0b00000000_11111111, a);
let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_epi16() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtepi32_epi16(0b00000000_11111111, a);
let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_cvtepi32_epi16(a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let src = _mm_set1_epi16(-1);
let r = _mm256_mask_cvtepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtepi32_epi16(src, 0b11111111, a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_cvtepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtepi32_epi16(0b11111111, a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_cvtepi32_epi16(a);
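// The narrowed result occupies only the low 64 bits; the upper four i16 lanes
// are zeroed.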
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let src = _mm_set1_epi16(0);
let r = _mm_mask_cvtepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepi32_epi16(src, 0b00001111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_maskz_cvtepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepi32_epi16(0b00001111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_cvtepi32_epi8(a);
let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtepi32_epi8(src, 0b00000000_11111111, a);
let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtepi32_epi8() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_cvtepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtepi32_epi8(0b00000000_11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtepi32_epi8() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_cvtepi32_epi8(a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi32_epi8() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let src = _mm_set1_epi8(0);
let r = _mm256_mask_cvtepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtepi32_epi8(src, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtepi32_epi8() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_cvtepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtepi32_epi8(0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtepi32_epi8() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_cvtepi32_epi8(a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi32_epi8() {
let a = _mm_set_epi32(4, 5, 6, 7);
let src = _mm_set1_epi8(0);
let r = _mm_mask_cvtepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtepi32_epi8(src, 0b00001111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtepi32_epi8() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_maskz_cvtepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtepi32_epi8(0b00001111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_cvtsepi32_epi16(a);
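// Signed saturation clamps i32::MIN and i32::MAX to i16::MIN and i16::MAX.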
#[rustfmt::skip]
let e = _mm256_set_epi16(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtsepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtsepi32_epi16(src, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm256_set_epi16(
-1, -1, -1, -1,
-1, -1, -1, -1,
8, 9, 10, 11,
12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtsepi32_epi16(0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm256_set_epi16(
0, 0, 0, 0,
0, 0, 0, 0,
8, 9, 10, 11,
12, 13, i16::MIN, i16::MAX,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtsepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_cvtsepi32_epi16(a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let src = _mm_set1_epi16(-1);
let r = _mm256_mask_cvtsepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtsepi32_epi16(src, 0b11111111, a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtsepi32_epi16() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_cvtsepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtsepi32_epi16(0b11111111, a);
let e = _mm_set_epi16(0, 1, 2, 3, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtsepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_cvtsepi32_epi16(a);
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtsepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let src = _mm_set1_epi16(0);
let r = _mm_mask_cvtsepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtsepi32_epi16(src, 0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtsepi32_epi16() {
let a = _mm_set_epi32(4, 5, 6, 7);
let r = _mm_maskz_cvtsepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtsepi32_epi16(0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 4, 5, 6, 7);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_cvtsepi32_epi8(a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtsepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtsepi32_epi8(src, 0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
-1, -1, -1, -1,
-1, -1, -1, -1,
8, 9, 10, 11,
12, 13, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtsepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MAX,
);
let r = _mm512_maskz_cvtsepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtsepi32_epi8(0b00000000_11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
8, 9, 10, 11,
12, 13, i8::MIN, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtsepi32_epi8() {
let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm256_cvtsepi32_epi8(a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
9, 10, 11, 12,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi32_epi8() {
let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm_set1_epi8(0);
let r = _mm256_mask_cvtsepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtsepi32_epi8(src, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
9, 10, 11, 12,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtsepi32_epi8() {
let a = _mm256_set_epi32(9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm256_maskz_cvtsepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtsepi32_epi8(0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
9, 10, 11, 12,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtsepi32_epi8() {
let a = _mm_set_epi32(13, 14, 15, 16);
let r = _mm_cvtsepi32_epi8(a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtsepi32_epi8() {
let a = _mm_set_epi32(13, 14, 15, 16);
let src = _mm_set1_epi8(0);
let r = _mm_mask_cvtsepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtsepi32_epi8(src, 0b00001111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtsepi32_epi8() {
let a = _mm_set_epi32(13, 14, 15, 16);
let r = _mm_maskz_cvtsepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtsepi32_epi8(0b00001111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
13, 14, 15, 16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_cvtusepi32_epi16(a);
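// Unsigned saturation treats each lane as u32: i32::MIN (0x8000_0000) exceeds
// u16::MAX and clamps to 0xFFFF, which prints as -1 in an i16 lane.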
let e = _mm256_set_epi16(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let src = _mm256_set1_epi16(-1);
let r = _mm512_mask_cvtusepi32_epi16(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtusepi32_epi16(src, 0b00000000_11111111, a);
let e = _mm256_set_epi16(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi16() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi16(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtusepi32_epi16(0b00000000_11111111, a);
let e = _mm256_set_epi16(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtusepi32_epi16() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_cvtusepi32_epi16(a);
let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi32_epi16() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let src = _mm_set1_epi16(0);
let r = _mm256_mask_cvtusepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtusepi32_epi16(src, 0b11111111, a);
let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtusepi32_epi16() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_maskz_cvtusepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtusepi32_epi16(0b11111111, a);
let e = _mm_set_epi16(1, 2, 3, 4, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtusepi32_epi16() {
let a = _mm_set_epi32(5, 6, 7, 8);
let r = _mm_cvtusepi32_epi16(a);
let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtusepi32_epi16() {
let a = _mm_set_epi32(5, 6, 7, 8);
let src = _mm_set1_epi16(0);
let r = _mm_mask_cvtusepi32_epi16(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtusepi32_epi16(src, 0b00001111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtusepi32_epi16() {
let a = _mm_set_epi32(5, 6, 7, 8);
let r = _mm_maskz_cvtusepi32_epi16(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtusepi32_epi16(0b00001111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtusepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_cvtusepi32_epi8(a);
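// As with the epi16 variant, i32::MIN clamps to u8::MAX (0xFF), printed as -1.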
let e = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let src = _mm_set1_epi8(-1);
let r = _mm512_mask_cvtusepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm512_mask_cvtusepi32_epi8(src, 0b00000000_11111111, a);
let e = _mm_set_epi8(-1, -1, -1, -1, -1, -1, -1, -1, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtusepi32_epi8() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
0, 1, 2, 3,
4, 5, 6, 7,
8, 9, 10, 11,
12, 13, i32::MIN, i32::MIN,
);
let r = _mm512_maskz_cvtusepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_cvtusepi32_epi8(0b00000000_11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 8, 9, 10, 11, 12, 13, -1, -1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvtusepi32_epi8() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
let r = _mm256_cvtusepi32_epi8(a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi32_epi8() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
let src = _mm_set1_epi8(0);
let r = _mm256_mask_cvtusepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtusepi32_epi8(src, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtusepi32_epi8() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, i32::MAX);
let r = _mm256_maskz_cvtusepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtusepi32_epi8(0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 1, 2, 3, 4, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvtusepi32_epi8() {
let a = _mm_set_epi32(5, 6, 7, i32::MAX);
let r = _mm_cvtusepi32_epi8(a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtusepi32_epi8() {
let a = _mm_set_epi32(5, 6, 7, i32::MAX);
let src = _mm_set1_epi8(0);
let r = _mm_mask_cvtusepi32_epi8(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtusepi32_epi8(src, 0b00001111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtusepi32_epi8() {
let a = _mm_set_epi32(5, 6, 7, i32::MAX);
let r = _mm_maskz_cvtusepi32_epi8(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtusepi32_epi8(0b00001111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 6, 7, u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
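// TO_NEAREST_INT rounds halves to even (9.5 -> 10); TO_NEG_INF below floors
// instead (9.5 -> 9). Negative halves go to -2, -4, ... either way.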
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
let r = _mm512_cvt_roundps_epi32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r =
_mm512_mask_cvt_roundps_epi32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvt_roundps_epi32(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvt_roundps_epi32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvt_roundps_epi32(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 10, 10, 12, 12, 14, 14, 16);
assert_eq_m512i(r, e);
let r = _mm512_cvt_roundps_epu32(a, _MM_FROUND_TO_NEG_INF | _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r =
_mm512_mask_cvt_roundps_epu32(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvt_roundps_epu32(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvt_roundps_epu32(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvt_roundps_epu32(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_cvt_roundepi32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 8., 10., 10., 12., 12., 14., 14., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let src = _mm512_set1_ps(0.);
let r =
_mm512_mask_cvt_roundepi32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundepi32_ps(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundepi32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_maskz_cvt_roundepi32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundepi32_ps(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm512_setr_ps(
0., -2., 2., -4., 4., -6., 6., -8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
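// A note on the 4294967300. literals below: the epu32 source reinterprets -2
// as the u32 4294967294, which rounds to 2^32 = 4294967296.0 when converted
// to f32; the written literal parses to that same f32 value, so the
// comparison is still exact.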
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_cvt_roundepu32_ps(a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 4294967300., 2., 4294967300.,
4., 4294967300., 6., 4294967300.,
8., 10., 10., 12.,
12., 14., 14., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let src = _mm512_set1_ps(0.);
let r =
_mm512_mask_cvt_roundepu32_ps(src, 0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundepu32_ps(
src,
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 4294967300., 2., 4294967300.,
4., 4294967300., 6., 4294967300.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundepu32_ps() {
let a = _mm512_setr_epi32(0, -2, 2, -4, 4, -6, 6, -8, 8, 10, 10, 12, 12, 14, 14, 16);
let r = _mm512_maskz_cvt_roundepu32_ps(0, a, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundepu32_ps(
0b00000000_11111111,
a,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
#[rustfmt::skip]
let e = _mm512_setr_ps(
0., 4294967300., 2., 4294967300.,
4., 4294967300., 6., 4294967300.,
0., 0., 0., 0.,
0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
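// The f32 -> f16 results are checked as packed 64-bit lanes: 0x3C00 is the
// IEEE 754 binary16 encoding of 1.0, and
// 4323521613979991040 == 0x3C00_3C00_3C00_3C00, i.e. four half-precision
// ones per lane.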
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_cvt_roundps_ph(a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let src = _mm256_set1_epi16(0);
let r = _mm512_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvt_roundps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvt_roundps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvt_roundps_ph() {
let a = _mm256_set1_ps(1.);
let src = _mm_set1_epi16(0);
let r = _mm256_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvt_roundps_ph(src, 0b11111111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvt_roundps_ph() {
let a = _mm256_set1_ps(1.);
let r = _mm256_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvt_roundps_ph(0b11111111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvt_roundps_ph() {
let a = _mm_set1_ps(1.);
let src = _mm_set1_epi16(0);
let r = _mm_mask_cvt_roundps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, src);
let r = _mm_mask_cvt_roundps_ph(src, 0b00001111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvt_roundps_ph() {
let a = _mm_set1_ps(1.);
let r = _mm_maskz_cvt_roundps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvt_roundps_ph(0b00001111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_cvtps_ph(a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let src = _mm256_set1_epi16(0);
let r = _mm512_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, src);
let r = _mm512_mask_cvtps_ph(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtps_ph() {
let a = _mm512_set1_ps(1.);
let r = _mm512_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm512_maskz_cvtps_ph(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm256_setr_epi64x(4323521613979991040, 4323521613979991040, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtps_ph() {
let a = _mm256_set1_ps(1.);
let src = _mm_set1_epi16(0);
let r = _mm256_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, src);
let r = _mm256_mask_cvtps_ph(src, 0b11111111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtps_ph() {
let a = _mm256_set1_ps(1.);
let r = _mm256_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_cvtps_ph(0b11111111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtps_ph() {
let a = _mm_set1_ps(1.);
let src = _mm_set1_epi16(0);
let r = _mm_mask_cvtps_ph(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, src);
let r = _mm_mask_cvtps_ph(src, 0b00001111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtps_ph() {
let a = _mm_set1_ps(1.);
let r = _mm_maskz_cvtps_ph(0, a, _MM_FROUND_NO_EXC);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvtps_ph(0b00001111, a, _MM_FROUND_NO_EXC);
let e = _mm_setr_epi64x(4323521613979991040, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_cvt_roundph_ps(a, _MM_FROUND_NO_EXC);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let src = _mm512_set1_ps(0.);
let r = _mm512_mask_cvt_roundph_ps(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512(r, src);
let r = _mm512_mask_cvt_roundph_ps(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvt_roundph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_maskz_cvt_roundph_ps(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvt_roundph_ps(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_cvtph_ps(a);
let e = _mm512_set1_ps(1.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let src = _mm512_set1_ps(0.);
let r = _mm512_mask_cvtph_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_cvtph_ps(src, 0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtph_ps() {
let a = _mm256_setr_epi64x(
4323521613979991040,
4323521613979991040,
4323521613979991040,
4323521613979991040,
);
let r = _mm512_maskz_cvtph_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_cvtph_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtph_ps() {
let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
let src = _mm256_set1_ps(0.);
let r = _mm256_mask_cvtph_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_cvtph_ps(src, 0b11111111, a);
let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvtph_ps() {
let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
let r = _mm256_maskz_cvtph_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_cvtph_ps(0b11111111, a);
let e = _mm256_setr_ps(1., 1., 1., 1., 1., 1., 1., 1.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtph_ps() {
let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
let src = _mm_set1_ps(0.);
let r = _mm_mask_cvtph_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_cvtph_ps(src, 0b00001111, a);
let e = _mm_setr_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvtph_ps() {
let a = _mm_setr_epi64x(4323521613979991040, 4323521613979991040);
let r = _mm_maskz_cvtph_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_cvtph_ps(0b00001111, a);
let e = _mm_setr_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
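// The cvtt (truncating) conversions always round toward zero; the rounding
// immediate only carries _MM_FROUND_NO_EXC to suppress exceptions. Hence
// -1.5 becomes -1 here, rather than the -2 produced by the round-to-nearest
// tests above.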
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtt_roundps_epi32(a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtt_roundps_epi32(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtt_roundps_epi32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtt_roundps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtt_roundps_epi32(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtt_roundps_epi32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvtt_roundps_epu32(a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvtt_roundps_epu32(src, 0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvtt_roundps_epu32(src, 0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvtt_roundps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvtt_roundps_epu32(0, a, _MM_FROUND_NO_EXC);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvtt_roundps_epu32(0b00000000_11111111, a, _MM_FROUND_NO_EXC);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvttps_epi32(a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvttps_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvttps_epi32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvttps_epi32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvttps_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvttps_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -3, 4, -5, 6, -7, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvttps_epi32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let src = _mm256_set1_epi32(0);
let r = _mm256_mask_cvttps_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvttps_epi32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvttps_epi32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_maskz_cvttps_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvttps_epi32(0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvttps_epi32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let src = _mm_set1_epi32(0);
let r = _mm_mask_cvttps_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvttps_epi32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvttps_epi32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_maskz_cvttps_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvttps_epi32(0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_cvttps_epu32(a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let src = _mm512_set1_epi32(0);
let r = _mm512_mask_cvttps_epu32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_cvttps_epu32(src, 0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_cvttps_epu32() {
let a = _mm512_setr_ps(
0., -1.5, 2., -3.5, 4., -5.5, 6., -7.5, 8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5,
);
let r = _mm512_maskz_cvttps_epu32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_cvttps_epu32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(0, -1, 2, -1, 4, -1, 6, -1, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cvttps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_cvttps_epu32(a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvttps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let src = _mm256_set1_epi32(0);
let r = _mm256_mask_cvttps_epu32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_cvttps_epu32(src, 0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_cvttps_epu32() {
let a = _mm256_set_ps(8., 9.5, 10., 11.5, 12., 13.5, 14., 15.5);
let r = _mm256_maskz_cvttps_epu32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_cvttps_epu32(0b11111111, a);
let e = _mm256_set_epi32(8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cvttps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_cvttps_epu32(a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvttps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let src = _mm_set1_epi32(0);
let r = _mm_mask_cvttps_epu32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_cvttps_epu32(src, 0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_cvttps_epu32() {
let a = _mm_set_ps(12., 13.5, 14., 15.5);
let r = _mm_maskz_cvttps_epu32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_cvttps_epu32(0b00001111, a);
let e = _mm_set_epi32(12, 13, 14, 15);
assert_eq_m128i(r, e);
}
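// For the gathers below, the last argument is a byte scale: with scale 4,
// each i32 index selects one 4-byte element, so index 16 loads arr[16].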
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32gather_ps() {
let mut arr = [0f32; 256];
for i in 0..256 {
arr[i] = i as f32;
}
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_i32gather_ps(index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512(r, _mm512_setr_ps(0., 16., 32., 48., 64., 80., 96., 112.,
120., 128., 136., 144., 152., 160., 168., 176.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32gather_ps() {
let mut arr = [0f32; 256];
for i in 0..256 {
arr[i] = i as f32;
}
let src = _mm512_set1_ps(2.);
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_mask_i32gather_ps(src, mask, index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512(r, _mm512_setr_ps(2., 16., 2., 48., 2., 80., 2., 112.,
2., 128., 2., 144., 2., 160., 2., 176.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32gather_epi32() {
let mut arr = [0i32; 256];
for i in 0..256 {
arr[i] = i as i32;
}
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176);
let r = _mm512_i32gather_epi32(index, arr.as_ptr() as *const u8, 4);
#[rustfmt::skip]
assert_eq_m512i(r, _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
120, 128, 136, 144, 152, 160, 168, 176));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32gather_epi32() {
let mut arr = [0i32; 256];
for i in 0..256 {
arr[i] = i as i32;
}
let src = _mm512_set1_epi32(2);
let mask = 0b10101010_10101010;
let index = _mm512_setr_epi32(
0, 16, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176, 192, 208, 224, 240,
);
let r = _mm512_mask_i32gather_epi32(src, mask, index, arr.as_ptr() as *const u8, 4);
assert_eq_m512i(
r,
_mm512_setr_epi32(2, 16, 2, 48, 2, 80, 2, 112, 2, 144, 2, 176, 2, 208, 2, 240),
);
}
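// Scatter is the mirrored write: lane i of src is stored to
// base + index[i] * scale, so with indices 0, 16, 32, ... every 16th array
// element is written and the rest stay zero.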
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32scatter_ps() {
let mut arr = [0f32; 256];
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
_mm512_i32scatter_ps(arr.as_mut_ptr() as *mut u8, index, src, 4);
let mut expected = [0f32; 256];
for i in 0..16 {
expected[i * 16] = (i + 1) as f32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32scatter_ps() {
let mut arr = [0f32; 256];
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
_mm512_mask_i32scatter_ps(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
let mut expected = [0f32; 256];
for i in 0..8 {
expected[i * 32 + 16] = 2. * (i + 1) as f32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_i32scatter_epi32() {
let mut arr = [0i32; 256];
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
_mm512_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, index, src, 4);
let mut expected = [0i32; 256];
for i in 0..16 {
expected[i * 16] = (i + 1) as i32;
}
assert_eq!(&arr[..], &expected[..]);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_i32scatter_epi32() {
let mut arr = [0i32; 256];
let mask = 0b10101010_10101010;
#[rustfmt::skip]
let index = _mm512_setr_epi32(0, 16, 32, 48, 64, 80, 96, 112,
128, 144, 160, 176, 192, 208, 224, 240);
let src = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
_mm512_mask_i32scatter_epi32(arr.as_mut_ptr() as *mut u8, mask, index, src, 4);
let mut expected = [0i32; 256];
for i in 0..8 {
expected[i * 32 + 16] = 2 * (i + 1) as i32;
}
assert_eq!(&arr[..], &expected[..]);
}
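// In the mask results below, bit i corresponds to element i, i.e. the last
// argument of _mm512_set_ps (which lists elements from high to low) lands in
// bit 0. Ordered predicates such as cmplt return false whenever either
// operand is NaN.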
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmplt_ps_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmplt_ps_mask(mask, a, b);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnlt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
assert_eq!(_mm512_cmpnlt_ps_mask(a, b), !_mm512_cmplt_ps_mask(a, b));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnlt_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01111010_01111010;
assert_eq!(_mm512_mask_cmpnlt_ps_mask(mask, a, b), 0b01111010_01111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpnle_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmpnle_ps_mask(b, a);
assert_eq!(m, 0b00001101_00001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpnle_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmpnle_ps_mask(mask, b, a);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
assert_eq!(_mm512_cmple_ps_mask(a, b), 0b00100101_00100101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.,
0., 1., -1., f32::MAX, f32::NAN, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01111010_01111010;
assert_eq!(_mm512_mask_cmple_ps_mask(mask, a, b), 0b00100000_00100000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let m = _mm512_cmpeq_ps_mask(b, a);
assert_eq!(m, 0b11001101_11001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_ps_mask(mask, b, a);
assert_eq!(r, 0b01001000_01001000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let m = _mm512_cmpneq_ps_mask(b, a);
assert_eq!(m, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, f32::NAN, -100.);
#[rustfmt::skip]
let b = _mm512_set_ps(0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.,
0., 1., 13., 42., f32::MAX, f32::MIN, f32::NAN, -100.);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_ps_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmp_ps_mask(a, b, _CMP_LT_OQ);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmp_ps_mask() {
let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm256_set1_ps(-1.);
let m = _mm256_cmp_ps_mask(a, b, _CMP_LT_OQ);
assert_eq!(m, 0b00000101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmp_ps_mask() {
let a = _mm256_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm256_set1_ps(-1.);
let mask = 0b01100110;
let r = _mm256_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
assert_eq!(r, 0b00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmp_ps_mask() {
let a = _mm_set_ps(0., 1., -1., 13.);
let b = _mm_set1_ps(1.);
let m = _mm_cmp_ps_mask(a, b, _CMP_LT_OQ);
assert_eq!(m, 0b00001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmp_ps_mask() {
let a = _mm_set_ps(0., 1., -1., 13.);
let b = _mm_set1_ps(1.);
let mask = 0b11111111;
let r = _mm_mask_cmp_ps_mask(mask, a, b, _CMP_LT_OQ);
assert_eq!(r, 0b00001010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_round_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let m = _mm512_cmp_round_ps_mask(a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_round_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.,
0., 1., -1., 13., f32::MAX, f32::MIN, 100., -100.);
let b = _mm512_set1_ps(-1.);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_round_ps_mask(mask, a, b, _CMP_LT_OQ, _MM_FROUND_CUR_DIRECTION);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let m = _mm512_cmpord_ps_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let mask = 0b11000011_11000011;
let m = _mm512_mask_cmpord_ps_mask(mask, a, b);
assert_eq!(m, 0b00000001_00000001);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpunord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let m = _mm512_cmpunord_ps_mask(a, b);
assert_eq!(m, 0b11111010_11111010);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpunord_ps_mask() {
#[rustfmt::skip]
let a = _mm512_set_ps(f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, -1., f32::NAN, 0.,
f32::NAN, f32::MAX, f32::NAN, f32::MIN, f32::NAN, 1., f32::NAN, 2.);
#[rustfmt::skip]
let b = _mm512_set_ps(f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 0.,
f32::NAN, f32::NAN, f32::NAN, f32::NAN, f32::MIN, f32::MAX, -1., 2.);
let mask = 0b00001111_00001111;
let m = _mm512_mask_cmpunord_ps_mask(mask, a, b);
assert_eq!(m, 0b00001010_00001010);
}
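// The scalar ss/sd comparisons examine only element 0, so the result mask is
// either 0 or 1, and the masked variants additionally require bit 0 of the
// input mask to be set.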
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_cmp_ss_mask(a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_mask_cmp_ss_mask(0b10, a, b, _CMP_GE_OS);
assert_eq!(m, 0);
let m = _mm_mask_cmp_ss_mask(0b1, a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_cmp_round_ss_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_ss_mask() {
let a = _mm_setr_ps(2., 1., 1., 1.);
let b = _mm_setr_ps(1., 2., 2., 2.);
let m = _mm_mask_cmp_round_ss_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0);
let m = _mm_mask_cmp_round_ss_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_cmp_sd_mask(a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_mask_cmp_sd_mask(0b10, a, b, _CMP_GE_OS);
assert_eq!(m, 0);
let m = _mm_mask_cmp_sd_mask(0b1, a, b, _CMP_GE_OS);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cmp_round_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_cmp_round_sd_mask(a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cmp_round_sd_mask() {
let a = _mm_setr_pd(2., 1.);
let b = _mm_setr_pd(1., 2.);
let m = _mm_mask_cmp_round_sd_mask(0b10, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 0);
let m = _mm_mask_cmp_round_sd_mask(0b1, a, b, _CMP_GE_OS, _MM_FROUND_CUR_DIRECTION);
assert_eq!(m, 1);
}
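// In the unsigned epu32 comparisons, -1 and u32::MAX as i32 share the bit
// pattern 0xFFFF_FFFF and compare as the largest u32, so cmplt against
// _mm512_set1_epi32(-1) holds for every other value.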
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmplt_epu32_mask(a, b);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmplt_epu32_mask(mask, a, b);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmplt_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
let b = _mm256_set1_epi32(1);
let r = _mm256_cmplt_epu32_mask(a, b);
assert_eq!(r, 0b10000000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmplt_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 101, 100, 99);
let b = _mm256_set1_epi32(1);
let mask = 0b11111111;
let r = _mm256_mask_cmplt_epu32_mask(mask, a, b);
assert_eq!(r, 0b10000000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmplt_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let r = _mm_cmplt_epu32_mask(a, b);
assert_eq!(r, 0b00001000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmplt_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmplt_epu32_mask(mask, a, b);
assert_eq!(r, 0b00001000);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmpgt_epu32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpgt_epu32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpgt_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
let b = _mm256_set1_epi32(1);
let r = _mm256_cmpgt_epu32_mask(a, b);
assert_eq!(r, 0b00111111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpgt_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 99, 100, 101);
let b = _mm256_set1_epi32(1);
let mask = 0b11111111;
let r = _mm256_mask_cmpgt_epu32_mask(mask, a, b);
assert_eq!(r, 0b00111111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpgt_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let r = _mm_cmpgt_epu32_mask(a, b);
assert_eq!(r, 0b00000011);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpgt_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmpgt_epu32_mask(mask, a, b);
assert_eq!(r, 0b00000011);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmple_epu32_mask(a, b),
!_mm512_cmpgt_epu32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmple_epu32_mask(mask, a, b),
0b01111010_01111010
);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmple_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
let b = _mm256_set1_epi32(1);
let r = _mm256_cmple_epu32_mask(a, b);
assert_eq!(r, 0b11000000)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmple_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 200, 100, 101);
let b = _mm256_set1_epi32(1);
let mask = 0b11111111;
let r = _mm256_mask_cmple_epu32_mask(mask, a, b);
assert_eq!(r, 0b11000000)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmple_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let r = _mm_cmple_epu32_mask(a, b);
assert_eq!(r, 0b00001100)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmple_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmple_epu32_mask(mask, a, b);
assert_eq!(r, 0b00001100)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpge_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmpge_epu32_mask(a, b),
!_mm512_cmplt_epu32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpge_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
    _mm512_mask_cmpge_epu32_mask(mask, a, b),
    0b00110000_00110000
);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpge_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
let b = _mm256_set1_epi32(1);
let r = _mm256_cmpge_epu32_mask(a, b);
assert_eq!(r, 0b01111111)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpge_epu32_mask() {
let a = _mm256_set_epi32(0, 1, 2, u32::MAX as i32, i32::MAX, 300, 100, 200);
let b = _mm256_set1_epi32(1);
let mask = 0b11111111;
let r = _mm256_mask_cmpge_epu32_mask(mask, a, b);
assert_eq!(r, 0b01111111)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpge_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let r = _mm_cmpge_epu32_mask(a, b);
assert_eq!(r, 0b00000111)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpge_epu32_mask() {
let a = _mm_set_epi32(0, 1, 2, u32::MAX as i32);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmpge_epu32_mask(mask, a, b);
assert_eq!(r, 0b00000111)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpeq_epu32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_epu32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpeq_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm256_cmpeq_epu32_mask(b, a);
assert_eq!(m, 0b11001111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpeq_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm256_mask_cmpeq_epu32_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpeq_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set_epi32(0, 1, 13, 42);
let m = _mm_cmpeq_epu32_mask(b, a);
assert_eq!(m, 0b00001100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpeq_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set_epi32(0, 1, 13, 42);
let mask = 0b11111111;
let r = _mm_mask_cmpeq_epu32_mask(mask, b, a);
assert_eq!(r, 0b00001100);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpneq_epu32_mask(b, a);
assert_eq!(m, !_mm512_cmpeq_epu32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_epu32_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpneq_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
let r = _mm256_cmpneq_epu32_mask(b, a);
assert_eq!(r, 0b00110000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpneq_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, -100, 100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, -100, 100);
let mask = 0b11111111;
let r = _mm256_mask_cmpneq_epu32_mask(mask, b, a);
assert_eq!(r, 0b00110000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpneq_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set_epi32(0, 1, 13, 42);
let r = _mm_cmpneq_epu32_mask(b, a);
assert_eq!(r, 0b00000011);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpneq_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set_epi32(0, 1, 13, 42);
let mask = 0b11111111;
let r = _mm_mask_cmpneq_epu32_mask(mask, b, a);
assert_eq!(r, 0b00000011);
}
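// _mm512_cmp_epu32_mask takes its predicate as an immediate; _MM_CMPINT_LT
// selects unsigned less-than, so these results mirror the dedicated cmplt
// tests above.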
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_epu32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmp_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let m = _mm256_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b11001111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmp_epu32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm256_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b11001111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmp_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, i32::MAX);
let b = _mm_set1_epi32(1);
let m = _mm_cmp_epu32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b00001000);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmp_epu32_mask() {
let a = _mm_set_epi32(0, 1, -1, i32::MAX);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmp_epu32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b00001000);
}
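// The signed epi32 variants reinterpret the same bit patterns as i32, so
// u32::MAX as i32 now compares as -1 and most predicates flip relative to
// the epu32 tests.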
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmplt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmplt_epi32_mask(a, b);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmplt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmplt_epi32_mask(mask, a, b);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmplt_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let r = _mm256_cmplt_epi32_mask(a, b);
assert_eq!(r, 0b00000101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmplt_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 101, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm256_mask_cmplt_epi32_mask(mask, a, b);
assert_eq!(r, 0b00000101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmplt_epi32_mask() {
let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
let b = _mm_set1_epi32(-1);
let r = _mm_cmplt_epi32_mask(a, b);
assert_eq!(r, 0b00000101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmplt_epi32_mask() {
let a = _mm_set_epi32(i32::MAX, i32::MIN, 100, -100);
let b = _mm_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm_mask_cmplt_epi32_mask(mask, a, b);
assert_eq!(r, 0b00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpgt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmpgt_epi32_mask(b, a);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpgt_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmpgt_epi32_mask(mask, b, a);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpgt_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let r = _mm256_cmpgt_epi32_mask(a, b);
assert_eq!(r, 0b11011010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpgt_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm256_mask_cmpgt_epi32_mask(mask, a, b);
assert_eq!(r, 0b11011010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpgt_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set1_epi32(-1);
let r = _mm_cmpgt_epi32_mask(a, b);
assert_eq!(r, 0b00001101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpgt_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm_mask_cmpgt_epi32_mask(mask, a, b);
assert_eq!(r, 0b00001101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmple_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmple_epi32_mask(a, b),
!_mm512_cmpgt_epi32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmple_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
    _mm512_mask_cmple_epi32_mask(mask, a, b),
    0b00110000_00110000
);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmple_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let r = _mm256_cmple_epi32_mask(a, b);
assert_eq!(r, 0b00100101)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmple_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 200, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm256_mask_cmple_epi32_mask(mask, a, b);
assert_eq!(r, 0b00100101)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmple_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 200);
let b = _mm_set1_epi32(-1);
let r = _mm_cmple_epi32_mask(a, b);
assert_eq!(r, 0b00000010)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmple_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 200);
let b = _mm_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm_mask_cmple_epi32_mask(mask, a, b);
assert_eq!(r, 0b00000010)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpge_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
assert_eq!(
_mm512_cmpge_epi32_mask(a, b),
!_mm512_cmplt_epi32_mask(a, b)
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpge_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01111010_01111010;
assert_eq!(
_mm512_mask_cmpge_epi32_mask(mask, a, b),
0b01111010_01111010
);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpge_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let r = _mm256_cmpge_epi32_mask(a, b);
assert_eq!(r, 0b11111010)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpge_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, u32::MAX as i32, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm256_mask_cmpge_epi32_mask(mask, a, b);
assert_eq!(r, 0b11111010)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpge_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set1_epi32(-1);
let r = _mm_cmpge_epi32_mask(a, b);
assert_eq!(r, 0b00001111)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpge_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, u32::MAX as i32);
let b = _mm_set1_epi32(-1);
let mask = 0b11111111;
let r = _mm_mask_cmpge_epi32_mask(mask, a, b);
assert_eq!(r, 0b00001111)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpeq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpeq_epi32_mask(b, a);
assert_eq!(m, 0b11001111_11001111);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpeq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpeq_epi32_mask(mask, b, a);
assert_eq!(r, 0b01001010_01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpeq_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm256_cmpeq_epi32_mask(b, a);
assert_eq!(m, 0b11001111);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpeq_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010;
let r = _mm256_mask_cmpeq_epi32_mask(mask, b, a);
assert_eq!(r, 0b01001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpeq_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set_epi32(0, 1, 13, 42);
let m = _mm_cmpeq_epi32_mask(b, a);
assert_eq!(m, 0b00001100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpeq_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set_epi32(0, 1, 13, 42);
let mask = 0b11111111;
let r = _mm_mask_cmpeq_epi32_mask(mask, b, a);
assert_eq!(r, 0b00001100);
}
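// `cmpneq` tests: the inequality mask is the bitwise complement of the equality
// mask over the same operands, which the unmasked tests assert directly.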
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmpneq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm512_cmpneq_epi32_mask(b, a);
assert_eq!(m, !_mm512_cmpeq_epi32_mask(b, a));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmpneq_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100,
0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
#[rustfmt::skip]
let b = _mm512_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100,
0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b01111010_01111010;
let r = _mm512_mask_cmpneq_epi32_mask(mask, b, a);
assert_eq!(r, 0b00110010_00110010)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmpneq_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let m = _mm256_cmpneq_epi32_mask(b, a);
assert_eq!(m, !_mm256_cmpeq_epi32_mask(b, a));
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmpneq_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, -100, 100);
let b = _mm256_set_epi32(0, 1, 13, 42, i32::MAX, i32::MIN, 100, -100);
let mask = 0b11111111;
let r = _mm256_mask_cmpneq_epi32_mask(mask, b, a);
assert_eq!(r, 0b00110011)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmpneq_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set_epi32(0, 1, 13, 42);
let r = _mm_cmpneq_epi32_mask(b, a);
assert_eq!(r, 0b00000011)
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmpneq_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set_epi32(0, 1, 13, 42);
let mask = 0b11111111;
let r = _mm_mask_cmpneq_epi32_mask(mask, b, a);
assert_eq!(r, 0b00000011)
}
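// Generic `cmp` tests: the immediate selects the predicate (`_MM_CMPINT_LT`
// here), so these results should agree with the dedicated `cmplt` intrinsics
// on the same inputs.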
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cmp_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let m = _mm512_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b00000101_00000101);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cmp_epi32_mask() {
#[rustfmt::skip]
let a = _mm512_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100,
0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm512_set1_epi32(-1);
let mask = 0b01100110_01100110;
let r = _mm512_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b00000100_00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_cmp_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let m = _mm256_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b00000101);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cmp_epi32_mask() {
let a = _mm256_set_epi32(0, 1, -1, 13, i32::MAX, i32::MIN, 100, -100);
let b = _mm256_set1_epi32(-1);
let mask = 0b01100110;
let r = _mm256_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b00000100);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_cmp_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set1_epi32(1);
let m = _mm_cmp_epi32_mask(a, b, _MM_CMPINT_LT);
assert_eq!(m, 0b00001010);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cmp_epi32_mask() {
let a = _mm_set_epi32(0, 1, -1, 13);
let b = _mm_set1_epi32(1);
let mask = 0b11111111;
let r = _mm_mask_cmp_epi32_mask(mask, a, b, _MM_CMPINT_LT);
assert_eq!(r, 0b00001010);
}
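// `set*`/`setr*` tests: `_mm512_set_*` takes its arguments from the highest
// lane down, while `_mm512_setr_*` takes them in memory order (lane 0 first),
// so applying one to the reversed argument list of the other must yield equal
// vectors. `set4`/`setr4` repeat a four-element pattern across the register.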
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi8() {
let r = _mm512_set1_epi8(2);
assert_eq_m512i(
r,
_mm512_set_epi8(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi16() {
let r = _mm512_set1_epi16(2);
assert_eq_m512i(
r,
_mm512_set_epi16(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_epi32() {
let r = _mm512_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(
r,
_mm512_set_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_epi32() {
let r = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
assert_eq_m512i(
r,
_mm512_setr_epi32(15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi8() {
let r = _mm512_set_epi8(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2, 2, 2, 2,
);
assert_eq_m512i(r, _mm512_set1_epi8(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi16() {
let r = _mm512_set_epi16(
2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
2, 2, 2,
);
assert_eq_m512i(r, _mm512_set1_epi16(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_epi32() {
let r = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, _mm512_set1_epi32(2));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_si512() {
assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_si512());
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_epi32() {
assert_eq_m512i(_mm512_set1_epi32(0), _mm512_setzero_epi32());
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_ps() {
let r = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(
r,
_mm512_set_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_ps() {
let r = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
assert_eq_m512(
r,
_mm512_setr_ps(
15., 14., 13., 12., 11., 10., 9., 8., 7., 6., 5., 4., 3., 2., 1., 0.,
),
)
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set1_ps() {
#[rustfmt::skip]
let expected = _mm512_set_ps(2., 2., 2., 2., 2., 2., 2., 2.,
2., 2., 2., 2., 2., 2., 2., 2.);
assert_eq_m512(expected, _mm512_set1_ps(2.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_epi32() {
let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_set4_epi32(4, 3, 2, 1));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set4_ps() {
let r = _mm512_set_ps(
4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
);
assert_eq_m512(r, _mm512_set4_ps(4., 3., 2., 1.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_epi32() {
let r = _mm512_set_epi32(4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1, 4, 3, 2, 1);
assert_eq_m512i(r, _mm512_setr4_epi32(1, 2, 3, 4));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr4_ps() {
let r = _mm512_set_ps(
4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1., 4., 3., 2., 1.,
);
assert_eq_m512(r, _mm512_setr4_ps(1., 2., 3., 4.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero_ps() {
assert_eq_m512(_mm512_setzero_ps(), _mm512_set1_ps(0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setzero() {
assert_eq_m512(_mm512_setzero(), _mm512_set1_ps(0.));
}
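// Unaligned load/store tests: `loadu`/`storeu` carry no alignment requirement,
// so a plain slice pointer (passed through `black_box` to defeat constant
// folding) is a valid source and a stack-resident vector a valid destination.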
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_pd() {
let a = &[4., 3., 2., 5., 8., 9., 64., 50.];
let p = a.as_ptr();
let r = _mm512_loadu_pd(black_box(p));
let e = _mm512_setr_pd(4., 3., 2., 5., 8., 9., 64., 50.);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_pd() {
let a = _mm512_set1_pd(9.);
let mut r = _mm512_undefined_pd();
_mm512_storeu_pd(&mut r as *mut _ as *mut f64, a);
assert_eq_m512d(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_ps() {
let a = &[
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
];
let p = a.as_ptr();
let r = _mm512_loadu_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_storeu_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_setr_pd() {
let r = _mm512_set_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_setr_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_set_pd() {
let r = _mm512_setr_pd(0., 1., 2., 3., 4., 5., 6., 7.);
assert_eq_m512d(r, _mm512_set_pd(7., 6., 5., 4., 3., 2., 1., 0.));
}
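// Rotate tests: `rol`/`ror` rotate each 32-bit lane by the immediate, so a bit
// shifted out one end reappears at the other (1 << 31 rotated left by 1 is
// 1 << 0). `mask_` variants keep `src` lanes where k is 0; `maskz_` zero them.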
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rol_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_rol_epi32(a, 1);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rol_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_rol_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_rol_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rol_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let r = _mm512_maskz_rol_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rol_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_rol_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let r = _mm256_rol_epi32(a, 1);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_rol_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let r = _mm256_mask_rol_epi32(a, 0, a, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_rol_epi32(a, 0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_rol_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let r = _mm256_maskz_rol_epi32(0, a, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_rol_epi32(0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_rol_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let r = _mm_rol_epi32(a, 1);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_rol_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let r = _mm_mask_rol_epi32(a, 0, a, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_rol_epi32(a, 0b00001111, a, 1);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_rol_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let r = _mm_maskz_rol_epi32(0, a, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_rol_epi32(0b00001111, a, 1);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_ror_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_ror_epi32(a, 1);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_ror_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_mask_ror_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_ror_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_ror_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
let r = _mm512_maskz_ror_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_ror_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_ror_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let r = _mm256_ror_epi32(a, 1);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_ror_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let r = _mm256_mask_ror_epi32(a, 0, a, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_ror_epi32(a, 0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_ror_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let r = _mm256_maskz_ror_epi32(0, a, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_ror_epi32(0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_ror_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let r = _mm_ror_epi32(a, 1);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_ror_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let r = _mm_mask_ror_epi32(a, 0, a, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_ror_epi32(a, 0b00001111, a, 1);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_ror_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let r = _mm_maskz_ror_epi32(0, a, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_ror_epi32(0b00001111, a, 1);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
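// Immediate shift tests: unlike the rotates above, `slli`/`srli` discard the
// bits shifted out (1 << 31 shifted left by 1 becomes 0) and shift in zeros.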
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_slli_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_slli_epi32(a, 1);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_slli_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_mask_slli_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_slli_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_slli_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let r = _mm512_maskz_slli_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_slli_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_slli_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let r = _mm256_mask_slli_epi32(a, 0, a, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_slli_epi32(a, 0b11111111, a, 1);
let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_slli_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let r = _mm256_maskz_slli_epi32(0, a, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_slli_epi32(0b11111111, a, 1);
let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_slli_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let r = _mm_mask_slli_epi32(a, 0, a, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_slli_epi32(a, 0b00001111, a, 1);
let e = _mm_set_epi32(0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_slli_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let r = _mm_maskz_slli_epi32(0, a, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_slli_epi32(0b00001111, a, 1);
let e = _mm_set_epi32(0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srli_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_srli_epi32(a, 1);
let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srli_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let r = _mm512_mask_srli_epi32(a, 0, a, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_srli_epi32(a, 0b11111111_11111111, a, 1);
let e = _mm512_set_epi32(0 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srli_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
let r = _mm512_maskz_srli_epi32(0, a, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srli_epi32(0b00000000_11111111, a, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_srli_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let r = _mm256_mask_srli_epi32(a, 0, a, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_srli_epi32(a, 0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_srli_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let r = _mm256_maskz_srli_epi32(0, a, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_srli_epi32(0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_srli_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let r = _mm_mask_srli_epi32(a, 0, a, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_srli_epi32(a, 0b00001111, a, 1);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_srli_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let r = _mm_maskz_srli_epi32(0, a, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_srli_epi32(0b00001111, a, 1);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
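// Variable rotate tests: `rolv`/`rorv` take a per-lane rotate count from `b`
// instead of an immediate; with every lane of `b` set to 1, the expected
// results match the immediate-form tests above.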
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let b = _mm512_set1_epi32(1);
let r = _mm512_rolv_epi32(a, b);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_rolv_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_rolv_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rolv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_rolv_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rolv_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_rolv_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let b = _mm256_set1_epi32(1);
let r = _mm256_rolv_epi32(a, b);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_rolv_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let b = _mm256_set1_epi32(1);
let r = _mm256_mask_rolv_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_rolv_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_rolv_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let b = _mm256_set1_epi32(1);
let r = _mm256_maskz_rolv_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_rolv_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_rolv_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let b = _mm_set1_epi32(1);
let r = _mm_rolv_epi32(a, b);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_rolv_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let b = _mm_set1_epi32(1);
let r = _mm_mask_rolv_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_rolv_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_rolv_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let b = _mm_set1_epi32(1);
let r = _mm_maskz_rolv_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_rolv_epi32(0b00001111, a, b);
let e = _mm_set_epi32(1 << 0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_rorv_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let b = _mm512_set1_epi32(1);
let r = _mm512_rorv_epi32(a, b);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_rorv_epi32() {
let a = _mm512_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let b = _mm512_set1_epi32(1);
let r = _mm512_mask_rorv_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_rorv_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_rorv_epi32() {
let a = _mm512_set_epi32(3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1 << 0);
let b = _mm512_set1_epi32(1);
let r = _mm512_maskz_rorv_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_rorv_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_rorv_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let b = _mm256_set1_epi32(1);
let r = _mm256_rorv_epi32(a, b);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_rorv_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let b = _mm256_set1_epi32(1);
let r = _mm256_mask_rorv_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_rorv_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_rorv_epi32() {
let a = _mm256_set_epi32(1 << 0, 2, 2, 2, 2, 2, 2, 2);
let b = _mm256_set1_epi32(1);
let r = _mm256_maskz_rorv_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_rorv_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_rorv_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let b = _mm_set1_epi32(1);
let r = _mm_rorv_epi32(a, b);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_rorv_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let b = _mm_set1_epi32(1);
let r = _mm_mask_rorv_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_rorv_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_rorv_epi32() {
let a = _mm_set_epi32(1 << 0, 2, 2, 2);
let b = _mm_set1_epi32(1);
let r = _mm_maskz_rorv_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_rorv_epi32(0b00001111, a, b);
let e = _mm_set_epi32(1 << 31, 1, 1, 1);
assert_eq_m128i(r, e);
}
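// Variable shift tests: `sllv`/`srlv` shift each lane by the count in the
// corresponding lane of `count`, so per-lane counts of 0 leave lanes unchanged.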
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let count = _mm512_set1_epi32(1);
let r = _mm512_sllv_epi32(a, count);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let count = _mm512_set1_epi32(1);
let r = _mm512_mask_sllv_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sllv_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sllv_epi32() {
let a = _mm512_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1 << 31);
let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_sllv_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sllv_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sllv_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let count = _mm256_set1_epi32(1);
let r = _mm256_mask_sllv_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_sllv_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sllv_epi32() {
let a = _mm256_set_epi32(1 << 31, 1, 1, 1, 1, 1, 1, 1);
let count = _mm256_set1_epi32(1);
let r = _mm256_maskz_sllv_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_sllv_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(0, 2, 2, 2, 2, 2, 2, 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sllv_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let count = _mm_set1_epi32(1);
let r = _mm_mask_sllv_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_sllv_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sllv_epi32() {
let a = _mm_set_epi32(1 << 31, 1, 1, 1);
let count = _mm_set1_epi32(1);
let r = _mm_maskz_sllv_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_sllv_epi32(0b00001111, a, count);
let e = _mm_set_epi32(0, 2, 2, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srlv_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let count = _mm512_set1_epi32(1);
let r = _mm512_srlv_epi32(a, count);
let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srlv_epi32() {
let a = _mm512_set_epi32(0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
let count = _mm512_set1_epi32(1);
let r = _mm512_mask_srlv_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srlv_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srlv_epi32() {
let a = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0);
let count = _mm512_set_epi32(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1);
let r = _mm512_maskz_srlv_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srlv_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_srlv_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm256_set1_epi32(1);
let r = _mm256_mask_srlv_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_srlv_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_srlv_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm256_set1_epi32(1);
let r = _mm256_maskz_srlv_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_srlv_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_srlv_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set1_epi32(1);
let r = _mm_mask_srlv_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_srlv_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_srlv_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set1_epi32(1);
let r = _mm_maskz_srlv_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_srlv_epi32(0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
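// Non-immediate shift tests: `sll`/`srl` shift every lane by the same amount,
// taken from the low 64 bits of the 128-bit `count` operand; the upper
// elements of `count` are ignored, which the maskz tests deliberately set to
// nonzero junk to verify.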
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sll_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_sll_epi32(a, count);
#[rustfmt::skip]
let e = _mm512_set_epi32(
0, 1 << 2, 1 << 3, 1 << 4,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sll_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_sll_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sll_epi32(a, 0b11111111_11111111, a, count);
#[rustfmt::skip]
let e = _mm512_set_epi32(
0, 1 << 2, 1 << 3, 1 << 4,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sll_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 31,
);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_sll_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sll_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sll_epi32() {
let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_mask_sll_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_sll_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sll_epi32() {
let a = _mm256_set_epi32(1 << 13, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_maskz_sll_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_sll_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(1 << 14, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sll_epi32() {
let a = _mm_set_epi32(1 << 13, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_mask_sll_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_sll_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(1 << 14, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sll_epi32() {
let a = _mm_set_epi32(1 << 13, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_maskz_sll_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_sll_epi32(0b00001111, a, count);
let e = _mm_set_epi32(1 << 14, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srl_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_srl_epi32(a, count);
let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srl_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_srl_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srl_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(1 << 29, 0, 0, 1 << 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srl_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 31, 1 << 0, 1 << 1, 1 << 2,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 31,
);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_srl_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srl_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 29);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_srl_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_mask_srl_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_srl_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_srl_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_maskz_srl_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_srl_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_srl_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_mask_srl_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_srl_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_srl_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_maskz_srl_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_srl_epi32(0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
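// Arithmetic shift tests: `sra` replicates the sign bit, so negative lanes
// stay negative and round toward negative infinity (-15 >> 2 == -4, not -3).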
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let count = _mm_set_epi32(1, 0, 0, 2);
let r = _mm512_sra_epi32(a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
let count = _mm_set_epi32(0, 0, 0, 2);
let r = _mm512_mask_sra_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_sra_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_sra_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
let count = _mm_set_epi32(2, 0, 0, 2);
let r = _mm512_maskz_sra_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_sra_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_sra_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_mask_sra_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_sra_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_sra_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm256_maskz_sra_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_sra_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_sra_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_mask_sra_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_sra_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_sra_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set_epi32(0, 0, 0, 1);
let r = _mm_maskz_sra_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_sra_epi32(0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0);
let r = _mm512_srav_epi32(a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1);
let r = _mm512_mask_srav_epi32(a, 0, a, count);
assert_eq_m512i(r, a);
let r = _mm512_mask_srav_epi32(a, 0b11111111_11111111, a, count);
let e = _mm512_set_epi32(2, -2, 4, -4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srav_epi32() {
let a = _mm512_set_epi32(8, -8, 16, -15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15, -14);
let count = _mm512_set_epi32(2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2);
let r = _mm512_maskz_srav_epi32(0, a, count);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srav_epi32(0b00000000_11111111, a, count);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_srav_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm256_set1_epi32(1);
let r = _mm256_mask_srav_epi32(a, 0, a, count);
assert_eq_m256i(r, a);
let r = _mm256_mask_srav_epi32(a, 0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_srav_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let count = _mm256_set1_epi32(1);
let r = _mm256_maskz_srav_epi32(0, a, count);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_srav_epi32(0b11111111, a, count);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_srav_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set1_epi32(1);
let r = _mm_mask_srav_epi32(a, 0, a, count);
assert_eq_m128i(r, a);
let r = _mm_mask_srav_epi32(a, 0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_srav_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let count = _mm_set1_epi32(1);
let r = _mm_maskz_srav_epi32(0, a, count);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_srav_epi32(0b00001111, a, count);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 16, -15);
let r = _mm512_srai_epi32(a, 2);
let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
let r = _mm512_mask_srai_epi32(a, 0, a, 2);
assert_eq_m512i(r, a);
let r = _mm512_mask_srai_epi32(a, 0b11111111_11111111, a, 2);
let e = _mm512_set_epi32(2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_srai_epi32() {
let a = _mm512_set_epi32(8, -8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 15, -15);
let r = _mm512_maskz_srai_epi32(0, a, 2);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_srai_epi32(0b00000000_11111111, a, 2);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3, -4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_srai_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let r = _mm256_mask_srai_epi32(a, 0, a, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_srai_epi32(a, 0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_srai_epi32() {
let a = _mm256_set_epi32(1 << 5, 0, 0, 0, 0, 0, 0, 0);
let r = _mm256_maskz_srai_epi32(0, a, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_srai_epi32(0b11111111, a, 1);
let e = _mm256_set_epi32(1 << 4, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_srai_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let r = _mm_mask_srai_epi32(a, 0, a, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_srai_epi32(a, 0b00001111, a, 1);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_srai_epi32() {
let a = _mm_set_epi32(1 << 5, 0, 0, 0);
let r = _mm_maskz_srai_epi32(0, a, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_srai_epi32(0b00001111, a, 1);
let e = _mm_set_epi32(1 << 4, 0, 0, 0);
assert_eq_m128i(r, e);
}
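// `permute_ps` tests: the 8-bit immediate encodes one of four selectors (two
// bits per destination lane) applied independently within each 128-bit lane,
// so 0b11111111 broadcasts element 3 of every lane.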
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permute_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_permute_ps(a, 0b11111111);
let e = _mm512_setr_ps(
3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permute_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_permute_ps(a, 0, a, 0b11111111);
assert_eq_m512(r, a);
let r = _mm512_mask_permute_ps(a, 0b11111111_11111111, a, 0b11111111);
let e = _mm512_setr_ps(
3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permute_ps() {
let a = _mm512_setr_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_permute_ps(0, a, 0b11111111);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permute_ps(0b11111111_11111111, a, 0b11111111);
let e = _mm512_setr_ps(
3., 3., 3., 3., 7., 7., 7., 7., 11., 11., 11., 11., 15., 15., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permute_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_mask_permute_ps(a, 0, a, 0b11111111);
assert_eq_m256(r, a);
let r = _mm256_mask_permute_ps(a, 0b11111111, a, 0b11111111);
let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permute_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_maskz_permute_ps(0, a, 0b11111111);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_permute_ps(0b11111111, a, 0b11111111);
let e = _mm256_set_ps(0., 0., 0., 0., 4., 4., 4., 4.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_permute_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_mask_permute_ps(a, 0, a, 0b11111111);
assert_eq_m128(r, a);
let r = _mm_mask_permute_ps(a, 0b00001111, a, 0b11111111);
let e = _mm_set_ps(0., 0., 0., 0.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_permute_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_maskz_permute_ps(0, a, 0b11111111);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_permute_ps(0b00001111, a, 0b11111111);
let e = _mm_set_ps(0., 0., 0., 0.);
assert_eq_m128(r, e);
}
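// `permutevar` tests: `_mm512_permutevar_epi32` is a full-width permute (each
// index lane may select any of the 16 source elements), whereas
// `permutevar_ps` is the vpermilps-style shuffle that selects within each
// 128-bit lane using the low two bits of the corresponding index element.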
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_permutevar_epi32(idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_permutevar_epi32(a, 0, idx, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutevar_epi32(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(0b01);
let r = _mm512_permutevar_ps(a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(0b01);
let r = _mm512_mask_permutevar_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_permutevar_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 6., 6., 6., 6., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutevar_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let b = _mm512_set1_epi32(0b01);
let r = _mm512_maskz_permutevar_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutevar_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 10., 10., 10., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permutevar_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set1_epi32(0b01);
let r = _mm256_mask_permutevar_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_permutevar_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutevar_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let b = _mm256_set1_epi32(0b01);
let r = _mm256_maskz_permutevar_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_permutevar_ps(0b11111111, a, b);
let e = _mm256_set_ps(2., 2., 2., 2., 6., 6., 6., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_permutevar_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set1_epi32(0b01);
let r = _mm_mask_permutevar_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_permutevar_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_permutevar_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set1_epi32(0b01);
let r = _mm_maskz_permutevar_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_permutevar_ps(0b00001111, a, b);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
}
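// `permutexvar` tests: a full cross-lane permute; with `idx` broadcast to 1,
// every destination lane receives source element 1 (14 for the 512-bit vector
// built from 0..=15, 6 for the 256-bit one).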
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_permutexvar_epi32(idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_permutexvar_epi32(a, 0, idx, a);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutexvar_epi32(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_epi32(14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_epi32() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_permutexvar_epi32(0, idx, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_permutexvar_epi32(0b00000000_11111111, idx, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 14, 14, 14, 14, 14, 14, 14, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_permutexvar_epi32() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_permutexvar_epi32(idx, a);
let e = _mm256_set1_epi32(6);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permutexvar_epi32() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_mask_permutexvar_epi32(a, 0, idx, a);
assert_eq_m256i(r, a);
let r = _mm256_mask_permutexvar_epi32(a, 0b11111111, idx, a);
let e = _mm256_set1_epi32(6);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutexvar_epi32() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_permutexvar_epi32(0, idx, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_permutexvar_epi32(0b11111111, idx, a);
let e = _mm256_set1_epi32(6);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_permutexvar_ps(idx, a);
let e = _mm512_set1_ps(14.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_permutexvar_ps(a, 0, idx, a);
assert_eq_m512(r, a);
let r = _mm512_mask_permutexvar_ps(a, 0b11111111_11111111, idx, a);
let e = _mm512_set1_ps(14.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutexvar_ps() {
let idx = _mm512_set1_epi32(1);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_permutexvar_ps(0, idx, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutexvar_ps(0b00000000_11111111, idx, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 14., 14., 14., 14., 14., 14., 14., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_permutexvar_ps() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_permutexvar_ps(idx, a);
let e = _mm256_set1_ps(6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permutexvar_ps() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_mask_permutexvar_ps(a, 0, idx, a);
assert_eq_m256(r, a);
let r = _mm256_mask_permutexvar_ps(a, 0b11111111, idx, a);
let e = _mm256_set1_ps(6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutexvar_ps() {
let idx = _mm256_set1_epi32(1);
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_maskz_permutexvar_ps(0, idx, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_permutexvar_ps(0b11111111, idx, a);
let e = _mm256_set1_ps(6.);
assert_eq_m256(r, e);
}
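// `permutex2var` tests: each index selects from the concatenation of `a` and
// `b`: the low bits pick an element and the next bit (1 << 4 for 16 lanes,
// 1 << 3 for 8, 1 << 2 for 4) switches the source to `b`. The `mask2` variant
// copies `idx`, not `a`, into lanes whose mask bit is clear.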
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_permutex2var_epi32(a, idx, b);
let e = _mm512_set_epi32(
14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_mask_permutex2var_epi32(a, 0, idx, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_permutex2var_epi32(a, 0b11111111_11111111, idx, b);
let e = _mm512_set_epi32(
14, 100, 13, 100, 12, 100, 11, 100, 10, 100, 9, 100, 8, 100, 7, 100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_maskz_permutex2var_epi32(0, a, idx, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_permutex2var_epi32(0b00000000_11111111, a, idx, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 10, 100, 9, 100, 8, 100, 7, 100);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1000, 1 << 4, 2000, 1 << 4,
3000, 1 << 4, 4000, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_epi32(100);
let r = _mm512_mask2_permutex2var_epi32(a, idx, 0, b);
assert_eq_m512i(r, idx);
let r = _mm512_mask2_permutex2var_epi32(a, idx, 0b00000000_11111111, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1000, 1 << 4, 2000, 1 << 4,
3000, 1 << 4, 4000, 1 << 4,
10, 100, 9, 100,
8, 100, 7, 100,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_permutex2var_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_epi32(100);
let r = _mm256_permutex2var_epi32(a, idx, b);
let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permutex2var_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_epi32(100);
let r = _mm256_mask_permutex2var_epi32(a, 0, idx, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_permutex2var_epi32(a, 0b11111111, idx, b);
let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutex2var_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_epi32(100);
let r = _mm256_maskz_permutex2var_epi32(0, a, idx, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_permutex2var_epi32(0b11111111, a, idx, b);
let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask2_permutex2var_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_epi32(100);
let r = _mm256_mask2_permutex2var_epi32(a, idx, 0, b);
assert_eq_m256i(r, idx);
let r = _mm256_mask2_permutex2var_epi32(a, idx, 0b11111111, b);
let e = _mm256_set_epi32(6, 100, 5, 100, 4, 100, 3, 100);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_permutex2var_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_epi32(100);
let r = _mm_permutex2var_epi32(a, idx, b);
let e = _mm_set_epi32(2, 100, 1, 100);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_permutex2var_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_epi32(100);
let r = _mm_mask_permutex2var_epi32(a, 0, idx, b);
assert_eq_m128i(r, a);
let r = _mm_mask_permutex2var_epi32(a, 0b00001111, idx, b);
let e = _mm_set_epi32(2, 100, 1, 100);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_permutex2var_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_epi32(100);
let r = _mm_maskz_permutex2var_epi32(0, a, idx, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_permutex2var_epi32(0b00001111, a, idx, b);
let e = _mm_set_epi32(2, 100, 1, 100);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask2_permutex2var_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_epi32(100);
let r = _mm_mask2_permutex2var_epi32(a, idx, 0, b);
assert_eq_m128i(r, idx);
let r = _mm_mask2_permutex2var_epi32(a, idx, 0b00001111, b);
let e = _mm_set_epi32(2, 100, 1, 100);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_permutex2var_ps(a, idx, b);
let e = _mm512_set_ps(
14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_mask_permutex2var_ps(a, 0, idx, b);
assert_eq_m512(r, a);
let r = _mm512_mask_permutex2var_ps(a, 0b11111111_11111111, idx, b);
let e = _mm512_set_ps(
14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_maskz_permutex2var_ps(0, a, idx, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_permutex2var_ps(0b00000000_11111111, a, idx, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2_permutex2var_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
#[rustfmt::skip]
let idx = _mm512_set_epi32(
1, 1 << 4, 2, 1 << 4,
3, 1 << 4, 4, 1 << 4,
5, 1 << 4, 6, 1 << 4,
7, 1 << 4, 8, 1 << 4,
);
let b = _mm512_set1_ps(100.);
let r = _mm512_mask2_permutex2var_ps(a, idx, 0, b);
assert_eq_m512(r, _mm512_castsi512_ps(idx));
let r = _mm512_mask2_permutex2var_ps(a, idx, 0b11111111_11111111, b);
let e = _mm512_set_ps(
14., 100., 13., 100., 12., 100., 11., 100., 10., 100., 9., 100., 8., 100., 7., 100.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_permutex2var_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_ps(100.);
let r = _mm256_permutex2var_ps(a, idx, b);
let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_permutex2var_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_ps(100.);
let r = _mm256_mask_permutex2var_ps(a, 0, idx, b);
assert_eq_m256(r, a);
let r = _mm256_mask_permutex2var_ps(a, 0b11111111, idx, b);
let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_permutex2var_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_ps(100.);
let r = _mm256_maskz_permutex2var_ps(0, a, idx, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_permutex2var_ps(0b11111111, a, idx, b);
let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask2_permutex2var_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let idx = _mm256_set_epi32(1, 1 << 3, 2, 1 << 3, 3, 1 << 3, 4, 1 << 3);
let b = _mm256_set1_ps(100.);
let r = _mm256_mask2_permutex2var_ps(a, idx, 0, b);
assert_eq_m256(r, _mm256_castsi256_ps(idx));
let r = _mm256_mask2_permutex2var_ps(a, idx, 0b11111111, b);
let e = _mm256_set_ps(6., 100., 5., 100., 4., 100., 3., 100.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_permutex2var_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_ps(100.);
let r = _mm_permutex2var_ps(a, idx, b);
let e = _mm_set_ps(2., 100., 1., 100.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_permutex2var_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_ps(100.);
let r = _mm_mask_permutex2var_ps(a, 0, idx, b);
assert_eq_m128(r, a);
let r = _mm_mask_permutex2var_ps(a, 0b00001111, idx, b);
let e = _mm_set_ps(2., 100., 1., 100.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_permutex2var_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_ps(100.);
let r = _mm_maskz_permutex2var_ps(0, a, idx, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_permutex2var_ps(0b00001111, a, idx, b);
let e = _mm_set_ps(2., 100., 1., 100.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask2_permutex2var_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let idx = _mm_set_epi32(1, 1 << 2, 2, 1 << 2);
let b = _mm_set1_ps(100.);
let r = _mm_mask2_permutex2var_ps(a, idx, 0, b);
assert_eq_m128(r, _mm_castsi128_ps(idx));
let r = _mm_mask2_permutex2var_ps(a, idx, 0b00001111, b);
let e = _mm_set_ps(2., 100., 1., 100.);
assert_eq_m128(r, e);
}
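// `_MM_PERM_AADD` is the dword shuffle control 0b00_00_11_11 (0x0F): within
// each 128-bit lane, the two low dwords of the result take element D
// (index 3) and the two high dwords take element A (index 0).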
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_shuffle_epi32(a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
assert_eq_m512i(r, a);
let r = _mm512_mask_shuffle_epi32(a, 0b11111111_11111111, a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_epi32() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm512_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_shuffle_epi32(0b00000000_11111111, a, _MM_PERM_AADD);
let e = _mm512_setr_epi32(8, 8, 1, 1, 16, 16, 9, 9, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_shuffle_epi32() {
let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm256_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
assert_eq_m256i(r, a);
let r = _mm256_mask_shuffle_epi32(a, 0b11111111, a, _MM_PERM_AADD);
let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_shuffle_epi32() {
let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
let r = _mm256_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_shuffle_epi32(0b11111111, a, _MM_PERM_AADD);
let e = _mm256_set_epi32(8, 8, 1, 1, 16, 16, 9, 9);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_shuffle_epi32() {
let a = _mm_set_epi32(1, 4, 5, 8);
let r = _mm_mask_shuffle_epi32(a, 0, a, _MM_PERM_AADD);
assert_eq_m128i(r, a);
let r = _mm_mask_shuffle_epi32(a, 0b00001111, a, _MM_PERM_AADD);
let e = _mm_set_epi32(8, 8, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_shuffle_epi32() {
let a = _mm_set_epi32(1, 4, 5, 8);
let r = _mm_maskz_shuffle_epi32(0, a, _MM_PERM_AADD);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_shuffle_epi32(0b00001111, a, _MM_PERM_AADD);
let e = _mm_set_epi32(8, 8, 1, 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_shuffle_ps(a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_mask_shuffle_ps(a, 0, a, b, 0x0F);
assert_eq_m512(r, a);
let r = _mm512_mask_shuffle_ps(a, 0b11111111_11111111, a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 8., 8., 2., 2., 16., 16., 10., 10.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_ps() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_maskz_shuffle_ps(0, a, b, 0x0F);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_shuffle_ps(0b00000000_11111111, a, b, 0x0F);
let e = _mm512_setr_ps(
8., 8., 2., 2., 16., 16., 10., 10., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_shuffle_ps() {
let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_mask_shuffle_ps(a, 0, a, b, 0x0F);
assert_eq_m256(r, a);
let r = _mm256_mask_shuffle_ps(a, 0b11111111, a, b, 0x0F);
let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_shuffle_ps() {
let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_maskz_shuffle_ps(0, a, b, 0x0F);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_shuffle_ps(0b11111111, a, b, 0x0F);
let e = _mm256_set_ps(7., 7., 1., 1., 15., 15., 9., 9.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_shuffle_ps() {
let a = _mm_set_ps(1., 4., 5., 8.);
let b = _mm_set_ps(2., 3., 6., 7.);
let r = _mm_mask_shuffle_ps(a, 0, a, b, 0x0F);
assert_eq_m128(r, a);
let r = _mm_mask_shuffle_ps(a, 0b00001111, a, b, 0x0F);
let e = _mm_set_ps(7., 7., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_shuffle_ps() {
let a = _mm_set_ps(1., 4., 5., 8.);
let b = _mm_set_ps(2., 3., 6., 7.);
let r = _mm_maskz_shuffle_ps(0, a, b, 0x0F);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_shuffle_ps(0b00001111, a, b, 0x0F);
let e = _mm_set_ps(7., 7., 1., 1.);
assert_eq_m128(r, e);
}
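// `shuffle_i32x4`/`shuffle_f32x4` move whole 128-bit lanes: the low half of
// the result picks lanes from `a` and the high half picks lanes from `b`,
// with one selector per result lane in the immediate.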
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_shuffle_i32x4(a, b, 0b0000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_mask_shuffle_i32x4(a, 0, a, b, 0b0000);
assert_eq_m512i(r, a);
let r = _mm512_mask_shuffle_i32x4(a, 0b11111111_11111111, a, b, 0b0000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 2, 3, 6, 7, 2, 3, 6, 7);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_i32x4() {
let a = _mm512_setr_epi32(1, 4, 5, 8, 9, 12, 13, 16, 1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm512_setr_epi32(2, 3, 6, 7, 10, 11, 14, 15, 2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm512_maskz_shuffle_i32x4(0, a, b, 0b0000);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_shuffle_i32x4(0b00000000_11111111, a, b, 0b0000);
let e = _mm512_setr_epi32(1, 4, 5, 8, 1, 4, 5, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_shuffle_i32x4() {
let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm256_shuffle_i32x4(a, b, 0b00);
let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_shuffle_i32x4() {
let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm256_mask_shuffle_i32x4(a, 0, a, b, 0b00);
assert_eq_m256i(r, a);
let r = _mm256_mask_shuffle_i32x4(a, 0b11111111, a, b, 0b00);
let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_shuffle_i32x4() {
let a = _mm256_set_epi32(1, 4, 5, 8, 9, 12, 13, 16);
let b = _mm256_set_epi32(2, 3, 6, 7, 10, 11, 14, 15);
let r = _mm256_maskz_shuffle_i32x4(0, a, b, 0b00);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_shuffle_i32x4(0b11111111, a, b, 0b00);
let e = _mm256_set_epi32(10, 11, 14, 15, 9, 12, 13, 16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_shuffle_f32x4(a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_mask_shuffle_f32x4(a, 0, a, b, 0b00000000);
assert_eq_m512(r, a);
let r = _mm512_mask_shuffle_f32x4(a, 0b11111111_11111111, a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 2., 3., 6., 7., 2., 3., 6., 7.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_shuffle_f32x4() {
let a = _mm512_setr_ps(
1., 4., 5., 8., 9., 12., 13., 16., 1., 4., 5., 8., 9., 12., 13., 16.,
);
let b = _mm512_setr_ps(
2., 3., 6., 7., 10., 11., 14., 15., 2., 3., 6., 7., 10., 11., 14., 15.,
);
let r = _mm512_maskz_shuffle_f32x4(0, a, b, 0b00000000);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_shuffle_f32x4(0b00000000_11111111, a, b, 0b00000000);
let e = _mm512_setr_ps(
1., 4., 5., 8., 1., 4., 5., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_shuffle_f32x4() {
let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_shuffle_f32x4(a, b, 0b00);
let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_shuffle_f32x4() {
let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_mask_shuffle_f32x4(a, 0, a, b, 0b00);
assert_eq_m256(r, a);
let r = _mm256_mask_shuffle_f32x4(a, 0b11111111, a, b, 0b00);
let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_shuffle_f32x4() {
let a = _mm256_set_ps(1., 4., 5., 8., 9., 12., 13., 16.);
let b = _mm256_set_ps(2., 3., 6., 7., 10., 11., 14., 15.);
let r = _mm256_maskz_shuffle_f32x4(0, a, b, 0b00);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_shuffle_f32x4(0b11111111, a, b, 0b00);
let e = _mm256_set_ps(10., 11., 14., 15., 9., 12., 13., 16.);
assert_eq_m256(r, e);
}
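// The extract tests pull out the single 128-bit lane selected by the
// immediate; the mask variants then blend that lane with `src` or zero.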
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_extractf32x4_ps(a, 0b1);
let e = _mm_setr_ps(5., 6., 7., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let src = _mm_set1_ps(100.);
let r = _mm512_mask_extractf32x4_ps(src, 0, a, 0b1);
assert_eq_m128(r, src);
let r = _mm512_mask_extractf32x4_ps(src, 0b11111111, a, 0b1);
let e = _mm_setr_ps(5., 6., 7., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_extractf32x4_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_extractf32x4_ps(0, a, 0b1);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm512_maskz_extractf32x4_ps(0b00000001, a, 0b1);
let e = _mm_setr_ps(5., 0., 0., 0.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_extractf32x4_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_extractf32x4_ps(a, 0b1);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_extractf32x4_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let src = _mm_set1_ps(100.);
let r = _mm256_mask_extractf32x4_ps(src, 0, a, 0b1);
assert_eq_m128(r, src);
let r = _mm256_mask_extractf32x4_ps(src, 0b00001111, a, 0b1);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_extractf32x4_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_maskz_extractf32x4_ps(0, a, 0b1);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm256_maskz_extractf32x4_ps(0b00001111, a, 0b1);
let e = _mm_set_ps(1., 2., 3., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_extracti32x4_epi32(a, 0b1);
let e = _mm_setr_epi32(5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let src = _mm_set1_epi32(100);
let r = _mm512_mask_extracti32x4_epi32(src, 0, a, 0b1);
assert_eq_m128i(r, src);
let r = _mm512_mask_extracti32x4_epi32(src, 0b11111111, a, 0b1);
let e = _mm_setr_epi32(5, 6, 7, 8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm512_maskz_extracti32x4_epi32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_maskz_extracti32x4_epi32(0, a, 0b1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm512_maskz_extracti32x4_epi32(0b00000001, a, 0b1);
let e = _mm_setr_epi32(5, 0, 0, 0);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_extracti32x4_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_extracti32x4_epi32(a, 0b1);
let e = _mm_set_epi32(1, 2, 3, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_extracti32x4_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let src = _mm_set1_epi32(100);
let r = _mm256_mask_extracti32x4_epi32(src, 0, a, 0b1);
assert_eq_m128i(r, src);
let r = _mm256_mask_extracti32x4_epi32(src, 0b00001111, a, 0b1);
let e = _mm_set_epi32(1, 2, 3, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_extracti32x4_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let r = _mm256_maskz_extracti32x4_epi32(0, a, 0b1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm256_maskz_extracti32x4_epi32(0b00001111, a, 0b1);
let e = _mm_set_epi32(1, 2, 3, 4);
assert_eq_m128i(r, e);
}
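// `moveldup` duplicates each even-indexed element into the adjacent pair;
// `movehdup` further below does the same with the odd-indexed elements.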
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_moveldup_ps(a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_mask_moveldup_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_moveldup_ps(a, 0b11111111_11111111, a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 9., 9., 11., 11., 13., 13., 15., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_moveldup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_moveldup_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_moveldup_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
1., 1., 3., 3., 5., 5., 7., 7., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_moveldup_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_mask_moveldup_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_moveldup_ps(a, 0b11111111, a);
let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_moveldup_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_maskz_moveldup_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_moveldup_ps(0b11111111, a);
let e = _mm256_set_ps(2., 2., 4., 4., 6., 6., 8., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_moveldup_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let r = _mm_mask_moveldup_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_moveldup_ps(a, 0b00001111, a);
let e = _mm_set_ps(2., 2., 4., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_moveldup_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let r = _mm_maskz_moveldup_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_moveldup_ps(0b00001111, a);
let e = _mm_set_ps(2., 2., 4., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_movehdup_ps(a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_mask_movehdup_ps(a, 0, a);
assert_eq_m512(r, a);
let r = _mm512_mask_movehdup_ps(a, 0b11111111_11111111, a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 10., 10., 12., 12., 14., 14., 16., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_movehdup_ps() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let r = _mm512_maskz_movehdup_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_movehdup_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
2., 2., 4., 4., 6., 6., 8., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_movehdup_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_mask_movehdup_ps(a, 0, a);
assert_eq_m256(r, a);
let r = _mm256_mask_movehdup_ps(a, 0b11111111, a);
let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_movehdup_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let r = _mm256_maskz_movehdup_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_movehdup_ps(0b11111111, a);
let e = _mm256_set_ps(1., 1., 3., 3., 5., 5., 7., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_movehdup_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let r = _mm_mask_movehdup_ps(a, 0, a);
assert_eq_m128(r, a);
let r = _mm_mask_movehdup_ps(a, 0b00001111, a);
let e = _mm_set_ps(1., 1., 3., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_movehdup_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let r = _mm_maskz_movehdup_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_movehdup_ps(0b00001111, a);
let e = _mm_set_ps(1., 1., 3., 3.);
assert_eq_m128(r, e);
}
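// The insert tests replace the 128-bit lane selected by the immediate with
// `b`, leaving the remaining lanes of `a` unchanged.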
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_inserti32x4(a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_mask_inserti32x4(a, 0, a, b, 0);
assert_eq_m512i(r, a);
let r = _mm512_mask_inserti32x4(a, 0b11111111_11111111, a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_inserti32x4() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm_setr_epi32(17, 18, 19, 20);
let r = _mm512_maskz_inserti32x4(0, a, b, 0);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_inserti32x4(0b00000000_11111111, a, b, 0);
let e = _mm512_setr_epi32(17, 18, 19, 20, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_inserti32x4() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_inserti32x4(a, b, 1);
let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_inserti32x4() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_mask_inserti32x4(a, 0, a, b, 0);
assert_eq_m256i(r, a);
let r = _mm256_mask_inserti32x4(a, 0b11111111, a, b, 1);
let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_inserti32x4() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_maskz_inserti32x4(0, a, b, 0);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_inserti32x4(0b11111111, a, b, 1);
let e = _mm256_set_epi32(17, 18, 19, 20, 5, 6, 7, 8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_insertf32x4(a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_mask_insertf32x4(a, 0, a, b, 0);
assert_eq_m512(r, a);
let r = _mm512_mask_insertf32x4(a, 0b11111111_11111111, a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_insertf32x4() {
let a = _mm512_setr_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_maskz_insertf32x4(0, a, b, 0);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_insertf32x4(0b00000000_11111111, a, b, 0);
let e = _mm512_setr_ps(
17., 18., 19., 20., 5., 6., 7., 8., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_insertf32x4() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_insertf32x4(a, b, 1);
let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_insertf32x4() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_mask_insertf32x4(a, 0, a, b, 0);
assert_eq_m256(r, a);
let r = _mm256_mask_insertf32x4(a, 0b11111111, a, b, 1);
let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_insertf32x4() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_maskz_insertf32x4(0, a, b, 0);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_insertf32x4(0b11111111, a, b, 1);
let e = _mm256_set_ps(17., 18., 19., 20., 5., 6., 7., 8.);
assert_eq_m256(r, e);
}
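// Intel leaves the upper elements of these widening casts undefined; the
// expected -1. values below assume the current implementation, which fills
// the undefined lanes with -1.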
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps128_ps512() {
let a = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_castps128_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps256_ps512() {
let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm512_castps256_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps128_ps512() {
let a = _mm_setr_ps(17., 18., 19., 20.);
let r = _mm512_zextps128_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_zextps256_ps512() {
let a = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm512_zextps256_ps512(a);
let e = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps128() {
let a = _mm512_setr_ps(
17., 18., 19., 20., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1., -1.,
);
let r = _mm512_castps512_ps128(a);
let e = _mm_setr_ps(17., 18., 19., 20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps512_ps256() {
let a = _mm512_setr_ps(
17., 18., 19., 20., 21., 22., 23., 24., -1., -1., -1., -1., -1., -1., -1., -1.,
);
let r = _mm512_castps512_ps256(a);
let e = _mm256_setr_ps(17., 18., 19., 20., 21., 22., 23., 24.);
assert_eq_m256(r, e);
}
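// 1.0f32 has the bit pattern 0x3F800000, so a pair of adjacent lanes
// reinterprets as the f64 bit pattern 0x3F80_0000_3F80_0000, i.e. the value
// 0.007812501848093234 asserted below.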
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_pd() {
let a = _mm512_set1_ps(1.);
let r = _mm512_castps_pd(a);
let e = _mm512_set1_pd(0.007812501848093234);
assert_eq_m512d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_castps_si512() {
let a = _mm512_set1_ps(1.);
let r = _mm512_castps_si512(a);
let e = _mm512_set1_epi32(1065353216);
assert_eq_m512i(r, e);
}
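// The broadcast tests splat the lowest element (`broadcastd`/`broadcastss`)
// or the lowest 128-bit lane (`broadcast_i32x4`/`broadcast_f32x4`) across
// the whole destination.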
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_broadcastd_epi32(a);
let e = _mm512_set1_epi32(20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastd_epi32() {
let src = _mm512_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_mask_broadcastd_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_broadcastd_epi32(src, 0b11111111_11111111, a);
let e = _mm512_set1_epi32(20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_maskz_broadcastd_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_broadcastd_epi32(0b00000000_11111111, a);
let e = _mm512_setr_epi32(20, 20, 20, 20, 20, 20, 20, 20, 0, 0, 0, 0, 0, 0, 0, 0);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_broadcastd_epi32() {
let src = _mm256_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_mask_broadcastd_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_broadcastd_epi32(src, 0b11111111, a);
let e = _mm256_set1_epi32(20);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_maskz_broadcastd_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_broadcastd_epi32(0b11111111, a);
let e = _mm256_set1_epi32(20);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_broadcastd_epi32() {
let src = _mm_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_mask_broadcastd_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_broadcastd_epi32(src, 0b00001111, a);
let e = _mm_set1_epi32(20);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_broadcastd_epi32() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_maskz_broadcastd_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_broadcastd_epi32(0b00001111, a);
let e = _mm_set1_epi32(20);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_broadcastss_ps(a);
let e = _mm512_set1_ps(20.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcastss_ps() {
let src = _mm512_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_mask_broadcastss_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_broadcastss_ps(src, 0b11111111_11111111, a);
let e = _mm512_set1_ps(20.);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_maskz_broadcastss_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_broadcastss_ps(0b00000000_11111111, a);
let e = _mm512_setr_ps(
20., 20., 20., 20., 20., 20., 20., 20., 0., 0., 0., 0., 0., 0., 0., 0.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_broadcastss_ps() {
let src = _mm256_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_mask_broadcastss_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_broadcastss_ps(src, 0b11111111, a);
let e = _mm256_set1_ps(20.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_maskz_broadcastss_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_broadcastss_ps(0b11111111, a);
let e = _mm256_set1_ps(20.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_broadcastss_ps() {
let src = _mm_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_mask_broadcastss_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_broadcastss_ps(src, 0b00001111, a);
let e = _mm_set1_ps(20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_broadcastss_ps() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_maskz_broadcastss_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_broadcastss_ps(0b00001111, a);
let e = _mm_set1_ps(20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_broadcast_i32x4(a);
let e = _mm512_set_epi32(
17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_i32x4() {
let src = _mm512_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_mask_broadcast_i32x4(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_broadcast_i32x4(src, 0b11111111_11111111, a);
let e = _mm512_set_epi32(
17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20, 17, 18, 19, 20,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm512_maskz_broadcast_i32x4(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_broadcast_i32x4(0b00000000_11111111, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 17, 18, 19, 20, 17, 18, 19, 20);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_broadcast_i32x4(a);
let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_broadcast_i32x4() {
let src = _mm256_set1_epi32(20);
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_mask_broadcast_i32x4(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_broadcast_i32x4(src, 0b11111111, a);
let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_broadcast_i32x4() {
let a = _mm_set_epi32(17, 18, 19, 20);
let r = _mm256_maskz_broadcast_i32x4(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_broadcast_i32x4(0b11111111, a);
let e = _mm256_set_epi32(17, 18, 19, 20, 17, 18, 19, 20);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_broadcast_f32x4(a);
let e = _mm512_set_ps(
17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_broadcast_f32x4() {
let src = _mm512_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_mask_broadcast_f32x4(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_broadcast_f32x4(src, 0b11111111_11111111, a);
let e = _mm512_set_ps(
17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm512_maskz_broadcast_f32x4(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_broadcast_f32x4(0b00000000_11111111, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 17., 18., 19., 20., 17., 18., 19., 20.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_broadcast_f32x4(a);
let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_broadcast_f32x4() {
let src = _mm256_set1_ps(20.);
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_mask_broadcast_f32x4(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_broadcast_f32x4(src, 0b11111111, a);
let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_broadcast_f32x4() {
let a = _mm_set_ps(17., 18., 19., 20.);
let r = _mm256_maskz_broadcast_f32x4(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_broadcast_f32x4(0b11111111, a);
let e = _mm256_set_ps(17., 18., 19., 20., 17., 18., 19., 20.);
assert_eq_m256(r, e);
}
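// `mask_blend` takes elements from `b` where the mask bit is set and from
// `a` where it is clear.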
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_epi32() {
let a = _mm512_set1_epi32(1);
let b = _mm512_set1_epi32(2);
let r = _mm512_mask_blend_epi32(0b11111111_00000000, a, b);
let e = _mm512_set_epi32(2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1, 1);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_blend_epi32() {
let a = _mm256_set1_epi32(1);
let b = _mm256_set1_epi32(2);
let r = _mm256_mask_blend_epi32(0b11111111, a, b);
let e = _mm256_set1_epi32(2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_blend_epi32() {
let a = _mm_set1_epi32(1);
let b = _mm_set1_epi32(2);
let r = _mm_mask_blend_epi32(0b00001111, a, b);
let e = _mm_set1_epi32(2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_blend_ps() {
let a = _mm512_set1_ps(1.);
let b = _mm512_set1_ps(2.);
let r = _mm512_mask_blend_ps(0b11111111_00000000, a, b);
let e = _mm512_set_ps(
2., 2., 2., 2., 2., 2., 2., 2., 1., 1., 1., 1., 1., 1., 1., 1.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_blend_ps() {
let a = _mm256_set1_ps(1.);
let b = _mm256_set1_ps(2.);
let r = _mm256_mask_blend_ps(0b11111111, a, b);
let e = _mm256_set1_ps(2.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_blend_ps() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let r = _mm_mask_blend_ps(0b00001111, a, b);
let e = _mm_set1_ps(2.);
assert_eq_m128(r, e);
}
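// `unpackhi` interleaves the upper halves of each 128-bit lane of `a` and
// `b`; `unpacklo` further below interleaves the lower halves.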
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_unpackhi_epi32(a, b);
let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_mask_unpackhi_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_unpackhi_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(17, 1, 18, 2, 21, 5, 22, 6, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_maskz_unpackhi_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_unpackhi_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 9, 26, 10, 29, 13, 30, 14);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_unpackhi_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
let r = _mm256_mask_unpackhi_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_unpackhi_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_unpackhi_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
let r = _mm256_maskz_unpackhi_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_unpackhi_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(17, 1, 18, 2, 21, 5, 22, 6);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_unpackhi_epi32() {
let a = _mm_set_epi32(1, 2, 3, 4);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_mask_unpackhi_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_unpackhi_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(17, 1, 18, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_unpackhi_epi32() {
let a = _mm_set_epi32(1, 2, 3, 4);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_maskz_unpackhi_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_unpackhi_epi32(0b00001111, a, b);
let e = _mm_set_epi32(17, 1, 18, 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_unpackhi_ps(a, b);
let e = _mm512_set_ps(
17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_mask_unpackhi_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_unpackhi_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
17., 1., 18., 2., 21., 5., 22., 6., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpackhi_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_maskz_unpackhi_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_unpackhi_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 25., 9., 26., 10., 29., 13., 30., 14.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_unpackhi_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm256_mask_unpackhi_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_unpackhi_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_unpackhi_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm256_maskz_unpackhi_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_unpackhi_ps(0b11111111, a, b);
let e = _mm256_set_ps(17., 1., 18., 2., 21., 5., 22., 6.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_unpackhi_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_mask_unpackhi_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_unpackhi_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(17., 1., 18., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_unpackhi_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_maskz_unpackhi_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_unpackhi_ps(0b00001111, a, b);
let e = _mm_set_ps(17., 1., 18., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_unpacklo_epi32(a, b);
let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_mask_unpacklo_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_unpacklo_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set_epi32(19, 3, 20, 4, 23, 7, 24, 8, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpacklo_epi32() {
let a = _mm512_set_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let b = _mm512_set_epi32(
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32,
);
let r = _mm512_maskz_unpacklo_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_unpacklo_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 27, 11, 28, 12, 31, 15, 32, 16);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_unpacklo_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
let r = _mm256_mask_unpacklo_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_unpacklo_epi32(a, 0b11111111, a, b);
let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_unpacklo_epi32() {
let a = _mm256_set_epi32(1, 2, 3, 4, 5, 6, 7, 8);
let b = _mm256_set_epi32(17, 18, 19, 20, 21, 22, 23, 24);
let r = _mm256_maskz_unpacklo_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_unpacklo_epi32(0b11111111, a, b);
let e = _mm256_set_epi32(19, 3, 20, 4, 23, 7, 24, 8);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_unpacklo_epi32() {
let a = _mm_set_epi32(1, 2, 3, 4);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_mask_unpacklo_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_unpacklo_epi32(a, 0b00001111, a, b);
let e = _mm_set_epi32(19, 3, 20, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_unpacklo_epi32() {
let a = _mm_set_epi32(1, 2, 3, 4);
let b = _mm_set_epi32(17, 18, 19, 20);
let r = _mm_maskz_unpacklo_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_unpacklo_epi32(0b00001111, a, b);
let e = _mm_set_epi32(19, 3, 20, 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_unpacklo_ps(a, b);
let e = _mm512_set_ps(
19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_mask_unpacklo_ps(a, 0, a, b);
assert_eq_m512(r, a);
let r = _mm512_mask_unpacklo_ps(a, 0b11111111_11111111, a, b);
let e = _mm512_set_ps(
19., 3., 20., 4., 23., 7., 24., 8., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_unpacklo_ps() {
let a = _mm512_set_ps(
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16.,
);
let b = _mm512_set_ps(
17., 18., 19., 20., 21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32.,
);
let r = _mm512_maskz_unpacklo_ps(0, a, b);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_unpacklo_ps(0b00000000_11111111, a, b);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 27., 11., 28., 12., 31., 15., 32., 16.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_unpacklo_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm256_mask_unpacklo_ps(a, 0, a, b);
assert_eq_m256(r, a);
let r = _mm256_mask_unpacklo_ps(a, 0b11111111, a, b);
let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_unpacklo_ps() {
let a = _mm256_set_ps(1., 2., 3., 4., 5., 6., 7., 8.);
let b = _mm256_set_ps(17., 18., 19., 20., 21., 22., 23., 24.);
let r = _mm256_maskz_unpacklo_ps(0, a, b);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_unpacklo_ps(0b11111111, a, b);
let e = _mm256_set_ps(19., 3., 20., 4., 23., 7., 24., 8.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_unpacklo_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_mask_unpacklo_ps(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_unpacklo_ps(a, 0b00001111, a, b);
let e = _mm_set_ps(19., 3., 20., 4.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_unpacklo_ps() {
let a = _mm_set_ps(1., 2., 3., 4.);
let b = _mm_set_ps(17., 18., 19., 20.);
let r = _mm_maskz_unpacklo_ps(0, a, b);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_unpacklo_ps(0b00001111, a, b);
let e = _mm_set_ps(19., 3., 20., 4.);
assert_eq_m128(r, e);
}
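// valignd semantics: `a` and `b` are concatenated with `a` in the high half,
// the whole is shifted right by `imm8` 32-bit elements, and the low half is
// kept. The count is taken modulo the element count, so for the 512-bit form
// a shift of 16 wraps to 0 and returns `b` again, as asserted below.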
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_alignr_epi32(a, b, 0);
assert_eq_m512i(r, b);
let r = _mm512_alignr_epi32(a, b, 16);
assert_eq_m512i(r, b);
let r = _mm512_alignr_epi32(a, b, 1);
let e = _mm512_set_epi32(
1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_mask_alignr_epi32(a, 0, a, b, 1);
assert_eq_m512i(r, a);
let r = _mm512_mask_alignr_epi32(a, 0b11111111_11111111, a, b, 1);
let e = _mm512_set_epi32(
1, 32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_alignr_epi32() {
let a = _mm512_set_epi32(16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm512_set_epi32(
32, 31, 30, 29, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 18, 17,
);
let r = _mm512_maskz_alignr_epi32(0, a, b, 1);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_alignr_epi32(0b00000000_11111111, a, b, 1);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 25, 24, 23, 22, 21, 20, 19, 18);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_alignr_epi32() {
let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
let r = _mm256_alignr_epi32(a, b, 0);
assert_eq_m256i(r, b);
let r = _mm256_alignr_epi32(a, b, 1);
let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_alignr_epi32() {
let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
let r = _mm256_mask_alignr_epi32(a, 0, a, b, 1);
assert_eq_m256i(r, a);
let r = _mm256_mask_alignr_epi32(a, 0b11111111, a, b, 1);
let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_alignr_epi32() {
let a = _mm256_set_epi32(8, 7, 6, 5, 4, 3, 2, 1);
let b = _mm256_set_epi32(16, 15, 14, 13, 12, 11, 10, 9);
let r = _mm256_maskz_alignr_epi32(0, a, b, 1);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_alignr_epi32(0b11111111, a, b, 1);
let e = _mm256_set_epi32(1, 16, 15, 14, 13, 12, 11, 10);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_alignr_epi32() {
let a = _mm_set_epi32(4, 3, 2, 1);
let b = _mm_set_epi32(8, 7, 6, 5);
let r = _mm_alignr_epi32(a, b, 0);
assert_eq_m128i(r, b);
let r = _mm_alignr_epi32(a, b, 1);
let e = _mm_set_epi32(1, 8, 7, 6);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_alignr_epi32() {
let a = _mm_set_epi32(4, 3, 2, 1);
let b = _mm_set_epi32(8, 7, 6, 5);
let r = _mm_mask_alignr_epi32(a, 0, a, b, 1);
assert_eq_m128i(r, a);
let r = _mm_mask_alignr_epi32(a, 0b00001111, a, b, 1);
let e = _mm_set_epi32(1, 8, 7, 6);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_alignr_epi32() {
let a = _mm_set_epi32(4, 3, 2, 1);
let b = _mm_set_epi32(8, 7, 6, 5);
let r = _mm_maskz_alignr_epi32(0, a, b, 1);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_alignr_epi32(0b00001111, a, b, 1);
let e = _mm_set_epi32(1, 8, 7, 6);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_and_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_and_epi32(a, b);
let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_and_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_mask_and_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_and_epi32(a, 0b01111111_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_and_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_maskz_and_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_and_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_and_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_mask_and_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_and_epi32(a, 0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_and_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_maskz_and_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_and_epi32(0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 1);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_and_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_mask_and_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_and_epi32(a, 0b00001111, a, b);
let e = _mm_set1_epi32(1 << 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_and_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_maskz_and_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_and_epi32(0b00001111, a, b);
let e = _mm_set1_epi32(1 << 1);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_and_si512() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_and_si512(a, b);
let e = _mm512_set_epi32(1 << 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 3);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_or_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_or_epi32(a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_or_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_mask_or_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_or_epi32(a, 0b11111111_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_or_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_maskz_or_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_or_epi32(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_or_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_or_epi32(a, b);
let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_or_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_mask_or_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_or_epi32(a, 0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_or_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_maskz_or_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_or_epi32(0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_or_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_or_epi32(a, b);
let e = _mm_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_or_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_mask_or_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_or_epi32(a, 0b00001111, a, b);
let e = _mm_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_or_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_maskz_or_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_or_epi32(0b00001111, a, b);
let e = _mm_set1_epi32(1 << 1 | 1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_or_si512() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_or_si512(a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_xor_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_xor_epi32(a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_xor_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_mask_xor_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_xor_epi32(a, 0b01111111_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_xor_epi32() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_maskz_xor_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_xor_epi32(0b00000000_11111111, a, b);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 << 1 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_xor_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_xor_epi32(a, b);
let e = _mm256_set1_epi32(1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_xor_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_mask_xor_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_xor_epi32(a, 0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_xor_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_maskz_xor_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_xor_epi32(0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 2);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_xor_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_xor_epi32(a, b);
let e = _mm_set1_epi32(1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_xor_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_mask_xor_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_xor_epi32(a, 0b00001111, a, b);
let e = _mm_set1_epi32(1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_xor_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_maskz_xor_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_xor_epi32(0b00001111, a, b);
let e = _mm_set1_epi32(1 << 2);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_xor_si512() {
#[rustfmt::skip]
let a = _mm512_set_epi32(
1 << 1 | 1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 3,
);
#[rustfmt::skip]
let b = _mm512_set_epi32(
1 << 1, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 3 | 1 << 4,
);
let r = _mm512_xor_si512(a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
1 << 2, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 1 << 1 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_andnot_epi32() {
let a = _mm512_set1_epi32(0);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_andnot_epi32(a, b);
let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_andnot_epi32() {
let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_mask_andnot_epi32(a, 0, a, b);
assert_eq_m512i(r, a);
let r = _mm512_mask_andnot_epi32(a, 0b11111111_11111111, a, b);
let e = _mm512_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_andnot_epi32() {
let a = _mm512_set1_epi32(1 << 1 | 1 << 2);
let b = _mm512_set1_epi32(1 << 3 | 1 << 4);
let r = _mm512_maskz_andnot_epi32(0, a, b);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_andnot_epi32(0b00000000_11111111, a, b);
#[rustfmt::skip]
let e = _mm512_set_epi32(
0, 0, 0, 0,
0, 0, 0, 0,
1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4, 1 << 3 | 1 << 4,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_andnot_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
let r = _mm256_mask_andnot_epi32(a, 0, a, b);
assert_eq_m256i(r, a);
let r = _mm256_mask_andnot_epi32(a, 0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_andnot_epi32() {
let a = _mm256_set1_epi32(1 << 1 | 1 << 2);
let b = _mm256_set1_epi32(1 << 3 | 1 << 4);
let r = _mm256_maskz_andnot_epi32(0, a, b);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_andnot_epi32(0b11111111, a, b);
let e = _mm256_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_andnot_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 3 | 1 << 4);
let r = _mm_mask_andnot_epi32(a, 0, a, b);
assert_eq_m128i(r, a);
let r = _mm_mask_andnot_epi32(a, 0b00001111, a, b);
let e = _mm_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_andnot_epi32() {
let a = _mm_set1_epi32(1 << 1 | 1 << 2);
let b = _mm_set1_epi32(1 << 3 | 1 << 4);
let r = _mm_maskz_andnot_epi32(0, a, b);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_andnot_epi32(0b00001111, a, b);
let e = _mm_set1_epi32(1 << 3 | 1 << 4);
assert_eq_m128i(r, e);
}
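// The following tests cover the opmask intrinsics, which operate directly on
// 16-bit mask registers (k-registers) rather than on vectors.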
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kand() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kand(a, b);
let e: u16 = 0b00001100_00000011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kand_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kand_mask16(a, b);
let e: u16 = 0b00001100_00000011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kor(a, b);
let e: u16 = 0b11101110_00111011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kor_mask16(a, b);
let e: u16 = 0b11101110_00111011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kxor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kxor(a, b);
let e: u16 = 0b11100010_00111000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kxor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kxor_mask16(a, b);
let e: u16 = 0b11100010_00111000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_knot() {
let a: u16 = 0b11001100_00110011;
let r = _mm512_knot(a);
let e: u16 = 0b00110011_11001100;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_knot_mask16() {
let a: u16 = 0b11001100_00110011;
let r = _knot_mask16(a);
let e: u16 = 0b00110011_11001100;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kandn() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kandn(a, b);
let e: u16 = 0b00100010_00001000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kandn_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kandn_mask16(a, b);
let e: u16 = 0b00100010_00001000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kxnor() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kxnor(a, b);
let e: u16 = 0b00011101_11000111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_kxnor_mask16() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _kxnor_mask16(a, b);
let e: u16 = 0b00011101_11000111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kmov() {
let a: u16 = 0b11001100_00110011;
let r = _mm512_kmov(a);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_int2mask() {
let a: i32 = 0b11001100_00110011;
let r = _mm512_int2mask(a);
let e: u16 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask2int() {
let k1: __mmask16 = 0b11001100_00110011;
let r = _mm512_mask2int(k1);
let e: i32 = 0b11001100_00110011;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kunpackb() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kunpackb(a, b);
let e: u16 = 0b00101110_00110011;
assert_eq!(r, e);
}
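// kortestc ORs the two masks and returns 1 only when every bit of the result
// is set (the carry flag of KORTESTW), and 0 otherwise.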
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_kortestc() {
let a: u16 = 0b11001100_00110011;
let b: u16 = 0b00101110_00001011;
let r = _mm512_kortestc(a, b);
assert_eq!(r, 0);
let b: u16 = 0b11111111_11111111;
let r = _mm512_kortestc(a, b);
assert_eq!(r, 1);
}
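// vptestmd: the result mask has a bit set for every lane in which
// `a & b` is non-zero.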
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_test_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_test_epi32_mask(a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_test_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_mask_test_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm512_mask_test_epi32_mask(0b11111111_11111111, a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_test_epi32_mask() {
let a = _mm256_set1_epi32(1 << 0);
let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
let r = _mm256_test_epi32_mask(a, b);
let e: __mmask8 = 0b11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_test_epi32_mask() {
let a = _mm256_set1_epi32(1 << 0);
let b = _mm256_set1_epi32(1 << 0 | 1 << 1);
let r = _mm256_mask_test_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm256_mask_test_epi32_mask(0b11111111, a, b);
let e: __mmask8 = 0b11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_test_epi32_mask() {
let a = _mm_set1_epi32(1 << 0);
let b = _mm_set1_epi32(1 << 0 | 1 << 1);
let r = _mm_test_epi32_mask(a, b);
let e: __mmask8 = 0b00001111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_test_epi32_mask() {
let a = _mm_set1_epi32(1 << 0);
let b = _mm_set1_epi32(1 << 0 | 1 << 1);
let r = _mm_mask_test_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm_mask_test_epi32_mask(0b11111111, a, b);
let e: __mmask8 = 0b00001111;
assert_eq!(r, e);
}
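// vptestnmd is the complement of vptestmd: a mask bit is set for every lane
// in which `a & b` is zero.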
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_testn_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 0 | 1 << 1);
let r = _mm512_testn_epi32_mask(a, b);
let e: __mmask16 = 0b00000000_00000000;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_testn_epi32_mask() {
let a = _mm512_set1_epi32(1 << 0);
let b = _mm512_set1_epi32(1 << 1);
let r = _mm512_mask_testn_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm512_mask_testn_epi32_mask(0b11111111_11111111, a, b);
let e: __mmask16 = 0b11111111_11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_testn_epi32_mask() {
let a = _mm256_set1_epi32(1 << 0);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_testn_epi32_mask(a, b);
let e: __mmask8 = 0b11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_testn_epi32_mask() {
let a = _mm256_set1_epi32(1 << 0);
let b = _mm256_set1_epi32(1 << 1);
let r = _mm256_mask_testn_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm256_mask_testn_epi32_mask(0b11111111, a, b);
let e: __mmask8 = 0b11111111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_testn_epi32_mask() {
let a = _mm_set1_epi32(1 << 0);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_testn_epi32_mask(a, b);
let e: __mmask8 = 0b00001111;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_testn_epi32_mask() {
let a = _mm_set1_epi32(1 << 0);
let b = _mm_set1_epi32(1 << 1);
let r = _mm_mask_testn_epi32_mask(0, a, b);
assert_eq!(r, 0);
let r = _mm_mask_testn_epi32_mask(0b11111111, a, b);
let e: __mmask8 = 0b00001111;
assert_eq!(r, e);
}
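// _mm512_stream_ps is a non-temporal (cache-bypassing) store and requires the
// destination to be 64-byte aligned, hence the aligned wrapper struct.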
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_stream_ps() {
#[repr(align(64))]
struct Memory {
pub data: [f32; 16],
}
let a = _mm512_set1_ps(7.0);
let mut mem = Memory { data: [-1.0; 16] };
_mm512_stream_ps(&mut mem.data[0] as *mut f32, a);
for i in 0..16 {
assert_eq!(mem.data[i], get_m512(a, i));
}
}
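// Reduction tests: in the mask_reduce_* forms, masked-off lanes effectively
// contribute the operation's identity (0 for add, 1 for mul, and so on), so
// only the eight selected lanes appear in the expected values.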
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_add_epi32() {
let a = _mm512_set1_epi32(1);
let e: i32 = _mm512_reduce_add_epi32(a);
assert_eq!(16, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_add_epi32() {
let a = _mm512_set1_epi32(1);
let e: i32 = _mm512_mask_reduce_add_epi32(0b11111111_00000000, a);
assert_eq!(8, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_add_ps() {
let a = _mm512_set1_ps(1.);
let e: f32 = _mm512_reduce_add_ps(a);
assert_eq!(16., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_add_ps() {
let a = _mm512_set1_ps(1.);
let e: f32 = _mm512_mask_reduce_add_ps(0b11111111_00000000, a);
assert_eq!(8., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_mul_epi32() {
let a = _mm512_set1_epi32(2);
let e: i32 = _mm512_reduce_mul_epi32(a);
assert_eq!(65536, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_mul_epi32() {
let a = _mm512_set1_epi32(2);
let e: i32 = _mm512_mask_reduce_mul_epi32(0b11111111_00000000, a);
assert_eq!(256, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_mul_ps() {
let a = _mm512_set1_ps(2.);
let e: f32 = _mm512_reduce_mul_ps(a);
assert_eq!(65536., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_mul_ps() {
let a = _mm512_set1_ps(2.);
let e: f32 = _mm512_mask_reduce_mul_ps(0b11111111_00000000, a);
assert_eq!(256., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_reduce_max_epi32(a);
assert_eq!(15, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_mask_reduce_max_epi32(0b11111111_00000000, a);
assert_eq!(7, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_reduce_max_epu32(a);
assert_eq!(15, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_mask_reduce_max_epu32(0b11111111_00000000, a);
assert_eq!(7, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_max_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_reduce_max_ps(a);
assert_eq!(15., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_max_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_mask_reduce_max_ps(0b11111111_00000000, a);
assert_eq!(7., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_reduce_min_epi32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: i32 = _mm512_mask_reduce_min_epi32(0b11111111_00000000, a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_reduce_min_epu32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_epu32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let e: u32 = _mm512_mask_reduce_min_epu32(0b11111111_00000000, a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_min_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_reduce_min_ps(a);
assert_eq!(0., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_min_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let e: f32 = _mm512_mask_reduce_min_ps(0b11111111_00000000, a);
assert_eq!(0., e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_and_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_reduce_and_epi32(a);
assert_eq!(0, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_and_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_mask_reduce_and_epi32(0b11111111_00000000, a);
assert_eq!(1, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_reduce_or_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_reduce_or_epi32(a);
assert_eq!(3, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_reduce_or_epi32() {
let a = _mm512_set_epi32(1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2);
let e: i32 = _mm512_mask_reduce_or_epi32(0b11111111_00000000, a);
assert_eq!(1, e);
}
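// compress packs the mask-selected elements contiguously into the low
// positions of the result; the remaining upper positions come from `src`
// (mask variant) or are zeroed (maskz variant).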
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_compress_epi32() {
let src = _mm512_set1_epi32(200);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_compress_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_compress_epi32(src, 0b01010101_01010101, a);
let e = _mm512_set_epi32(
200, 200, 200, 200, 200, 200, 200, 200, 1, 3, 5, 7, 9, 11, 13, 15,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_compress_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_compress_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_compress_epi32(0b01010101_01010101, a);
let e = _mm512_set_epi32(0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 5, 7, 9, 11, 13, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_compress_epi32() {
let src = _mm256_set1_epi32(200);
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_mask_compress_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_compress_epi32(src, 0b01010101, a);
let e = _mm256_set_epi32(200, 200, 200, 200, 1, 3, 5, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_compress_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_compress_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_compress_epi32(0b01010101, a);
let e = _mm256_set_epi32(0, 0, 0, 0, 1, 3, 5, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_compress_epi32() {
let src = _mm_set1_epi32(200);
let a = _mm_set_epi32(0, 1, 2, 3);
let r = _mm_mask_compress_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_compress_epi32(src, 0b00000101, a);
let e = _mm_set_epi32(200, 200, 1, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_compress_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let r = _mm_maskz_compress_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_compress_epi32(0b00000101, a);
let e = _mm_set_epi32(0, 0, 1, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_compress_ps() {
let src = _mm512_set1_ps(200.);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_compress_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_compress_ps(src, 0b01010101_01010101, a);
let e = _mm512_set_ps(
200., 200., 200., 200., 200., 200., 200., 200., 1., 3., 5., 7., 9., 11., 13., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_compress_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_compress_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_compress_ps(0b01010101_01010101, a);
let e = _mm512_set_ps(
0., 0., 0., 0., 0., 0., 0., 0., 1., 3., 5., 7., 9., 11., 13., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_compress_ps() {
let src = _mm256_set1_ps(200.);
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_mask_compress_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_compress_ps(src, 0b01010101, a);
let e = _mm256_set_ps(200., 200., 200., 200., 1., 3., 5., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_compress_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_maskz_compress_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_compress_ps(0b01010101, a);
let e = _mm256_set_ps(0., 0., 0., 0., 1., 3., 5., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_compress_ps() {
let src = _mm_set1_ps(200.);
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_mask_compress_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_compress_ps(src, 0b00000101, a);
let e = _mm_set_ps(200., 200., 1., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_compress_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_maskz_compress_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_compress_ps(0b00000101, a);
let e = _mm_set_ps(0., 0., 1., 3.);
assert_eq_m128(r, e);
}
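// expand is the inverse of compress: consecutive elements from the low end of
// `a` are scattered to the mask-selected positions, with the unselected
// positions taken from `src` or zeroed.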
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_expand_epi32() {
let src = _mm512_set1_epi32(200);
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_mask_expand_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_expand_epi32(src, 0b01010101_01010101, a);
let e = _mm512_set_epi32(
200, 8, 200, 9, 200, 10, 200, 11, 200, 12, 200, 13, 200, 14, 200, 15,
);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_expand_epi32() {
let a = _mm512_set_epi32(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
let r = _mm512_maskz_expand_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_expand_epi32(0b01010101_01010101, a);
let e = _mm512_set_epi32(0, 8, 0, 9, 0, 10, 0, 11, 0, 12, 0, 13, 0, 14, 0, 15);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_expand_epi32() {
let src = _mm256_set1_epi32(200);
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_mask_expand_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_expand_epi32(src, 0b01010101, a);
let e = _mm256_set_epi32(200, 4, 200, 5, 200, 6, 200, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_expand_epi32() {
let a = _mm256_set_epi32(0, 1, 2, 3, 4, 5, 6, 7);
let r = _mm256_maskz_expand_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_expand_epi32(0b01010101, a);
let e = _mm256_set_epi32(0, 4, 0, 5, 0, 6, 0, 7);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_expand_epi32() {
let src = _mm_set1_epi32(200);
let a = _mm_set_epi32(0, 1, 2, 3);
let r = _mm_mask_expand_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_expand_epi32(src, 0b00000101, a);
let e = _mm_set_epi32(200, 2, 200, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_expand_epi32() {
let a = _mm_set_epi32(0, 1, 2, 3);
let r = _mm_maskz_expand_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_expand_epi32(0b00000101, a);
let e = _mm_set_epi32(0, 2, 0, 3);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_expand_ps() {
let src = _mm512_set1_ps(200.);
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_mask_expand_ps(src, 0, a);
assert_eq_m512(r, src);
let r = _mm512_mask_expand_ps(src, 0b01010101_01010101, a);
let e = _mm512_set_ps(
200., 8., 200., 9., 200., 10., 200., 11., 200., 12., 200., 13., 200., 14., 200., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_expand_ps() {
let a = _mm512_set_ps(
0., 1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12., 13., 14., 15.,
);
let r = _mm512_maskz_expand_ps(0, a);
assert_eq_m512(r, _mm512_setzero_ps());
let r = _mm512_maskz_expand_ps(0b01010101_01010101, a);
let e = _mm512_set_ps(
0., 8., 0., 9., 0., 10., 0., 11., 0., 12., 0., 13., 0., 14., 0., 15.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_expand_ps() {
let src = _mm256_set1_ps(200.);
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_mask_expand_ps(src, 0, a);
assert_eq_m256(r, src);
let r = _mm256_mask_expand_ps(src, 0b01010101, a);
let e = _mm256_set_ps(200., 4., 200., 5., 200., 6., 200., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_maskz_expand_ps() {
let a = _mm256_set_ps(0., 1., 2., 3., 4., 5., 6., 7.);
let r = _mm256_maskz_expand_ps(0, a);
assert_eq_m256(r, _mm256_setzero_ps());
let r = _mm256_maskz_expand_ps(0b01010101, a);
let e = _mm256_set_ps(0., 4., 0., 5., 0., 6., 0., 7.);
assert_eq_m256(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_expand_ps() {
let src = _mm_set1_ps(200.);
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_mask_expand_ps(src, 0, a);
assert_eq_m128(r, src);
let r = _mm_mask_expand_ps(src, 0b00000101, a);
let e = _mm_set_ps(200., 2., 200., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_maskz_expand_ps() {
let a = _mm_set_ps(0., 1., 2., 3.);
let r = _mm_maskz_expand_ps(0, a);
assert_eq_m128(r, _mm_setzero_ps());
let r = _mm_maskz_expand_ps(0b00000101, a);
let e = _mm_set_ps(0., 2., 0., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_epi32() {
let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
let p = a.as_ptr();
let r = _mm512_loadu_epi32(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_loadu_epi32() {
let a = &[4, 3, 2, 5, 8, 9, 64, 50];
let p = a.as_ptr();
let r = _mm256_loadu_epi32(black_box(p));
let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_loadu_epi32() {
let a = &[4, 3, 2, 5];
let p = a.as_ptr();
let r = _mm_loadu_epi32(black_box(p));
let e = _mm_setr_epi32(4, 3, 2, 5);
assert_eq_m128i(r, e);
}
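// The cvt*_storeu tests below narrow 32-bit lanes before storing the selected
// lanes: cvtepi32 truncates, cvtsepi32 saturates as signed
// (i32::MAX -> i16::MAX), and cvtusepi32 saturates as unsigned
// (i32::MAX -> u16::MAX).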
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_storeu_epi16() {
let a = _mm512_set1_epi32(9);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm256_set1_epi16(9);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi32_storeu_epi16() {
let a = _mm256_set1_epi32(9);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set1_epi16(9);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi32_storeu_epi16() {
let a = _mm_set1_epi32(9);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, 9, 9, 9, 9);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_storeu_epi16() {
let a = _mm512_set1_epi32(i32::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm256_set1_epi16(i16::MAX);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi32_storeu_epi16() {
let a = _mm256_set1_epi32(i32::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set1_epi16(i16::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtsepi32_storeu_epi16() {
let a = _mm_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtsepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi16(0, 0, 0, 0, i16::MAX, i16::MAX, i16::MAX, i16::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_storeu_epi16() {
let a = _mm512_set1_epi32(i32::MAX);
let mut r = _mm256_undefined_si256();
_mm512_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm256_set1_epi16(u16::MAX as i16);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi32_storeu_epi16() {
let a = _mm256_set1_epi32(i32::MAX);
let mut r = _mm_undefined_si128();
_mm256_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set1_epi16(u16::MAX as i16);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtusepi32_storeu_epi16() {
let a = _mm_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtusepi32_storeu_epi16(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi16(
0, 0, 0, 0,
u16::MAX as i16, u16::MAX as i16, u16::MAX as i16, u16::MAX as i16,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtepi32_storeu_epi8() {
let a = _mm512_set1_epi32(9);
let mut r = _mm_undefined_si128();
_mm512_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(9);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtepi32_storeu_epi8() {
let a = _mm256_set1_epi32(9);
let mut r = _mm_set1_epi8(0);
_mm256_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9, 9, 9, 9, 9);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtepi32_storeu_epi8() {
let a = _mm_set1_epi32(9);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
let e = _mm_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 9, 9, 9, 9);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtsepi32_storeu_epi8() {
let a = _mm512_set1_epi32(i32::MAX);
let mut r = _mm_undefined_si128();
_mm512_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(i8::MAX);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtsepi32_storeu_epi8() {
let a = _mm256_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm256_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
i8::MAX, i8::MAX, i8::MAX, i8::MAX,
i8::MAX, i8::MAX, i8::MAX, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtsepi32_storeu_epi8() {
let a = _mm_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtsepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
i8::MAX, i8::MAX, i8::MAX, i8::MAX,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_cvtusepi32_storeu_epi8() {
let a = _mm512_set1_epi32(i32::MAX);
let mut r = _mm_undefined_si128();
_mm512_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111_11111111, a);
let e = _mm_set1_epi8(u8::MAX as i8);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_cvtusepi32_storeu_epi8() {
let a = _mm256_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm256_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_cvtusepi32_storeu_epi8() {
let a = _mm_set1_epi32(i32::MAX);
let mut r = _mm_set1_epi8(0);
_mm_mask_cvtusepi32_storeu_epi8(&mut r as *mut _ as *mut i8, 0b11111111, a);
#[rustfmt::skip]
let e = _mm_set_epi8(
0, 0, 0, 0,
0, 0, 0, 0,
0, 0, 0, 0,
u8::MAX as i8, u8::MAX as i8, u8::MAX as i8, u8::MAX as i8,
);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_epi32() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_storeu_epi32() {
let a = _mm256_set1_epi32(9);
let mut r = _mm256_undefined_si256();
_mm256_storeu_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_storeu_epi32() {
let a = _mm_set1_epi32(9);
let mut r = _mm_undefined_si128();
_mm_storeu_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_loadu_si512() {
let a = &[4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50];
let p = a.as_ptr();
let r = _mm512_loadu_si512(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_storeu_si512() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_storeu_si512(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_si512() {
#[repr(align(64))]
struct Align {
data: [i32; 16],
}
let a = Align {
data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
};
let p = (a.data).as_ptr();
let r = _mm512_load_si512(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_si512() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_store_si512(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_epi32() {
#[repr(align(64))]
struct Align {
data: [i32; 16],
}
let a = Align {
data: [4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50],
};
let p = (a.data).as_ptr();
let r = _mm512_load_epi32(black_box(p));
let e = _mm512_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50, -4, -3, -2, -5, -8, -9, -64, -50);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_load_epi32() {
#[repr(align(64))]
struct Align {
data: [i32; 8],
}
let a = Align {
data: [4, 3, 2, 5, 8, 9, 64, 50],
};
let p = (a.data).as_ptr();
let r = _mm256_load_epi32(black_box(p));
let e = _mm256_setr_epi32(4, 3, 2, 5, 8, 9, 64, 50);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_load_epi32() {
#[repr(align(64))]
struct Align {
data: [i32; 4],
}
let a = Align { data: [4, 3, 2, 5] };
let p = (a.data).as_ptr();
let r = _mm_load_epi32(black_box(p));
let e = _mm_setr_epi32(4, 3, 2, 5);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_epi32() {
let a = _mm512_set1_epi32(9);
let mut r = _mm512_undefined_epi32();
_mm512_store_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m512i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_store_epi32() {
let a = _mm256_set1_epi32(9);
let mut r = _mm256_undefined_si256();
_mm256_store_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m256i(r, a);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_store_epi32() {
let a = _mm_set1_epi32(9);
let mut r = _mm_undefined_si128();
_mm_store_epi32(&mut r as *mut _ as *mut i32, a);
assert_eq_m128i(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_load_ps() {
#[repr(align(64))]
struct Align {
data: [f32; 16],
}
let a = Align {
data: [
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
],
};
let p = (a.data).as_ptr();
let r = _mm512_load_ps(black_box(p));
let e = _mm512_setr_ps(
4., 3., 2., 5., 8., 9., 64., 50., -4., -3., -2., -5., -8., -9., -64., -50.,
);
assert_eq_m512(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_store_ps() {
let a = _mm512_set1_ps(9.);
let mut r = _mm512_undefined_ps();
_mm512_store_ps(&mut r as *mut _ as *mut f32, a);
assert_eq_m512(r, a);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_mask_set1_epi32() {
let src = _mm512_set1_epi32(2);
let a: i32 = 11;
let r = _mm512_mask_set1_epi32(src, 0, a);
assert_eq_m512i(r, src);
let r = _mm512_mask_set1_epi32(src, 0b11111111_11111111, a);
let e = _mm512_set1_epi32(11);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm512_maskz_set1_epi32(0, a);
assert_eq_m512i(r, _mm512_setzero_si512());
let r = _mm512_maskz_set1_epi32(0b11111111_11111111, a);
let e = _mm512_set1_epi32(11);
assert_eq_m512i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm256_mask_set1_epi32() {
let src = _mm256_set1_epi32(2);
let a: i32 = 11;
let r = _mm256_mask_set1_epi32(src, 0, a);
assert_eq_m256i(r, src);
let r = _mm256_mask_set1_epi32(src, 0b11111111, a);
let e = _mm256_set1_epi32(11);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm256_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm256_maskz_set1_epi32(0, a);
assert_eq_m256i(r, _mm256_setzero_si256());
let r = _mm256_maskz_set1_epi32(0b11111111, a);
let e = _mm256_set1_epi32(11);
assert_eq_m256i(r, e);
}
#[simd_test(enable = "avx512f,avx512vl")]
unsafe fn test_mm_mask_set1_epi32() {
let src = _mm_set1_epi32(2);
let a: i32 = 11;
let r = _mm_mask_set1_epi32(src, 0, a);
assert_eq_m128i(r, src);
let r = _mm_mask_set1_epi32(src, 0b00001111, a);
let e = _mm_set1_epi32(11);
assert_eq_m128i(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_set1_epi32() {
let a: i32 = 11;
let r = _mm_maskz_set1_epi32(0, a);
assert_eq_m128i(r, _mm_setzero_si128());
let r = _mm_maskz_set1_epi32(0b00001111, a);
let e = _mm_set1_epi32(11);
assert_eq_m128i(r, e);
}
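// The masked scalar operations below consume only bit 0 of the mask:
// element 0 is the operation result, `src`, or zero depending on that bit,
// while the upper elements are always copied from `a`.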
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_move_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_move_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_move_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 40.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_move_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_move_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_move_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 40.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_move_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_move_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_move_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 4.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_move_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_move_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_move_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 4.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_add_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_add_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_add_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_add_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_add_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_add_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_add_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_add_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_sub_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sub_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_sub_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sub_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sub_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sub_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sub_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sub_sd(0b11111111, a, b);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_mul_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_mul_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_mul_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_mul_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_mul_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_mul_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_mul_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_mul_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_div_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_div_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_div_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_div_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_div_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_div_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_div_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_div_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
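// Note: masked scalar max/min use the same low-lane merge rule; these tests
// pass `a` itself as the merge source.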
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_max_ss(a, 0, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_max_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_max_ss(0, a, b);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_max_ss(0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_max_sd(a, 0, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_max_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_max_sd(0, a, b);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_max_sd(0b11111111, a, b);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_min_ss(a, 0, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_min_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_min_ss(0, a, b);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_min_ss(0b11111111, a, b);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_min_sd(a, 0, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_min_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_min_sd(0, a, b);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_min_sd(0b11111111, a, b);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
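// Note: the sqrt tests place an exact square (4.0) in the low lane of `b`,
// so the expected low-lane result is exactly 2.0.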
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_sqrt_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sqrt_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_sqrt_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sqrt_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sqrt_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sqrt_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sqrt_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sqrt_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
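// Note: rsqrt14/rcp14 compute approximations with a maximum relative error
// of 2^-14. The low lane of `b` is 4.0, a power of two for which the
// approximation is exact (0.5 and 0.25 respectively), which is why these
// tests can assert exact equality.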
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rsqrt14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_rsqrt14_ss(a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rsqrt14_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_rsqrt14_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_rsqrt14_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rsqrt14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_rsqrt14_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_rsqrt14_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rsqrt14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_rsqrt14_sd(a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rsqrt14_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_rsqrt14_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_rsqrt14_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rsqrt14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_rsqrt14_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_rsqrt14_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rcp14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_rcp14_ss(a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rcp14_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_rcp14_ss(src, 0, a, b);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_rcp14_ss(src, 0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rcp14_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_rcp14_ss(0, a, b);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_rcp14_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 2., 10., 0.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_rcp14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_rcp14_sd(a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_rcp14_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_rcp14_sd(src, 0, a, b);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_rcp14_sd(src, 0b11111111, a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_rcp14_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_rcp14_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_rcp14_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 0.25);
assert_eq_m128d(r, e);
}
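// Note: getexp extracts the unbiased exponent floor(log2(|b|)) as a float;
// for b = 3.0 that is 1.0 in the low lane, with the upper lanes from `a`.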
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_getexp_ss(a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_getexp_ss(a, 0, a, b);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
let r = _mm_mask_getexp_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_getexp_ss(0, a, b);
let e = _mm_set_ps(2., 2., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getexp_ss(0b11111111, a, b);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_getexp_sd(a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_getexp_sd(a, 0, a, b);
let e = _mm_set_pd(2., 2.);
assert_eq_m128d(r, e);
let r = _mm_mask_getexp_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_getexp_sd(0, a, b);
let e = _mm_set_pd(2., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getexp_sd(0b11111111, a, b);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
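// Note: getmant normalizes the mantissa into the interval selected by
// `_MM_MANT_NORM_1_2` and keeps the source sign (`_MM_MANT_SIGN_SRC`);
// 10.0 = 1.25 * 2^3, so the low lane becomes 1.25.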
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_getmant_ss(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_mask_getmant_ss(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 20.);
assert_eq_m128(r, e);
let r = _mm_mask_getmant_ss(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_maskz_getmant_ss(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getmant_ss(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_getmant_sd(a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_mask_getmant_sd(a, 0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 20.);
assert_eq_m128d(r, e);
let r = _mm_mask_getmant_sd(a, 0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_maskz_getmant_sd(0, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getmant_sd(0b11111111, a, b, _MM_MANT_NORM_1_2, _MM_MANT_SIGN_SRC);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
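// Note: roundscale with imm8 = 0 rounds to the nearest integer with no
// scaling, so 1.1 in the low lane rounds to 1.0.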
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_roundscale_ss(a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_mask_roundscale_ss(a, 0, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
assert_eq_m128(r, e);
let r = _mm_mask_roundscale_ss(a, 0b11111111, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_maskz_roundscale_ss(0, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_roundscale_ss(0b11111111, a, b, 0);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_roundscale_sd(a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_mask_roundscale_sd(a, 0, a, b, 0);
let e = _mm_set_pd(2.2, 2.2);
assert_eq_m128d(r, e);
let r = _mm_mask_roundscale_sd(a, 0b11111111, a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_maskz_roundscale_sd(0, a, b, 0);
let e = _mm_set_pd(2.2, 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_roundscale_sd(0b11111111, a, b, 0);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
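// Note: scalef computes `a * 2^floor(b)` in the low lane: 1.0 * 2^3 = 8.0.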
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_scalef_ss(a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_scalef_ss(a, 0, a, b);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
let r = _mm_mask_scalef_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_scalef_ss(0, a, b);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_scalef_ss(0b11111111, a, b);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_scalef_sd(a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_scalef_sd(a, 0, a, b);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_scalef_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_scalef_sd(0, a, b);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_scalef_sd(0b11111111, a, b);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
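// Note: the scalar FMA family below follows these conventions: `mask_`
// variants merge the low lane with the first operand (`a`), `mask3_`
// variants merge with the third operand (`c`), and `maskz_` variants zero
// it. The operations are fmadd = a*b + c, fmsub = a*b - c,
// fnmadd = -(a*b) + c, and fnmsub = -(a*b) - c.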
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmadd_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmadd_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmadd_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmadd_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmadd_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmadd_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmadd_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fmadd_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmadd_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmadd_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmadd_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmadd_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmsub_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fmsub_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmsub_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmsub_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmsub_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsub_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmsub_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fmsub_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmsub_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmsub_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmsub_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmsub_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmadd_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmadd_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fnmadd_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmadd_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fnmadd_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmadd_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmadd_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmadd_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fnmadd_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmadd_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fnmadd_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmadd_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmsub_ss(a, 0, b, c);
assert_eq_m128(r, a);
let r = _mm_mask_fnmsub_ss(a, 0b11111111, b, c);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fnmsub_ss(0, a, b, c);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmsub_ss(0b11111111, a, b, c);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fnmsub_ss(a, b, c, 0);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmsub_ss(a, b, c, 0b11111111);
let e = _mm_set_ps(3., 3., 3., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmsub_sd(a, 0, b, c);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmsub_sd(a, 0b11111111, b, c);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fnmsub_sd(0, a, b, c);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmsub_sd(0b11111111, a, b, c);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fnmsub_sd(a, b, c, 0);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmsub_sd(a, b, c, 0b11111111);
let e = _mm_set_pd(3., -5.);
assert_eq_m128d(r, e);
}
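// Note: the `_round` variants take an explicit rounding control.
// `_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC` selects round-toward-zero with
// floating-point exceptions suppressed (SAE); the operands here are exactly
// representable, so the rounding mode does not affect the expected values.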
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_add_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_add_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_add_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_add_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_add_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 60.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_add_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_add_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_add_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_add_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_add_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_add_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_add_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_add_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 6.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_sub_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_sub_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sub_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_sub_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sub_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., -20.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sub_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_sub_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sub_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sub_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sub_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sub_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sub_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sub_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mul_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_mul_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_mul_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_mul_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_mul_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 800.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mul_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mul_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_mul_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_mul_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_mul_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_mul_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_mul_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_mul_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_div_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_mask_div_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_div_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 40.);
let r = _mm_maskz_div_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_div_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_div_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_div_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_div_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_div_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_div_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_div_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_div_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_div_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.5);
assert_eq_m128d(r, e);
}
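// Note: max/min perform no rounding, so only the exception-suppression part
// of the control matters; `_MM_FROUND_CUR_DIRECTION` keeps the current MXCSR
// behavior.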
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_max_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_max_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_max_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_max_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_max_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 7.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_max_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_max_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_max_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_max_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_max_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_max_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 3.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_min_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_mask_min_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
let r = _mm_mask_min_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_ss() {
let a = _mm_set_ps(0., 1., 2., 3.);
let b = _mm_set_ps(4., 5., 6., 7.);
let r = _mm_maskz_min_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_min_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 1., 2., 3.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_min_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_mask_min_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_min_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_min_round_sd() {
let a = _mm_set_pd(0., 1.);
let b = _mm_set_pd(2., 3.);
let r = _mm_maskz_min_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_min_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_sqrt_round_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_ss() {
let src = _mm_set_ps(10., 11., 100., 110.);
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_mask_sqrt_round_ss(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 110.);
assert_eq_m128(r, e);
let r = _mm_mask_sqrt_round_ss(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_ss() {
let a = _mm_set_ps(1., 2., 10., 20.);
let b = _mm_set_ps(3., 4., 30., 4.);
let r = _mm_maskz_sqrt_round_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_sqrt_round_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 2., 10., 2.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_sqrt_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_sqrt_round_sd(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_sqrt_round_sd() {
let src = _mm_set_pd(10., 11.);
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_mask_sqrt_round_sd(src, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 11.);
assert_eq_m128d(r, e);
let r = _mm_mask_sqrt_round_sd(
src,
0b11111111,
a,
b,
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_sqrt_round_sd() {
let a = _mm_set_pd(1., 2.);
let b = _mm_set_pd(3., 4.);
let r = _mm_maskz_sqrt_round_sd(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_sqrt_round_sd(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 2.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_getexp_round_ss(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_getexp_round_ss(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 2.);
assert_eq_m128(r, e);
let r = _mm_mask_getexp_round_ss(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_ss() {
let a = _mm_set1_ps(2.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_getexp_round_ss(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getexp_round_ss(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2., 2., 2., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_getexp_round_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_getexp_round_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 2.);
assert_eq_m128d(r, e);
let r = _mm_mask_getexp_round_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getexp_round_sd() {
let a = _mm_set1_pd(2.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_getexp_round_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getexp_round_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_getmant_round_ss(
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_mask_getmant_round_ss(
a,
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 20.);
assert_eq_m128(r, e);
let r = _mm_mask_getmant_round_ss(
a,
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_ss() {
let a = _mm_set1_ps(20.);
let b = _mm_set1_ps(10.);
let r = _mm_maskz_getmant_round_ss(
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_getmant_round_ss(
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_ps(20., 20., 20., 1.25);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_getmant_round_sd(
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_mask_getmant_round_sd(
a,
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 20.);
assert_eq_m128d(r, e);
let r = _mm_mask_getmant_round_sd(
a,
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_getmant_round_sd() {
let a = _mm_set1_pd(20.);
let b = _mm_set1_pd(10.);
let r = _mm_maskz_getmant_round_sd(
0,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_getmant_round_sd(
0b11111111,
a,
b,
_MM_MANT_NORM_1_2,
_MM_MANT_SIGN_SRC,
_MM_FROUND_CUR_DIRECTION,
);
let e = _mm_set_pd(20., 1.25);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_roundscale_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_mask_roundscale_round_ss(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 2.2);
assert_eq_m128(r, e);
let r = _mm_mask_roundscale_round_ss(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_maskz_roundscale_round_ss(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_roundscale_round_ss(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(2.2, 2.2, 2.2, 1.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_roundscale_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_mask_roundscale_round_sd(a, 0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 2.2);
assert_eq_m128d(r, e);
let r = _mm_mask_roundscale_round_sd(a, 0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_roundscale_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_maskz_roundscale_round_sd(0, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_roundscale_round_sd(0b11111111, a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(2.2, 1.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_scalef_round_ss(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_mask_scalef_round_ss(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
let r = _mm_mask_scalef_round_ss(
a,
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(3.);
let r = _mm_maskz_scalef_round_ss(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_scalef_round_ss(
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 8.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_scalef_round_sd(a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_mask_scalef_round_sd(a, 0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
let r = _mm_mask_scalef_round_sd(
a,
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_scalef_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(3.);
let r = _mm_maskz_scalef_round_sd(0, a, b, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_scalef_round_sd(
0b11111111,
a,
b,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 8.);
assert_eq_m128d(r, e);
}
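// Note: the FMA `_round` tests mirror the unsuffixed FMA tests above, using
// `_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC` (round-to-nearest-even
// with exceptions suppressed).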
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fmadd_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmadd_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fmadd_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., 5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fmadd_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmadd_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmadd_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., 5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fmsub_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_maskz_fmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fmsub_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask3_fmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fmsub_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., -1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fmsub_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_maskz_fmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fmsub_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask3_fmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fmsub_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., -1.);
assert_eq_m128d(r, e);
}
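// fnmadd computes -(a*b) + c in the low lane: -(1*2) + 3 = 1.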
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fnmadd_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmadd_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fnmadd_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_maskz_fnmadd_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmadd_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_mask3_fnmadd_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmadd_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., 1.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fnmadd_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmadd_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmadd_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_maskz_fnmadd_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmadd_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., 1.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmadd_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_mask3_fnmadd_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmadd_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., 1.);
assert_eq_m128d(r, e);
}
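// fnmsub computes -(a*b) - c in the low lane: -(1*2) - 3 = -5.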
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_fnmsub_round_ss(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r = _mm_mask_fnmsub_round_ss(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r = _mm_mask_fnmsub_round_ss(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_maskz_fnmsub_round_ss(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(1., 1., 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_fnmsub_round_ss(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(1., 1., 1., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_ss() {
let a = _mm_set1_ps(1.);
let b = _mm_set1_ps(2.);
let c = _mm_set1_ps(3.);
let r =
_mm_mask3_fnmsub_round_ss(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128(r, c);
let r = _mm_mask3_fnmsub_round_ss(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_ps(3., 3., 3., -5.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_fnmsub_round_sd(a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r = _mm_mask_fnmsub_round_sd(a, 0, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, a);
let r = _mm_mask_fnmsub_round_sd(
a,
0b11111111,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_maskz_fnmsub_round_sd(0, a, b, c, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
let e = _mm_set_pd(1., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_fnmsub_round_sd(
0b11111111,
a,
b,
c,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(1., -5.);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask3_fnmsub_round_sd() {
let a = _mm_set1_pd(1.);
let b = _mm_set1_pd(2.);
let c = _mm_set1_pd(3.);
let r =
_mm_mask3_fnmsub_round_sd(a, b, c, 0, _MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC);
assert_eq_m128d(r, c);
let r = _mm_mask3_fnmsub_round_sd(
a,
b,
c,
0b11111111,
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC,
);
let e = _mm_set_pd(3., -5.);
assert_eq_m128d(r, e);
}
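// The fixupimm expectations follow from Intel's VFIXUPIMM token/response tables:
// the low element of b (f32::MAX / f64::MAX) classifies as "positive value"
// (token 7), and nibble 7 of the table word in c (0x7FFFFFFF) is 0b0111, which
// the response table maps to -0.0. The imm8 of 5 only enables extra exception
// reporting and does not change the value produced.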
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_fixupimm_ss(a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_mask_fixupimm_ss(a, 0b11111111, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_maskz_fixupimm_ss(0b00000000, a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_fixupimm_ss(0b11111111, a, b, c, 5);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_fixupimm_sd(a, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_mask_fixupimm_sd(a, 0b11111111, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_maskz_fixupimm_sd(0b00000000, a, b, c, 5);
let e = _mm_set_pd(0., 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_fixupimm_sd(0b11111111, a, b, c, 5);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
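// The `_round` fixupimm variants compute the same values; the extra argument is
// only an exception-suppression (SAE) control, and _MM_FROUND_CUR_DIRECTION
// leaves exception behaviour unchanged.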
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_fixupimm_round_ss(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_mask_fixupimm_round_ss(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_round_ss() {
let a = _mm_set_ps(0., 0., 0., f32::NAN);
let b = _mm_set1_ps(f32::MAX);
let c = _mm_set1_epi32(i32::MAX);
let r = _mm_maskz_fixupimm_round_ss(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., 0.0);
assert_eq_m128(r, e);
let r = _mm_maskz_fixupimm_round_ss(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_ps(0., 0., 0., -0.0);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_fixupimm_round_sd(a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_mask_fixupimm_round_sd(a, 0b11111111, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_fixupimm_round_sd() {
let a = _mm_set_pd(0., f64::NAN);
let b = _mm_set1_pd(f64::MAX);
let c = _mm_set1_epi64x(i32::MAX as i64);
let r = _mm_maskz_fixupimm_round_sd(0b00000000, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., 0.0);
assert_eq_m128d(r, e);
let r = _mm_maskz_fixupimm_round_sd(0b11111111, a, b, c, 5, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(0., -0.0);
assert_eq_m128d(r, e);
}
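// Masked scalar conversions: the low lane of the result is b's low element
// converted (f32 <-> f64), the upper lane(s) are copied from a, and a mask of 0
// falls back to src (`mask`) or zero (`maskz`).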
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_mask_cvtss_sd(a, 0, a, b);
assert_eq_m128d(r, a);
let r = _mm_mask_cvtss_sd(a, 0b11111111, a, b);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_maskz_cvtss_sd(0, a, b);
let e = _mm_set_pd(6., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_cvtss_sd(0b11111111, a, b);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvtsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_mask_cvtsd_ss(a, 0, a, b);
assert_eq_m128(r, a);
let r = _mm_mask_cvtsd_ss(a, 0b11111111, a, b);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvtsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_maskz_cvtsd_ss(0, a, b);
let e = _mm_set_ps(0., -0.5, 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_cvtsd_ss(0b11111111, a, b);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_sd(a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_mask_cvt_roundss_sd(a, 0, a, b, _MM_FROUND_CUR_DIRECTION);
assert_eq_m128d(r, a);
let r = _mm_mask_cvt_roundss_sd(a, 0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundss_sd() {
let a = _mm_set_pd(6., -7.5);
let b = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_maskz_cvt_roundss_sd(0, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., 0.);
assert_eq_m128d(r, e);
let r = _mm_maskz_cvt_roundss_sd(0b11111111, a, b, _MM_FROUND_CUR_DIRECTION);
let e = _mm_set_pd(6., -1.5);
assert_eq_m128d(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_cvt_roundsd_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_mask_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_mask_cvt_roundsd_ss(a, 0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
assert_eq_m128(r, a);
let r =
_mm_mask_cvt_roundsd_ss(a, 0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_maskz_cvt_roundsd_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b = _mm_set_pd(6., -7.5);
let r = _mm_maskz_cvt_roundsd_ss(0, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 0.);
assert_eq_m128(r, e);
let r = _mm_maskz_cvt_roundsd_ss(0b11111111, a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., -7.5);
assert_eq_m128(r, e);
}
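// Scalar float -> integer conversions: with _MM_FROUND_TO_ZERO the low element
// -1.5 truncates to -1, while the plain cvtss/cvtsd forms below use the default
// round-to-nearest-even (-1.5 -> -2). Converting a negative value to u32 is out
// of range and yields the unsigned integer indefinite value, u32::MAX.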
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_si32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvt_roundss_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtss_i32(a);
let e: i32 = -2;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtss_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_si32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_si32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_i32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: i32 = -1;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvt_roundsd_u32(a, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtsd_i32(a);
let e: i32 = -2;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtsd_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
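// Integer -> scalar float conversions: 9 is exactly representable, so the
// rounding-mode argument has no effect on the result.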
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundi32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
let r = _mm_cvt_roundi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundsi32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
let r = _mm_cvt_roundsi32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvt_roundu32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: u32 = 9;
let r = _mm_cvt_roundu32_ss(a, b, _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: i32 = 9;
let r = _mm_cvti32_ss(a, b);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvti32_sd() {
let a = _mm_set_pd(1., -1.5);
let b: i32 = 9;
let r = _mm_cvti32_sd(a, b);
let e = _mm_set_pd(1., 9.);
assert_eq_m128d(r, e);
}
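// The cvtt ("convert with truncation") intrinsics always round toward zero, so
// -1.5 converts to -1 regardless of MXCSR; their rounding argument is only an
// SAE (suppress-all-exceptions) control. The expected values below reflect that
// truncation semantics.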
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_si32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtt_roundss_si32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtt_roundss_i32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvtt_roundss_u32(a, _MM_FROUND_CUR_DIRECTION);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_i32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvttss_i32(a);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttss_u32() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let r = _mm_cvttss_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_si32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_si32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_i32(a, _MM_FROUND_CUR_DIRECTION);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtt_roundsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvtt_roundsd_u32(a, _MM_FROUND_CUR_DIRECTION);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_i32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvttsd_i32(a);
let e: i32 = -1; // truncation: -1.5 rounds toward zero
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvttsd_u32() {
let a = _mm_set_pd(1., -1.5);
let r = _mm_cvttsd_u32(a);
let e: u32 = u32::MAX;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_ss() {
let a = _mm_set_ps(0., -0.5, 1., -1.5);
let b: u32 = 9;
let r = _mm_cvtu32_ss(a, b);
let e = _mm_set_ps(0., -0.5, 1., 9.);
assert_eq_m128(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_cvtu32_sd() {
let a = _mm_set_pd(1., -1.5);
let b: u32 = 9;
let r = _mm_cvtu32_sd(a, b);
let e = _mm_set_pd(1., 9.);
assert_eq_m128d(r, e);
}
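// comi_round compares the low elements using comparison predicate imm8 = 0
// (_CMP_EQ_OQ); 2.2 == 1.1 is false, so the result is 0.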
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_ss() {
let a = _mm_set1_ps(2.2);
let b = _mm_set1_ps(1.1);
let r = _mm_comi_round_ss(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e: i32 = 0;
assert_eq!(r, e);
}
#[simd_test(enable = "avx512f")]
unsafe fn test_mm_comi_round_sd() {
let a = _mm_set1_pd(2.2);
let b = _mm_set1_pd(1.1);
let r = _mm_comi_round_sd(a, b, 0, _MM_FROUND_CUR_DIRECTION);
let e: i32 = 0;
assert_eq!(r, e);
}
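// _mm512_cvtsi512_si32 extracts the lowest 32-bit lane of the vector.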
#[simd_test(enable = "avx512f")]
unsafe fn test_mm512_cvtsi512_si32() {
let a = _mm512_setr_epi32(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
let r = _mm512_cvtsi512_si32(a);
let e: i32 = 1;
assert_eq!(r, e);
}
}