Skip to content

Commit

Permalink
use floating point instructions for f32/f64 masked loads
Browse files Browse the repository at this point in the history
  • Loading branch information
sarah committed May 22, 2024
1 parent b1cf582 commit dff8107
Show file tree
Hide file tree
Showing 2 changed files with 123 additions and 1 deletion.
2 changes: 1 addition & 1 deletion pulp/Cargo.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[package]
name = "pulp"
version = "0.18.13"
version = "0.18.14"
edition = "2021"
authors = ["sarah <>"]
description = "Safe generic simd"
Expand Down
122 changes: 122 additions & 0 deletions pulp/src/x86.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1888,6 +1888,22 @@ impl Simd for V3 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_load_ptr(
    self,
    mask: Self::m32s,
    ptr: *const f32,
    or: Self::f32s,
) -> Self::f32s {
    // Use the floating-point masked load so the data stays in the float domain.
    let lane_mask = transmute(mask);
    let loaded = transmute(_mm256_maskload_ps(ptr as _, lane_mask));
    // The load itself knows nothing about `or`, so blend the fallback into
    // the lanes that `mask` does not select.
    self.m32s_select_f32s(mask, loaded, or)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_load_ptr(
self,
mask: Self::m32s,
Expand Down Expand Up @@ -1920,6 +1936,22 @@ impl Simd for V3 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_load_ptr(
    self,
    mask: Self::m64s,
    ptr: *const f64,
    or: Self::f64s,
) -> Self::f64s {
    // Use the floating-point masked load so the data stays in the float domain.
    let lane_mask = transmute(mask);
    let loaded = transmute(_mm256_maskload_pd(ptr as _, lane_mask));
    // The load itself knows nothing about `or`, so blend the fallback into
    // the lanes that `mask` does not select.
    self.m64s_select_f64s(mask, loaded, or)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_load_ptr(
self,
mask: Self::m64s,
Expand All @@ -1944,6 +1976,13 @@ impl Simd for V3 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut f32, values: Self::f32s) {
    // Masked store of `values` to `ptr` through the floating-point intrinsic.
    // `mask` and `values` are reinterpreted as the raw vector types the
    // intrinsic expects. `ptr` is already `*mut f32`, so no cast is needed.
    _mm256_maskstore_ps(ptr, transmute(mask), transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut c32, values: Self::c32s) {
    // Reinterpret the complex destination as `f32` so the float masked store
    // can be reused; `mask` is an `m32s`, the same mask type the `f32` store takes.
    let dst = ptr as *mut f32;
    let lane_mask = transmute(mask);
    _mm256_maskstore_ps(dst, lane_mask, transmute(values));
}
Expand All @@ -1958,6 +1997,13 @@ impl Simd for V3 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut f64, values: Self::f64s) {
    // Masked store of `values` to `ptr` through the floating-point intrinsic.
    // `mask` and `values` are reinterpreted as the raw vector types the
    // intrinsic expects. `ptr` is already `*mut f64`, so no cast is needed.
    _mm256_maskstore_pd(ptr, transmute(mask), transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut c64, values: Self::c64s) {
    // Reinterpret the complex destination as `f64` so the float masked store
    // can be reused; `mask` is an `m64s`, the same mask type the `f64` store takes.
    let dst = ptr as *mut f64;
    let lane_mask = transmute(mask);
    _mm256_maskstore_pd(dst, lane_mask, transmute(values));
}
Expand Down Expand Up @@ -2801,6 +2847,18 @@ impl Simd for V4 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_load_ptr(
    self,
    mask: Self::m32s,
    ptr: *const f32,
    or: Self::f32s,
) -> Self::f32s {
    // `or` is handed to the intrinsic as its source operand, so no separate
    // select step is needed after the load.
    let fallback = transmute(or);
    let merged = _mm512_mask_loadu_ps(fallback, mask.0, ptr as _);
    transmute(merged)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_load_ptr(
self,
mask: Self::m32s,
Expand All @@ -2825,6 +2883,18 @@ impl Simd for V4 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_load_ptr(
    self,
    mask: Self::m64s,
    ptr: *const f64,
    or: Self::f64s,
) -> Self::f64s {
    // `or` is handed to the intrinsic as its source operand, so no separate
    // select step is needed after the load.
    let fallback = transmute(or);
    let merged = _mm512_mask_loadu_pd(fallback, mask.0, ptr as _);
    transmute(merged)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_load_ptr(
self,
mask: Self::m64s,
Expand All @@ -2845,6 +2915,13 @@ impl Simd for V4 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut f32, values: Self::f32s) {
    // Masked unaligned store of `values` to `ptr` through the floating-point
    // intrinsic; `mask.0` is the raw mask value passed to it.
    // `ptr` is already `*mut f32`, so no cast is needed.
    _mm512_mask_storeu_ps(ptr, mask.0, transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut c32, values: Self::c32s) {
    // Reinterpret the complex destination as `f32` so the float masked store
    // can be reused; `mask` is an `m32s`, the same mask type the `f32` store takes.
    let dst = ptr as *mut f32;
    let lanes = transmute(values);
    _mm512_mask_storeu_ps(dst, mask.0, lanes);
}
Expand All @@ -2859,6 +2936,13 @@ impl Simd for V4 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut f64, values: Self::f64s) {
    // Masked unaligned store of `values` to `ptr` through the floating-point
    // intrinsic; `mask.0` is the raw mask value passed to it.
    // `ptr` is already `*mut f64`, so no cast is needed.
    _mm512_mask_storeu_pd(ptr, mask.0, transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut c64, values: Self::c64s) {
    // Reinterpret the complex destination as `f64` so the float masked store
    // can be reused; `mask` is an `m64s`, the same mask type the `f64` store takes.
    let dst = ptr as *mut f64;
    let lanes = transmute(values);
    _mm512_mask_storeu_pd(dst, mask.0, lanes);
}
Expand Down Expand Up @@ -3577,6 +3661,18 @@ impl Simd for V4_256 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_load_ptr(
    self,
    mask: Self::m32s,
    ptr: *const f32,
    or: Self::f32s,
) -> Self::f32s {
    // `or` is handed to the intrinsic as its source operand, so no separate
    // select step is needed after the load.
    let fallback = transmute(or);
    let merged = _mm256_mask_loadu_ps(fallback, mask.0, ptr as _);
    transmute(merged)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_load_ptr(
self,
mask: Self::m32s,
Expand All @@ -3601,6 +3697,18 @@ impl Simd for V4_256 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_load_ptr(
    self,
    mask: Self::m64s,
    ptr: *const f64,
    or: Self::f64s,
) -> Self::f64s {
    // `or` is handed to the intrinsic as its source operand, so no separate
    // select step is needed after the load.
    let fallback = transmute(or);
    let merged = _mm256_mask_loadu_pd(fallback, mask.0, ptr as _);
    transmute(merged)
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_load_ptr(
self,
mask: Self::m64s,
Expand All @@ -3621,6 +3729,13 @@ impl Simd for V4_256 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut f32, values: Self::f32s) {
    // Masked unaligned store of `values` to `ptr` through the floating-point
    // intrinsic; `mask.0` is the raw mask value passed to it.
    // `ptr` is already `*mut f32`, so no cast is needed.
    _mm256_mask_storeu_ps(ptr, mask.0, transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c32s_mask_store_ptr(self, mask: Self::m32s, ptr: *mut c32, values: Self::c32s) {
    // Reinterpret the complex destination as `f32` so the float masked store
    // can be reused; `mask` is an `m32s`, the same mask type the `f32` store takes.
    let dst = ptr as *mut f32;
    let lanes = transmute(values);
    _mm256_mask_storeu_ps(dst, mask.0, lanes);
}
Expand All @@ -3635,6 +3750,13 @@ impl Simd for V4_256 {
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn f64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut f64, values: Self::f64s) {
    // Masked unaligned store of `values` to `ptr` through the floating-point
    // intrinsic; `mask.0` is the raw mask value passed to it.
    // `ptr` is already `*mut f64`, so no cast is needed.
    _mm256_mask_storeu_pd(ptr, mask.0, transmute(values));
}
/// # Safety
///
/// See the trait-level safety documentation.
#[inline(always)]
unsafe fn c64s_mask_store_ptr(self, mask: Self::m64s, ptr: *mut c64, values: Self::c64s) {
    // Reinterpret the complex destination as `f64` so the float masked store
    // can be reused; `mask` is an `m64s`, the same mask type the `f64` store takes.
    let dst = ptr as *mut f64;
    let lanes = transmute(values);
    _mm256_mask_storeu_pd(dst, mask.0, lanes);
}
Expand Down

0 comments on commit dff8107

Please sign in to comment.