Created
March 21, 2018 14:00
-
-
Save alexcrichton/943a64e12e3affd7a40aa7eeb0775bd2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
commit 56d3a103b5a68ea876d8c5a4c7eef53d45cf6e38 | |
Author: Alex Crichton <alex@alexcrichton.com> | |
Date: Mon Mar 12 14:56:10 2018 -0700 | |
Add x86 SHA intrinsics | |
Binding [these] intrinsics! | |
[these]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA | |
diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs | |
index 32915c3..a404e71 100644 | |
--- a/coresimd/x86/mod.rs | |
+++ b/coresimd/x86/mod.rs | |
@@ -609,3 +609,6 @@ pub use self::aes::*; | |
mod rdrand; | |
pub use self::rdrand::*; | |
+ | |
+mod sha; | |
+pub use self::sha::*; | |
diff --git a/coresimd/x86/sha.rs b/coresimd/x86/sha.rs | |
new file mode 100644 | |
index 0000000..268becd | |
--- /dev/null | |
+++ b/coresimd/x86/sha.rs | |
@@ -0,0 +1,106 @@ | |
+ | |
+use coresimd::simd::*; | |
+use coresimd::x86::*; | |
+use mem; | |
+ | |
+#[allow(improper_ctypes)] // NOTE(review): presumably the SIMD vector types trip this lint; the items below bind LLVM intrinsics by `link_name` | |
+extern { | |
+ #[link_name = "llvm.x86.sha1msg1"] | |
+ fn sha1msg1(a: i32x4, b: i32x4) -> i32x4; | |
+ #[link_name = "llvm.x86.sha1msg2"] | |
+ fn sha1msg2(a: i32x4, b: i32x4) -> i32x4; | |
+ #[link_name = "llvm.x86.sha1nexte"] | |
+ fn sha1nexte(a: i32x4, b: i32x4) -> i32x4; | |
+ #[link_name = "llvm.x86.sha1rnds4"] | |
+ fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4; // `c` receives the `func` immediate, expanded through `constify_imm2!` by `_mm_sha1rnds4_epu32` | |
+ #[link_name = "llvm.x86.sha256msg1"] | |
+ fn sha256msg1(a: i32x4, b: i32x4) -> i32x4; | |
+ #[link_name = "llvm.x86.sha256msg2"] | |
+ fn sha256msg2(a: i32x4, b: i32x4) -> i32x4; | |
+ #[link_name = "llvm.x86.sha256rnds2"] | |
+ fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4; | |
+} | |
+ | |
+#[cfg(test)] | |
+use stdsimd_test::assert_instr; | |
+ | |
+/// Perform an intermediate calculation for the next four SHA1 message values | |
+/// (unsigned 32-bit integers) using previous message values from `a` and `b`, | |
+/// and return the result. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha1msg1))] | |
+pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i { | |
+ mem::transmute(sha1msg1(a.as_i32x4(), b.as_i32x4())) | |
+} | |
+ | |
+/// Perform the final calculation for the next four SHA1 message values | |
+/// (unsigned 32-bit integers) using the intermediate result in `a` and the | |
+/// previous message values in `b`, and return the result. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha1msg2))] | |
+pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i { | |
+ mem::transmute(sha1msg2(a.as_i32x4(), b.as_i32x4())) | |
+} | |
+ | |
+/// Calculate SHA1 state variable E after four rounds of operation from the | |
+/// current SHA1 state variable `a`, add that value to the scheduled values | |
+/// (unsigned 32-bit integers) in `b`, and return the result. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha1nexte))] | |
+pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i { | |
+ mem::transmute(sha1nexte(a.as_i32x4(), b.as_i32x4())) | |
+} | |
+ | |
+/// Perform four rounds of SHA1 operation using an initial SHA1 state (A,B,C,D) | |
+/// from `a` and some pre-computed sum of the next 4 round message values | |
+/// (unsigned 32-bit integers), and state variable E from `b`, and return the | |
+/// updated SHA1 state (A,B,C,D). `func` contains the logic functions and round | |
+/// constants; it must be a compile-time constant. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))] | |
+#[rustc_args_required_const(2)] | |
+pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i { | |
+ let a = a.as_i32x4(); | |
+ let b = b.as_i32x4(); | |
+ macro_rules! call { | |
+ ($imm2:expr) => { sha1rnds4(a, b, $imm2) } | |
+ } | |
+ let ret = constify_imm2!(func, call); | |
+ mem::transmute(ret) | |
+} | |
+ | |
+/// Perform an intermediate calculation for the next four SHA256 message values | |
+/// (unsigned 32-bit integers) using previous message values from `a` and `b`, | |
+/// and return the result. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha256msg1))] | |
+pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i { | |
+ mem::transmute(sha256msg1(a.as_i32x4(), b.as_i32x4())) | |
+} | |
+ | |
+/// Perform the final calculation for the next four SHA256 message values | |
+/// (unsigned 32-bit integers) using previous message values from `a` and `b`, | |
+/// and return the result. | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha256msg2))] | |
+pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i { | |
+ mem::transmute(sha256msg2(a.as_i32x4(), b.as_i32x4())) | |
+} | |
+ | |
+/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H) | |
+/// from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum | |
+/// of the next 2 round message values (unsigned 32-bit integers) and the | |
+/// corresponding round constants from `k`, and return the updated SHA256 state | |
+/// (A,B,E,F). | |
+#[inline] | |
+#[target_feature(enable = "sha")] | |
+#[cfg_attr(test, assert_instr(sha256rnds2))] | |
+pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i { | |
+ mem::transmute(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4())) | |
+} | |
diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs | |
index 772002b..19e7073 100644 | |
--- a/stdsimd/arch/detect/x86.rs | |
+++ b/stdsimd/arch/detect/x86.rs | |
@@ -73,6 +73,10 @@ macro_rules! is_x86_feature_detected { | |
cfg!(target_feature = "sse4a") || $crate::arch::detect::check_for( | |
$crate::arch::detect::Feature::sse4a) | |
}; | |
+ ("sha") => { | |
+ cfg!(target_feature = "sha") || $crate::arch::detect::check_for( | |
+ $crate::arch::detect::Feature::sha) | |
+ }; | |
("avx") => { | |
cfg!(target_feature = "avx") || $crate::arch::detect::check_for( | |
$crate::arch::detect::Feature::avx) | |
@@ -256,6 +260,8 @@ pub enum Feature { | |
xsaves, | |
/// XSAVEC (Save Processor Extended States Compacted) | |
xsavec, | |
+ /// SHA (x86 SHA extensions: SHA-1 and SHA-256 instructions) | |
+ sha, | |
} | |
/// Run-time feature detection on x86 works by using the CPUID instruction. | |
@@ -373,6 +379,7 @@ pub fn detect_features() -> cache::Initializer { | |
enable(extended_features_ebx, 3, Feature::bmi); | |
enable(extended_features_ebx, 8, Feature::bmi2); | |
+ enable(extended_features_ebx, 29, Feature::sha); | |
// `XSAVE` and `AVX` support: | |
let cpu_xsave = bit::test(proc_info_ecx as usize, 26); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment