Skip to content

Instantly share code, notes, and snippets.

@alexcrichton
Created March 21, 2018 14:00
Show Gist options
  • Save alexcrichton/943a64e12e3affd7a40aa7eeb0775bd2 to your computer and use it in GitHub Desktop.
Save alexcrichton/943a64e12e3affd7a40aa7eeb0775bd2 to your computer and use it in GitHub Desktop.
commit 56d3a103b5a68ea876d8c5a4c7eef53d45cf6e38
Author: Alex Crichton <alex@alexcrichton.com>
Date: Mon Mar 12 14:56:10 2018 -0700
Add x86 SHA intrinsics
Binding [these] intrinsics!
[these]: https://software.intel.com/sites/landingpage/IntrinsicsGuide/#othertechs=SHA
diff --git a/coresimd/x86/mod.rs b/coresimd/x86/mod.rs
index 32915c3..a404e71 100644
--- a/coresimd/x86/mod.rs
+++ b/coresimd/x86/mod.rs
@@ -609,3 +609,6 @@ pub use self::aes::*;
mod rdrand;
pub use self::rdrand::*;
+
+mod sha;
+pub use self::sha::*;
diff --git a/coresimd/x86/sha.rs b/coresimd/x86/sha.rs
new file mode 100644
index 0000000..268becd
--- /dev/null
+++ b/coresimd/x86/sha.rs
@@ -0,0 +1,106 @@
+
+use coresimd::simd::*;
+use coresimd::x86::*;
+use mem;
+
+#[allow(improper_ctypes)] // NOTE(review): presumably for the `i32x4` vector types below; confirm
+extern { // LLVM x86 SHA intrinsics, bound by symbol name through `link_name`
+ #[link_name = "llvm.x86.sha1msg1"]
+ fn sha1msg1(a: i32x4, b: i32x4) -> i32x4;
+ #[link_name = "llvm.x86.sha1msg2"]
+ fn sha1msg2(a: i32x4, b: i32x4) -> i32x4;
+ #[link_name = "llvm.x86.sha1nexte"]
+ fn sha1nexte(a: i32x4, b: i32x4) -> i32x4;
+ #[link_name = "llvm.x86.sha1rnds4"]
+ fn sha1rnds4(a: i32x4, b: i32x4, c: i8) -> i32x4;
+ #[link_name = "llvm.x86.sha256msg1"]
+ fn sha256msg1(a: i32x4, b: i32x4) -> i32x4;
+ #[link_name = "llvm.x86.sha256msg2"]
+ fn sha256msg2(a: i32x4, b: i32x4) -> i32x4;
+ #[link_name = "llvm.x86.sha256rnds2"]
+ fn sha256rnds2(a: i32x4, b: i32x4, k: i32x4) -> i32x4;
+}
+
+#[cfg(test)]
+use stdsimd_test::assert_instr;
+
+/// Compute an intermediate result for the next four SHA1 message values
+/// (unsigned 32-bit integers) from the previous message values held in `a`
+/// and `b`, and return it. The CPU must support the `sha` target feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha1msg1))]
+pub unsafe fn _mm_sha1msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha1msg1(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Finish computing the next four SHA1 message values (unsigned 32-bit
+/// integers) from the intermediate result in `a` and the previous message
+/// values in `b`, and return them. The CPU must support the `sha` feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha1msg2))]
+pub unsafe fn _mm_sha1msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha1msg2(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Derive SHA1 state variable E after four rounds of operation from the
+/// current SHA1 state variable `a`, add it to the scheduled values (unsigned
+/// 32-bit integers) in `b`, and return the sum. Requires `sha` CPU support.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha1nexte))]
+pub unsafe fn _mm_sha1nexte_epu32(a: __m128i, b: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha1nexte(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Run four SHA1 rounds. `a` holds the initial SHA1 state (A,B,C,D); `b` holds
+/// a pre-computed sum of the next 4 round message values (unsigned 32-bit
+/// integers) and state variable E. Returns the updated SHA1 state (A,B,C,D).
+/// `func` carries the logic functions and round constants and must be a
+/// compile-time constant. The CPU must support the `sha` target feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha1rnds4, func = 0))]
+#[rustc_args_required_const(2)]
+pub unsafe fn _mm_sha1rnds4_epu32(a: __m128i, b: __m128i, func: i32) -> __m128i {
+ let abcd = a.as_i32x4();
+ let msg_e = b.as_i32x4();
+ // NOTE(review): presumably `constify_imm2!` maps `func` to a literal immediate
+ macro_rules! rounds {
+ ($imm2:expr) => { sha1rnds4(abcd, msg_e, $imm2) }
+ }
+ mem::transmute::<i32x4, __m128i>(constify_imm2!(func, rounds))
+}
+
+/// Compute an intermediate result for the next four SHA256 message values
+/// (unsigned 32-bit integers) from the previous message values held in `a`
+/// and `b`, and return it. The CPU must support the `sha` target feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha256msg1))]
+pub unsafe fn _mm_sha256msg1_epu32(a: __m128i, b: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha256msg1(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Finish computing the next four SHA256 message values (unsigned 32-bit
+/// integers) from the previous message values in `a` and `b`, and return
+/// them. The CPU must support the `sha` target feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha256msg2))]
+pub unsafe fn _mm_sha256msg2_epu32(a: __m128i, b: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha256msg2(a.as_i32x4(), b.as_i32x4()))
+}
+
+/// Perform 2 rounds of SHA256 operation using an initial SHA256 state (C,D,G,H)
+/// from `a`, an initial SHA256 state (A,B,E,F) from `b`, and a pre-computed sum
+/// of the next 2 round message values (unsigned 32-bit integers) and the
+/// corresponding round constants from `k`, and return the updated SHA256 state
+/// (A,B,E,F). The CPU must support the `sha` target feature.
+#[inline]
+#[target_feature = "+sha"]
+#[cfg_attr(test, assert_instr(sha256rnds2))]
+pub unsafe fn _mm_sha256rnds2_epu32(a: __m128i, b: __m128i, k: __m128i) -> __m128i {
+ mem::transmute::<i32x4, __m128i>(sha256rnds2(a.as_i32x4(), b.as_i32x4(), k.as_i32x4()))
+}
diff --git a/stdsimd/arch/detect/x86.rs b/stdsimd/arch/detect/x86.rs
index 772002b..19e7073 100644
--- a/stdsimd/arch/detect/x86.rs
+++ b/stdsimd/arch/detect/x86.rs
@@ -73,6 +73,10 @@ macro_rules! is_x86_feature_detected {
cfg!(target_feature = "sse4a") || $crate::arch::detect::check_for(
$crate::arch::detect::Feature::sse4a)
};
+ ("sha") => {
+ cfg!(target_feature = "sha") || $crate::arch::detect::check_for(
+ $crate::arch::detect::Feature::sha)
+ };
("avx") => {
cfg!(target_feature = "avx") || $crate::arch::detect::check_for(
$crate::arch::detect::Feature::avx)
@@ -256,6 +260,8 @@ pub enum Feature {
xsaves,
/// XSAVEC (Save Processor Extended States Compacted)
xsavec,
+ /// SHA (Intel SHA Extensions for SHA-1 and SHA-256)
+ sha,
}
/// Run-time feature detection on x86 works by using the CPUID instruction.
@@ -373,6 +379,7 @@ pub fn detect_features() -> cache::Initializer {
enable(extended_features_ebx, 3, Feature::bmi);
enable(extended_features_ebx, 8, Feature::bmi2);
+ enable(extended_features_ebx, 29, Feature::sha);
// `XSAVE` and `AVX` support:
let cpu_xsave = bit::test(proc_info_ecx as usize, 26);
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment