Last active
April 8, 2017 08:32
-
-
Save MaverickTse/b78eff8fcc70962e0ee7a21b985bbaa9 to your computer and use it in GitHub Desktop.
Calculating Japan's My Number check digit using AVX intrinsics
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <string> | |
#include <immintrin.h> //AVX | |
#include <tmmintrin.h> //SSSE3 | |
#include <iostream> | |
#include <stdexcept> | |
#include <array> | |
/// Function to calculate check digit with AVX2 intrinsics | |
/// parm: a 11 digit number as string | |
/// ret: an unsigned integer 0~11 | |
unsigned char get_check_digit_avx2(std::string& query) | |
{ | |
if (query.length() != 11) | |
{ | |
std::cerr << "Query string is not 11 characters" << std::endl; | |
return 0xFF; | |
} | |
unsigned long long as_value{ 0 }; | |
try | |
{ | |
as_value = std::stoull(query); | |
} | |
catch (std::invalid_argument e) | |
{ | |
std::cerr << "Query string is not a valid number" << std::endl; | |
return 0xFF; | |
} | |
catch (std::exception all) | |
{ | |
std::cerr << "Error converting string to number." << std::endl; | |
std::cerr << all.what() << std::endl; | |
return 0xFF; | |
} | |
std::array<unsigned char, 32> digits{0}; | |
std::array<short, 16> simd_result{ 0 }; // the 16bit intermediate results from SIMD | |
for (int i = 0; i < 11; ++i) | |
{ | |
digits[i] = as_value % 10; | |
as_value /= 10; | |
} | |
// Note: the least significant digit is now the 1st item of array | |
// Fire up SIMD for calculation | |
// load P from array | |
__m256i vP = _mm256_loadu_si256(reinterpret_cast<const __m256i*> (digits.data())); | |
// Set Q, beware of param order | |
__m256i vQ = _mm256_set_epi8(0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2); | |
// Multiply-add vP and vQ | |
__m256i vR = _mm256_maddubs_epi16(vP, vQ); | |
// Store vR | |
_mm256_storeu_si256(reinterpret_cast<__m256i*>(simd_result.data()), vR); | |
// our result | |
int result{ 0 }; | |
//for (auto v : simd_result) | |
//{ | |
// result += v; | |
//} | |
for (int i = 0; i < 6; ++i) | |
{ | |
result += simd_result[i]; | |
} | |
result %= 11; | |
if (result <= 1) | |
{ | |
return 0; | |
} | |
result = 11 - result; | |
return static_cast<unsigned char>(result); | |
} | |
/// Function to calculate check digit with SSE3 intrinsics | |
/// parm: a 11 digit number as string | |
/// ret: an unsigned integer 0~11 | |
unsigned char get_check_digit_ssse3(std::string& query) | |
{ | |
if (query.length() != 11) | |
{ | |
std::cerr << "Query string is not 11 characters" << std::endl; | |
return 0xFF; | |
} | |
unsigned long long as_value{ 0 }; | |
try | |
{ | |
as_value = std::stoull(query); | |
} | |
catch (std::invalid_argument e) | |
{ | |
std::cerr << "Query string is not a valid number" << std::endl; | |
return 0xFF; | |
} | |
catch (std::exception all) | |
{ | |
std::cerr << "Error converting string to number." << std::endl; | |
std::cerr << all.what() << std::endl; | |
return 0xFF; | |
} | |
std::array<unsigned char, 16> digits{ 0 }; | |
std::array<short, 16> simd_result{ 0 }; // the 16bit intermediate results from SIMD | |
for (int i = 0; i < 11; ++i) | |
{ | |
digits[i] = as_value % 10; | |
as_value /= 10; | |
} | |
// Note: the least significant digit is now the 1st item of array | |
// Fire up SIMD for calculation | |
// load P from array | |
__m128i vP = _mm_loadu_si128(reinterpret_cast<const __m128i*> (digits.data())); | |
// Set Q, beware of order | |
__m128i vQ = _mm_set_epi8(0, 0, 0, 0, 0, 6, 5, 4, 3, 2, 7, 6, 5, 4, 3, 2); | |
// Multiply-add vP and vQ | |
__m128i vR = _mm_maddubs_epi16(vP, vQ); | |
// Store vR | |
_mm_storeu_si128(reinterpret_cast<__m128i*>(simd_result.data()), vR); | |
// our result | |
int result{ 0 }; | |
//for (auto v : simd_result) | |
//{ | |
// result += v; | |
//} | |
for (int i = 0; i < 6; ++i) | |
{ | |
result += simd_result[i]; | |
} | |
result %= 11; | |
if (result <= 1) | |
{ | |
return 0; | |
} | |
result = 11 - result; | |
return static_cast<unsigned char>(result); | |
} | |
/// The main() entry point | |
int main() | |
{ | |
std::string foo{ "12345678901" }; | |
//by hand calculation | |
// 1*2 + 0*3 + 9*4 + 8*5 + 7*6 + 6*7 | |
// + 5*2 + 4*3 + 3*4 + 2*5 + 1*6 | |
// = 2 + 0 + 36 + 40 + 42 + 42 + 10 + 12 + 12 + 10 + 6 | |
// = 2 + 76 + 84 + 22 + 22 +6 | |
// = 212 | |
// 212 % 11 = 3 | |
// 11 -3 = 8 | |
std::cout << "Test number: " << foo << std::endl; | |
std::cout << "Expected: 8" << std::endl; | |
std::cout << "Calculated SSSE3: " << static_cast<int>(get_check_digit_ssse3(foo)) << std::endl; | |
std::cout << "Calculated AVX2: " << static_cast<int>(get_check_digit_avx2(foo)) << std::endl; | |
return 0; | |
} |
Calculation method follow this article (in Japanese):
http://qiita.com/yumetodo/items/600ca0df422010cbc4c1
I don't have real MyNumber ID samples so I can't really test for validity of calculation
In rev1 and 2, there is a logical error concerning the case of "mod<=1". Actually, the function should directly return Zero, not 11-0 -> 11.
The number parsing here using a plain stoull() for sake of easier understanding. It is SLOW however. There is a hack using intrinsics for MUCH faster number parsing, see
https://github.com/yumetodo/benchmark_calc_check_degit/blob/master/benchmark_calc_check_degit/Source.cpp
for reference.
The intrinsics way make latter processing confusing, be warned.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
in the first revision, the values of Q are loaded from memory with "loadu", while in the second, just set the values directly with "set_epi8". This should spare a step of memory allocation.
In the final summation step, actually the first 6 short integers would be useful, the rest are zero.