Skip to content

Instantly share code, notes, and snippets.

@ned14
Last active April 6, 2020 20:28
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save ned14/fe863db40482bc4d62bd to your computer and use it in GitHub Desktop.
Save ned14/fe863db40482bc4d62bd to your computer and use it in GitHub Desktop.
More generalised C++ hamming code testing
/* hamming_test.cpp
Test generic hamming coding routines (SECDEC). These can fix 1 bit errors and detect two bit errors.
(C) 2015 Niall Douglas http://www.nedprod.com
g++ -std=c++11 -o hamming_test -O3 -fverbose-asm -Wa,-adhln hamming_test.cpp -march=ivybridge > hamming_test.S
On my 3.9Ghz Ivy Bridge Intel 3770K:
For no intrinsics:
Calculating 4Kb: 12.06Mb/sec
Fixing 4Kb: 14.98Mb/sec
With intrinsics (bitscan for set bits):
Calculating 4Kb: 73.78Mb/sec
Fixing 4Kb: 64.15Mb/sec
With tables (bitscan for set bits):
Calculating 4Kb: 82.41Mb/sec
Fixing 4Kb: 72.58Mb/sec
With prefetch of packed tables and slice by 8 with fixed bit masking:
Calculating 4Kb: 168.68Mb/sec (22 corrected cycles/byte)
Fixing 4Kb: 110.97Mb/sec
With prefetch of packed tables and slice by 8 with fixed bit masking and OpenMP:
Calculating 4Kb: 683Mb/sec
Fixing 4Kb: 383Mb/sec
*/
#include <atomic>
#include <iostream>
#include <random>
#include <vector>
#include <chrono>
#include <stdexcept>
#include <string.h>
#define USE_INTRINSICS
/*! \class secdec_ecc
\brief Calculates the single error correcting double error detecting (SECDEC) Hamming Error Correcting Code for a \em blocksize block of bytes.
After construction, no state is modified and therefore this class is safe for static storage.
*/
template<size_t blocksize> class secdec_ecc
{
public:
typedef unsigned int result_type; //!< The largest ECC which can be calculated
private:
static constexpr size_t bits_per_byte=8;
typedef unsigned char unit_type; // The batch unit of processing
result_type bitsvalid;
// Many CPUs (x86) are slow doing variable bit shifts, so keep a table
result_type ecc_twospowers[sizeof(result_type)*bits_per_byte];
unsigned short ecc_table[blocksize*bits_per_byte];
static bool _is_single_bit_set(unsigned long long x)
{
#ifdef USE_INTRINSICS
return __builtin_popcountl(x)==1;
#else
x -= (x >> 1) & 0x5555555555555555ULL;
x = (x & 0x3333333333333333ULL) + ((x >> 2) & 0x3333333333333333ULL);
x = (x + (x >> 4)) & 0x0f0f0f0f0f0f0f0fULL;
unsigned long long count=(x * 0x0101010101010101ULL)>>56;
return count==1;
#endif
}
public:
//! Constructs an instance, configuring the necessary lookup tables
secdec_ecc()
{
for(size_t n=0; n<sizeof(result_type)*bits_per_byte; n++)
ecc_twospowers[n]=((result_type)1<<n);
result_type length=blocksize*8;
// This is (data bits + parity bits + 1) <= 2^(parity bits)
for(result_type p=1; p<sizeof(result_type)*bits_per_byte; p++)
if((length+p+1)<=ecc_twospowers[p])
{
bitsvalid=p;
break;
}
if((7+bitsvalid)/8>sizeof(result_type))
throw std::runtime_error("ECC would exceed the size of result_type!");
for(result_type i=0; i<blocksize*bits_per_byte; i++)
{
// Make a code bit
result_type b=i+1;
#ifdef USE_INTRINSICS
result_type topbit=8*sizeof(result_type)-__builtin_clz(b);
b+=topbit;
if(b>=ecc_twospowers[topbit]) b++;
//while(b>ecc_twospowers(_topbit+1)) _topbit++;
//b+=_topbit;
//if(b>=ecc_twospowers(_topbit)) b++;
#else
for(size_t p=0; ecc_twospowers[p]<(b+1); p++)
b++;
#endif
ecc_table[i]=b;
if(b>(unsigned short)-1)
throw std::runtime_error("Precalculated table has exceeded its bounds");
}
}
//! The number of bits valid in result_type
size_t result_bits_valid() const noexcept
{
return bitsvalid;
}
//! Accumulate ECC from buffer
result_type operator()(result_type ecc, const char *buffer) const noexcept
{
#if 1
// Process in lumps of eight
const unit_type *_buffer=(const unit_type *) buffer;
#pragma omp parallel for reduction(^:ecc)
for(size_t i=0; i<blocksize; i+=sizeof(unit_type)*8)
{
union { unsigned long long v; unit_type c[8]; };
result_type prefetch[8];
v=*(unsigned long long *)(&_buffer[0+i/sizeof(unit_type)]); // min 1 cycle
#define ROUND(n) \
prefetch[0]=ecc_table[(i+0)*8+n]; \
prefetch[1]=ecc_table[(i+1)*8+n]; \
prefetch[2]=ecc_table[(i+2)*8+n]; \
prefetch[3]=ecc_table[(i+3)*8+n]; \
prefetch[4]=ecc_table[(i+4)*8+n]; \
prefetch[5]=ecc_table[(i+5)*8+n]; \
prefetch[6]=ecc_table[(i+6)*8+n]; \
prefetch[7]=ecc_table[(i+7)*8+n]; \
if(c[0]&((unit_type)1<<n))\
ecc^=prefetch[0];\
if(c[1]&((unit_type)1<<n))\
ecc^=prefetch[1];\
if(c[2]&((unit_type)1<<n))\
ecc^=prefetch[2];\
if(c[3]&((unit_type)1<<n))\
ecc^=prefetch[3];\
if(c[4]&((unit_type)1<<n))\
ecc^=prefetch[4];\
if(c[5]&((unit_type)1<<n))\
ecc^=prefetch[5];\
if(c[6]&((unit_type)1<<n))\
ecc^=prefetch[6];\
if(c[7]&((unit_type)1<<n))\
ecc^=prefetch[7];
ROUND(0) // prefetch = min 8, bit test and xor = min 16, total = 24
ROUND(1)
ROUND(2)
ROUND(3)
ROUND(4)
ROUND(5)
ROUND(6)
ROUND(7)
#undef ROUND // total should be 1+(8*24/3)=65
}
return ecc;
#else
const unit_type *_buffer=(const unit_type *) buffer;
#pragma omp parallel for reduction(^:ecc)
for(size_t i=0; i<blocksize; i+=sizeof(unit_type))
{
unit_type c=_buffer[i/sizeof(unit_type)];
if(!c)
continue;
result_type prefetch[bits_per_byte*sizeof(unit_type)];
// for(size_t n=0; n<bits_per_byte*sizeof(unit_type); n++)
// prefetch[n]=ecc_table[i*8+n];
#define ROUND(n) \
prefetch[n]=ecc_table[i*8+n];
ROUND(0)
ROUND(1)
ROUND(2)
ROUND(3)
ROUND(4)
ROUND(5)
ROUND(6)
ROUND(7)
#undef ROUND
// for(size_t n=0; n<bits_per_byte*sizeof(unit_type); n++)
// {
// if(c&((unit_type)1<<n))
// ecc^=prefetch[n];
// }
#define ROUND(n) \
if(c&((unit_type)1<<n))\
ecc^=prefetch[n];
ROUND(0)
ROUND(1)
ROUND(2)
ROUND(3)
ROUND(4)
ROUND(5)
ROUND(6)
ROUND(7)
#undef ROUND
}
return ecc;
#endif
}
result_type operator()(const char *buffer) const noexcept { return (*this)(0, buffer); }
//! Accumulate ECC from partial buffer where \em length <= \em blocksize
result_type operator()(result_type ecc, const char *buffer, size_t length) const noexcept
{
const unit_type *_buffer=(const unit_type *) buffer;
#pragma omp parallel for reduction(^:ecc)
for(size_t i=0; i<length; i+=sizeof(unit_type))
{
unit_type c=_buffer[i/sizeof(unit_type)]; // min 1 cycle
if(!c) // min 1 cycle
continue;
char bitset[bits_per_byte*sizeof(unit_type)];
// Most compilers will roll this out
for(size_t n=0; n<bits_per_byte*sizeof(unit_type); n++) // min 16 cycles
bitset[n]=!!(c&((unit_type)1<<n));
result_type localecc=0;
for(size_t n=0; n<bits_per_byte*sizeof(unit_type); n++)
{
if(bitset[n]) // min 8 cycles
localecc^=ecc_table[i*8+n]; // min 16 cycles
}
ecc^=localecc; // min 1 cycle. Total cycles = min 43 cycles/byte
}
return ecc;
}
result_type operator()(const char *buffer, size_t length) const noexcept { return (*this)(0, buffer, length); }
//! Given the original ECC and the new ECC for a buffer, find the bad bit. Return (size_t)-1 if not found (e.g. ECC corrupt)
size_t find_bad_bit(result_type good_ecc, result_type bad_ecc) const noexcept
{
result_type length=blocksize*8, eccdiff=good_ecc^bad_ecc;
if(_is_single_bit_set(eccdiff))
return (size_t)-1;
for(result_type i=0, b=1; i<length; i++, b++)
{
// Skip parity bits
while(_is_single_bit_set(b))
b++;
if(b==eccdiff)
return i;
}
return (size_t)-1;
}
//! Verifies and heals when possible a buffer, returning non zero if the buffer is error free
int verify(char *buffer, result_type good_ecc) const noexcept
{
result_type this_ecc=(*this)(0, buffer);
if(this_ecc==good_ecc)
return 1; // no errors
size_t badbit=find_bad_bit(good_ecc, this_ecc);
if((size_t)-1==badbit)
return 0; // parity corrupt?
buffer[badbit/8]^=(unsigned char) ecc_twospowers[badbit%8];
this_ecc=(*this)(0, buffer);
if(this_ecc==good_ecc)
return 2; // error healed
// Put the bit back
buffer[badbit/8]^=(unsigned char) ecc_twospowers[badbit%8];
return 0; // more than one bit was corrupt
}
//! Verifies and heals when possible a partial buffer, returning non zero if the buffer is error free
int verify(char *buffer, size_t length, result_type good_ecc) const noexcept
{
result_type this_ecc=(*this)(0, buffer, length);
if(this_ecc==good_ecc)
return 1; // no errors
size_t badbit=find_bad_bit(good_ecc, this_ecc);
if((size_t)-1==badbit)
return 0; // parity corrupt?
buffer[badbit/8]^=(unsigned char) ecc_twospowers[badbit%8];
this_ecc=(*this)(0, buffer, length);
if(this_ecc==good_ecc)
return 2; // error healed
// Put the bit back
buffer[badbit/8]^=(unsigned char) ecc_twospowers[badbit%8];
return 0; // more than one bit was corrupt
}
};
int main(int argc, const char *argv[])
{
int errcode=0;
const size_t bytes=4096;
std::vector<char> buffer(bytes);
{
std::random_device r;
std::random_device::result_type *b=(std::random_device::result_type *) buffer.data();
for(size_t n=0; n<buffer.size()/sizeof(*b); n++)
b[n]=r();
}
secdec_ecc<4096> engine;
size_t eccbits=engine.result_bits_valid();
std::cout << "ECC will be " << eccbits << " bits long" << std::endl;
size_t ecc=engine(buffer.data());
std::cout << "ECC was calculated to be " << std::hex << ecc << std::dec << std::endl;
auto end=std::chrono::high_resolution_clock::now(), begin=std::chrono::high_resolution_clock::now();
unsigned long long _begin=__rdtsc();
auto diff=std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin);
#if 0
do
{
end=std::chrono::high_resolution_clock::now();
} while(std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin).count()<1);
unsigned long long _end=__rdtsc();
std::cout << "There are " << (_end-_begin) << " TSCs in 1 second." << std::endl;
#endif
std::cout << "Flipping every bit in the buffer to see if it is correctly detected ..." << std::endl;
begin=std::chrono::high_resolution_clock::now();
for(size_t toflip=0; toflip<bytes*8; toflip++)
{
buffer[toflip/8]^=((size_t)1<<(toflip%8));
size_t newecc=engine(buffer.data());
if(ecc==newecc)
{
std::cerr << "ERROR: Flipping bit " << toflip << " not detected!" << std::endl;
errcode=2;
}
else
{
size_t badbit=engine.find_bad_bit(ecc, newecc);
if(badbit!=toflip)
{
std::cerr << "ERROR: Bad bit " << badbit << " is not the bit " << toflip << " we flipped!" << std::endl;
errcode=2;
}
// else
// std::cout << "SUCCESS: Bit flip " << toflip << " correctly detected" << std::endl;
}
if(2!=engine.verify(buffer.data(), ecc))
{
std::cerr << "ERROR: verify() did not heal the buffer!" << std::endl;
errcode=2;
}
}
end=std::chrono::high_resolution_clock::now();
diff=std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin);
std::cout << "Checking and fixing is approximately " << (bytes*10000/diff.count()/1024/1024) << " Mb/sec" << std::endl;
std::cout << "\nFlipping two bits in the buffer to see if it is correctly detected ..." << std::endl;
buffer[0]^=1;
begin=std::chrono::high_resolution_clock::now();
for(size_t toflip=1; toflip<bytes*8; toflip++)
{
buffer[toflip/8]^=((size_t)1<<(toflip%8));
size_t newecc=engine(buffer.data());
if(ecc==newecc)
{
std::cerr << "ERROR: Flipping bits 0 and " << toflip << " not detected!" << std::endl;
errcode=3;
}
// else
// {
// std::cout << "SUCCESS: Flipping bits 0 and " << toflip << " correctly detected" << std::endl;
// }
// if(0!=engine.verify(buffer.data(), ecc))
// {
// std::cerr << "ERROR: verify() thought it healed the buffer!" << std::endl;
// errcode=2;
// }
buffer[toflip/8]^=((size_t)1<<(toflip%8));
}
end=std::chrono::high_resolution_clock::now();
diff=std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin);
std::cout << "Calculating is approximately " << (bytes*10000/diff.count()/1024/1024) << " Mb/sec" << std::endl;
std::cout << "\nCalculating speeds ..." << std::endl;
size_t foo=0;
begin=std::chrono::high_resolution_clock::now();
_begin=__rdtsc();
for(size_t n=0; n<10000; n++)
{
buffer[0]=(char)n;
foo+=engine(buffer.data());
}
_end=__rdtsc();
end=std::chrono::high_resolution_clock::now();
diff=std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin);
if(foo)
std::cout << "Calculating is approximately " << (bytes*10000/diff.count()/1024/1024) << " Mb/sec, or " << ((_end-_begin)/10000.0/4096) << " cycles/byte" << std::endl;
foo=0;
begin=std::chrono::high_resolution_clock::now();
for(size_t n=0; n<10000; n++)
{
buffer[0]=(char)n;
foo+=engine.verify(buffer.data(), ecc);
}
end=std::chrono::high_resolution_clock::now();
diff=std::chrono::duration_cast<std::chrono::duration<double, std::ratio<1, 1>>>(end-begin);
if(foo)
std::cout << "Checking and fixing is approximately " << (bytes*10000/diff.count()/1024/1024) << " Mb/sec" << std::endl;
return errcode;
}
@saruque
Copy link

saruque commented Jul 31, 2019

I found this one: Hamming Code in C++

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment