Skip to content

Instantly share code, notes, and snippets.

@NotKyon
Last active June 9, 2018 10:13
Show Gist options
  • Save NotKyon/a113a8fca28bb95cdc7c9a648010e48e to your computer and use it in GitHub Desktop.
Save NotKyon/a113a8fca28bb95cdc7c9a648010e48e to your computer and use it in GitHub Desktop.
#include <stdio.h>
#include <assert.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <new>
#include <thread>
#include <atomic>
// Number of elements in a statically sized array (total size divided by the
// size of the first element). Must not be used on a pointer.
#define countof(X_) (sizeof((X_))/sizeof((X_)[0]))
// Number of plain kana entries at the front of KanaInfo::g_kanaInfo ("a" through "n").
constexpr unsigned numKana = 46;
// Number of youon entries that follow the plain kana in KanaInfo::g_kanaInfo
// (seven rows of three columns).
constexpr unsigned numYouon = 21;
// Youon row numbers, named after the i-column kana each row is derived from.
constexpr unsigned kiRow = 0;
constexpr unsigned shiRow = 1;
constexpr unsigned chiRow = 2;
constexpr unsigned niRow = 3;
constexpr unsigned hiRow = 4;
constexpr unsigned miRow = 5;
constexpr unsigned riRow = 6;
// Youon column numbers, named after the ya/yu/yo kana each column is derived from.
constexpr unsigned yaCol = 0;
constexpr unsigned yuCol = 1;
constexpr unsigned yoCol = 2;
// Flattens a (row, column) youon coordinate into one index, three columns per row.
constexpr unsigned youonIndex( unsigned row, unsigned column ) {
    constexpr unsigned columnsPerRow = 3;
    return column + columnsPerRow*row;
}
// Recovers the row number from a flattened youon index (three columns per row).
constexpr unsigned youonRow( unsigned index ) {
    constexpr unsigned columnsPerRow = 3;
    return index / columnsPerRow;
}
// Recovers the column number from a flattened youon index (three columns per row).
constexpr unsigned youonColumn( unsigned index ) {
    constexpr unsigned columnsPerRow = 3;
    return index % columnsPerRow;
}
// Returns the 1-based kana value of the i-column kana that a youon row is
// built from: 7 for "ki", 12 for "shi", 17 for "chi", 22 for "ni", 27 for "hi",
// 32 for "mi" and 40 for "ri". The numbering is the one KanaInfo::sum() uses
// for plain kana (1 for "a" ... 46 for "n"). Returns 0 for a row past riRow.
constexpr unsigned youonRowValue( unsigned row ) {
    constexpr unsigned indexBase = 1;   // kana values are 1-based
    constexpr unsigned offsetToI = 1;   // the i-kana is the second entry of its row
    constexpr unsigned bias = indexBase + offsetToI;
    constexpr unsigned kanaPerRow = 5;
    // Youon row 0 is the k-row, which is preceded in the kana table by the
    // five bare vowels. The zero-based position of a row's first kana is
    // therefore (row + 1)*5; using row*5 would yield the value of the
    // previous row's i-kana (e.g. "i" instead of "ki").
    // The r-row comes after the three-entry y-row, so its first kana ("ra")
    // sits at zero-based position 38 and has to be special-cased.
    return
        row < riRow ? ( row + 1 )*kanaPerRow + bias :
        row == riRow ? 38 + bias :
        0;
}
// Returns the 1-based kana value of the y-row kana for a youon column:
// 36 for column 0, 37 for column 1, 38 for column 2.
constexpr unsigned youonColumnValue( unsigned column ) {
    constexpr unsigned firstYPosition = 35; // zero-based base the columns are counted from
    constexpr unsigned indexBase = 1;       // kana values are 1-based
    return firstYPosition + indexBase + column;
}
// Value of the youon at the given flattened index: the value of its row's
// i-kana plus the value of its column's y-kana.
constexpr unsigned youonValue( unsigned index ) {
    const unsigned rowPart = youonRowValue( youonRow( index ) );
    const unsigned columnPart = youonColumnValue( youonColumn( index ) );
    return rowPart + columnPart;
}
// Pack the structure to 1-byte alignment; the previous packing is restored by
// the matching pop below.
#pragma pack(push,1)
// One entry of the kana table: the romanized spelling plus letter counts.
struct KanaInfo {
// Table of all entries: numKana plain kana followed by numYouon youon.
static const KanaInfo g_kanaInfo[ numKana + numYouon ];
// NUL-terminated lowercase romanization (the constructor accepts 1 to 3 letters).
char romaji[4];
// Number of letters in romaji that are not a/e/i/o/u.
uint8_t consonants:4;
// Number of letters in romaji that are a/e/i/o/u.
uint8_t vowels:3;
// 1 when the entry was constructed with isYouon set, otherwise 0.
uint8_t youon:1;
// Copies pszRomaji and counts its vowels and consonants.
KanaInfo( const char *pszRomaji, bool isYouon = false );
// Numeric value of this entry, derived from its position in g_kanaInfo.
unsigned sum() const;
};
#pragma pack(pop)
// Builds a table entry from its romanization.
//   pszRomaji  non-null string of 1 to 3 lowercase letters (checked with asserts)
//   isYouon    whether the entry is a youon
// The string is copied including its terminator; each a/e/i/o/u counts as a
// vowel and every other letter as a consonant.
inline KanaInfo::KanaInfo( const char *pszRomaji, bool isYouon ) {
    assert( pszRomaji != nullptr );
    const size_t length = strlen( pszRomaji );
    assert( length >= 1 );
    assert( length <= 3 );

    consonants = 0;
    vowels = 0;
    youon = +isYouon;

    // Runs for positions 0..length inclusive so the NUL terminator is copied too.
    size_t pos = 0;
    do {
        const char ch = pszRomaji[pos];
        assert( ch == '\0' || ( ch >= 'a' && ch <= 'z' ) );
        romaji[pos] = ch;
        switch( ch ) {
        case '\0':
            // terminator: copied above, not counted
            break;
        case 'a': case 'e': case 'i': case 'o': case 'u':
            vowels += 1;
            break;
        default:
            consonants += 1;
            break;
        }
        pos += 1;
    } while( pos <= length );
}
// Returns the numeric value of this entry based on where it sits in
// g_kanaInfo: plain kana are worth their 1-based position, youon are worth
// youonValue() of their offset past the plain kana. Only meaningful for
// objects that are elements of g_kanaInfo; anything else trips the assert
// and yields 0.
inline unsigned KanaInfo::sum() const {
    const unsigned position = static_cast<unsigned>( this - g_kanaInfo );

    const bool isPlainKana = position < numKana;
    if( isPlainKana ) {
        return position + 1;
    }

    const bool isYouonEntry = position < numKana + numYouon;
    if( !isYouonEntry ) {
        assert( false && "Unhandled case" );
        return 0;
    }
    return youonValue( position - numKana );
}
// The kana table. Entry order matters: KanaInfo::sum() derives each entry's
// value from its position in this array, and name indices produced by
// generateName() index directly into it.
const KanaInfo KanaInfo::g_kanaInfo[ countof(KanaInfo::g_kanaInfo) ] = {
// Plain kana (numKana entries), constructed with isYouon = false.
#define K(X_) KanaInfo((X_),false)
K( "a"), K( "i"), K( "u"), K( "e"), K( "o"),
K("ka"), K("ki"), K("ku"), K("ke"), K("ko"),
K("sa"),K("shi"), K("su"), K("se"), K("so"),
K("ta"),K("chi"),K("tsu"), K("te"), K("to"),
K("na"), K("ni"), K("nu"), K("ne"), K("no"),
K("ha"), K("hi"), K("fu"), K("he"), K("ho"),
K("ma"), K("mi"), K("mu"), K("me"), K("mo"),
K("ya"), K("yu"), K("yo"),
K("ra"), K("ri"), K("ru"), K("re"), K("ro"),
K("wa"), K("wo"),
K("n"),
#undef K
// Youon (numYouon entries), constructed with isYouon = true. Rows follow the
// kiRow..riRow constants, columns follow yaCol..yoCol.
#define K(X_) KanaInfo((X_),true)
K("kya"),K("kyu"),K("kyo"),
K("sha"),K("shu"),K("sho"),
K("cha"),K("chu"),K("cho"),
K("nya"),K("nyu"),K("nyo"),
K("hya"),K("hyu"),K("hyo"),
K("mya"),K("myu"),K("myo"),
K("rya"),K("ryu"),K("ryo")
#undef K
};
// How the consonants of a non-youon kana are treated when a name is analyzed
// or printed. Youon entries are always handled as `unlimited`.
enum class ConsonantLimit {
// Every consonant letter of the romanization counts and is printed.
unlimited,
// A kana contributes at most one consonant to the count, and the middle
// letters of romanizations longer than two letters are dropped when printed.
one
};
// Totals accumulated over all kana of a name by analyzeName().
struct NameAnalysis {
// Total consonant count (subject to the ConsonantLimit in effect).
unsigned consonants = 0;
// Total vowel count.
unsigned vowels = 0;
// Total of KanaInfo::sum() over the name's kana.
unsigned sum = 0;
};
// Outcome of testAnalyzedName(): either `pass` or the first expectation that
// was not met (the sum is checked first, then consonants, then vowels).
enum class NameTestResult {
pass,
tooFewConsonants,
tooManyConsonants,
tooFewVowels,
tooManyVowels,
smallSum,
largeSum
};
// Returns the enumerator name of `r` as a string literal, or "???" when `r`
// does not match any known enumerator.
const char *nameTestResultToString( NameTestResult r ) {
    const char *text = "\?\?\?";
    switch( r ) {
    case NameTestResult::pass:
        text = "pass";
        break;
    case NameTestResult::tooFewConsonants:
        text = "tooFewConsonants";
        break;
    case NameTestResult::tooManyConsonants:
        text = "tooManyConsonants";
        break;
    case NameTestResult::tooFewVowels:
        text = "tooFewVowels";
        break;
    case NameTestResult::tooManyVowels:
        text = "tooManyVowels";
        break;
    case NameTestResult::smallSum:
        text = "smallSum";
        break;
    case NameTestResult::largeSum:
        text = "largeSum";
        break;
    }
    return text;
}
// Computes the consonant, vowel and value totals of a name.
//   dst                          receives the totals (reset before accumulation)
//   indices_begin / indices_end  half-open range of indices into KanaInfo::g_kanaInfo
//   cl                           consonant counting rule for non-youon kana
void analyzeName( NameAnalysis &dst, const uint8_t *indices_begin, const uint8_t *indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) {
    assert( indices_begin != nullptr );
    assert( indices_end != nullptr );

    dst = NameAnalysis();

    for( const uint8_t *cursor = indices_begin; cursor != indices_end; ++cursor ) {
        assert( *cursor < countof(KanaInfo::g_kanaInfo) );
        const KanaInfo &kana = KanaInfo::g_kanaInfo[*cursor];

        // Value of the kana: 1 for "a", 2 for "i", 3 for "u", ..., 46 for "n"
        dst.sum += kana.sum();

        // Youon always count all of their consonants, whatever the caller asked for
        const ConsonantLimit applied = kana.youon ? ConsonantLimit::unlimited : cl;
        if( applied == ConsonantLimit::unlimited ) {
            dst.consonants += (unsigned)kana.consonants;
        } else if( applied == ConsonantLimit::one ) {
            // Any number of consonants in the kana counts as a single one
            dst.consonants += ( kana.consonants != 0 ) ? 1u : 0u;
        }

        dst.vowels += (unsigned)kana.vowels;
    }
}
void nameIndicesToString( char *dst, size_t dstMax, const uint8_t *indices_begin, const uint8_t *indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) {
assert( indices_begin != nullptr );
assert( indices_end != nullptr );
assert( dst != nullptr );
assert( dstMax > size_t(ptrdiff_t(indices_end - indices_begin)) );
size_t i = 0;
for( const uint8_t *p = indices_begin; p != indices_end; p += 1 ) {
assert( *p < countof(KanaInfo::g_kanaInfo) );
const KanaInfo &k = KanaInfo::g_kanaInfo[*p];
const size_t len = strlen(k.romaji);
assert( i + len < dstMax );
for( size_t j = 0; j < len; j += 1 ) {
// skip middle characters if limited to one consonant
if( cl == ConsonantLimit::one && len > 2 && j > 0 && j < len - 1 && !k.youon ) {
continue;
}
dst[ i ] = k.romaji[ j ];
i += 1;
}
}
assert( i < dstMax );
dst[ i ] = '\0';
}
// Compares the totals of an analyzed name against the expected sum (134),
// consonant count (7) and vowel count (8). The sum is checked first, then the
// consonants, then the vowels; the first mismatch determines the result.
NameTestResult testAnalyzedName( const NameAnalysis &src ) {
    const unsigned expectedSum = 134;
    const unsigned expectedConsonants = 7;
    const unsigned expectedVowels = 8;

    if( src.sum != expectedSum ) {
        return src.sum < expectedSum
            ? NameTestResult::smallSum
            : NameTestResult::largeSum;
    }
    if( src.consonants != expectedConsonants ) {
        return src.consonants < expectedConsonants
            ? NameTestResult::tooFewConsonants
            : NameTestResult::tooManyConsonants;
    }
    if( src.vowels != expectedVowels ) {
        return src.vowels < expectedVowels
            ? NameTestResult::tooFewVowels
            : NameTestResult::tooManyVowels;
    }
    return NameTestResult::pass;
}
// Analyzes the name given by [indices_begin, indices_end) under the consonant
// rule `cl` and returns the verdict of testAnalyzedName() on the totals.
NameTestResult testName( const uint8_t *indices_begin, const uint8_t *indices_end,
ConsonantLimit cl = ConsonantLimit::unlimited ) {
    NameAnalysis totals;
    analyzeName( totals, indices_begin, indices_end, cl );
    const NameTestResult verdict = testAnalyzedName( totals );
    return verdict;
}
void generateName( uint64_t index, uint8_t *buffer, size_t maxBuffer,
const uint8_t *&indices_begin, const uint8_t *&indices_end ) {
assert( buffer != nullptr );
assert( maxBuffer > 16 ); // Let's be reasonable here, there shouldn't be a name larger than this that passes the test
auto n = index;
size_t i = 0;
while( i < maxBuffer ) {
buffer[i] = uint8_t( n%countof(KanaInfo::g_kanaInfo) );
i += 1;
n /= countof(KanaInfo::g_kanaInfo);
if( !n ) {
break;
}
}
indices_begin = &buffer[0];
indices_end = &buffer[i];
}
// Decodes `index` into a name (see generateName) and immediately tests it
// under the consonant rule `cl`. The decoded range is handed back through
// indices_begin / indices_end so the caller can print the name.
NameTestResult generateAndTestName( uint64_t index, uint8_t *buffer, size_t maxBuffer,
const uint8_t *&indices_begin, const uint8_t *&indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) {
    generateName( index, buffer, maxBuffer, indices_begin, indices_end );
    const NameTestResult verdict = testName( indices_begin, indices_end, cl );
    return verdict;
}
// Prints a diagnostic report for the name encoded by `index`, once for each
// consonant rule (unlimited, then one): the romanized spelling, the consonant
// and vowel totals, the value sum and the test verdict.
void showNameDetails( uint64_t index ) {
    uint8_t indbuf[ 64 ];
    char strbuf[ 64 ];

    // Decoding the index does not depend on the consonant rule, so do it once.
    const uint8_t *indices_begin = nullptr;
    const uint8_t *indices_end = nullptr;
    generateName( index, indbuf, countof(indbuf), indices_begin, indices_end );

    const ConsonantLimit clArr[] {
        ConsonantLimit::unlimited,
        ConsonantLimit::one
    };
    for( const auto cl : clArr ) {
        NameAnalysis na;
        analyzeName( na, indices_begin, indices_end, cl );
        const auto r = testAnalyzedName( na );
        nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl );
        // The index is 64 bits wide (the default search range is around 1.2e18),
        // so it must be printed as unsigned long long; %u would truncate it.
        printf(
            "Name #%llu mode %i: %s\n"
            " Consonants: %u\n"
            " Vowels: %u\n"
            " Sum: %u\n"
            " Result: %s (%i)\n\n",
            static_cast<unsigned long long>( index ), int(cl), strbuf,
            na.consonants, na.vowels, na.sum,
            nameTestResultToString(r), int(r) );
    }
}
void runTests( uint64_t headIndex, uint64_t tailIndex, ConsonantLimit cl = ConsonantLimit::unlimited ) {
uint8_t indbuf[ 64 ];
char strbuf[ 64 ];
for( auto index = headIndex; index <= tailIndex; index += 1 ) {
const uint8_t *indices_begin = nullptr;
const uint8_t *indices_end = nullptr;
const NameTestResult r =
generateAndTestName( index, indbuf, countof(indbuf),
indices_begin, indices_end, cl );
if( r == NameTestResult::pass ) {
nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl );
printf( "%s\n", strbuf );
fflush(nullptr);
}
}
}
// Bounds of the name-index range searched by the worker threads. Written by
// runDefaultTests() before the workers are launched and only read by them.
uint64_t g_rangeHead = 0;
uint64_t g_rangeTail = 0;
// Counter from which each worker draws its task id (used to tag its log lines).
static std::atomic<int> g_nextTaskId;
// First index of the next chunk that has not been claimed by any worker yet.
static std::atomic<uint64_t> g_nextRange;
// Worker thread body. Repeatedly claims the next chunk of name indices from
// g_nextRange and runs the tests on it under both consonant rules, until the
// claimed chunk starts past g_rangeTail. Together the workers cover every
// index in [g_rangeHead, g_rangeTail] exactly once per consonant rule.
static void thread_run_f() {
    assert( g_rangeHead != 0 && g_rangeTail != 0 );
    assert( g_rangeTail > g_rangeHead );
    constexpr uint64_t rangeGranularity = 1'000'000'000;
    const int taskId = 1 + g_nextTaskId.fetch_add( 1, std::memory_order_acq_rel );
    for(;;) {
        const uint64_t firstIndex = g_nextRange.fetch_add( rangeGranularity, std::memory_order_acq_rel );
        if( firstIndex > g_rangeTail ) {
            break;
        }
        // runTests() treats its range as inclusive, so a chunk ends one index
        // before the next chunk begins; otherwise every chunk boundary would
        // be tested (and possibly printed) twice. The comparison is written
        // as a subtraction so it cannot overflow.
        const uint64_t lastIndex =
            g_rangeTail - firstIndex < rangeGranularity
                ? g_rangeTail
                : firstIndex + ( rangeGranularity - 1 );
        // uint64_t is not necessarily unsigned long long; cast to match %llu.
        printf( "[%.2i] Checking range %llu <= i <= %llu...\n", taskId,
            static_cast<unsigned long long>( firstIndex ),
            static_cast<unsigned long long>( lastIndex ) );
        fflush( nullptr );
        runTests( firstIndex, lastIndex, ConsonantLimit::unlimited );
        runTests( firstIndex, lastIndex, ConsonantLimit::one );
    }
    printf( "[%.2i] Done.\n", taskId );
}
// Starts `n` worker threads running thread_run_f and waits for all of them.
//   n  number of threads; 0 selects half the reported hardware concurrency,
//      but at least 2. The count is capped at 64.
void launchTasks( unsigned n = 0 ) {
    // Default-constructed std::thread objects do not represent a thread, so a
    // plain array can stand in for manually managed raw storage.
    std::thread workers[ 64 ];
    const unsigned maxWorkers = static_cast<unsigned>( countof( workers ) );

    if( n < 1 ) {
        n = std::thread::hardware_concurrency()/2;
        if( n < 2 ) {
            n = 2;
        }
    }
    if( n > maxWorkers ) {
        n = maxWorkers;
    }

    printf( "Launching %u threads...\n", n );
    fflush( nullptr );

    for( unsigned i = 0; i < n; i += 1 ) {
        workers[i] = std::thread( thread_run_f );
    }
    for( unsigned i = 0; i < n; i += 1 ) {
        workers[i].join();
    }
}
// Runs the built-in search: prints the details of the first and last name of
// the default index range, then publishes that range to the worker globals
// and searches it with the default number of threads.
void runDefaultTests() {
    constexpr uint64_t firstIndex = 1'242'671'128'500'000'000ULL;
    constexpr uint64_t lastIndex = 1'242'671'512'000'000'000ULL;

    printf( "Running default tests:\n" );

    printf( "*** Head ***\n" );
    showNameDetails( firstIndex );

    printf( "*** Tail ***\n" );
    showNameDetails( lastIndex );

    // Shared state must be in place before any worker starts
    g_rangeHead = firstIndex;
    g_rangeTail = lastIndex;
    g_nextTaskId.store( 0 );
    g_nextRange.store( g_rangeHead );

    launchTasks();

    printf( "Finished running tests.\n" );
    fflush( nullptr );
}
void kanaToJSON() {
const ConsonantLimit clArr[] {
ConsonantLimit::unlimited,
ConsonantLimit::one
};
uint8_t indbuf[ 64 ];
char strbuf[ 64 ];
printf( "[\n" );
const char *preobjprint = "";
for( uint64_t index = 0; index < countof(KanaInfo::g_kanaInfo); index += 1 ) {
size_t prevlen = 0;
const uint8_t *indices_begin = nullptr;
const uint8_t *indices_end = nullptr;
generateName( index, indbuf, countof(indbuf), indices_begin, indices_end );
assert( indices_begin != indices_end );
for( const auto cl : clArr ) {
nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl );
const size_t len = strlen( strbuf );
if( prevlen != len ) {
prevlen = len;
const KanaInfo &k = KanaInfo::g_kanaInfo[*indices_begin];
printf( "%s\t{\n", preobjprint );
printf( "\t\t\"hiragana\": \"%s\",\n", strbuf );
printf( "\t\t\"vowels\": %u,\n", (unsigned)k.vowels );
printf( "\t\t\"consonants\": %u,\n", (unsigned)k.consonants );
printf( "\t\t\"sum\": %u\n", (unsigned)k.sum() );
printf( "\t}" );
preobjprint = ",\n";
}
}
}
printf( "\n]\n" );
}
// Entry point. Without arguments the default search is run. With
// "--kana-to-json" anywhere on the command line the kana table is dumped as
// JSON. Any other command line is rejected with an error on stderr.
int main( int argc, char **argv ) {
    const bool hasArguments = argc > 1;
    if( !hasArguments ) {
        runDefaultTests();
        return EXIT_SUCCESS;
    }

    int argIndex = 1;
    while( argIndex < argc ) {
        const char *const arg = argv[argIndex];
        if( strcmp( arg, "--kana-to-json" ) == 0 ) {
            kanaToJSON();
            return EXIT_SUCCESS;
        }
        argIndex += 1;
    }

    fprintf( stderr, "Unhandled arguments\n" );
    return EXIT_FAILURE;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment