Last active
June 9, 2018 10:13
-
-
Save NotKyon/a113a8fca28bb95cdc7c9a648010e48e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <assert.h> | |
#include <stdint.h> | |
#include <stdlib.h> | |
#include <string.h> | |
#include <new> | |
#include <thread> | |
#include <atomic> | |
// Element count of a true C array (not valid for pointers).
#define countof(X_) (sizeof((X_))/sizeof((X_)[0]))

// Table sizes: the plain kana entries come first, the youon entries follow.
constexpr unsigned numKana  { 46 };
constexpr unsigned numYouon { 21 };

// Row numbers inside the youon section of the table, named after the
// i-column kana each row is built from.
constexpr unsigned kiRow  { 0 };
constexpr unsigned shiRow { 1 };
constexpr unsigned chiRow { 2 };
constexpr unsigned niRow  { 3 };
constexpr unsigned hiRow  { 4 };
constexpr unsigned miRow  { 5 };
constexpr unsigned riRow  { 6 };

// Column numbers inside a youon row (the -ya, -yu and -yo forms).
constexpr unsigned yaCol { 0 };
constexpr unsigned yuCol { 1 };
constexpr unsigned yoCol { 2 };
// Flattens a (row, column) pair of the youon grid into a single index,
// three columns per row.
constexpr unsigned youonIndex( unsigned row, unsigned column ) {
    constexpr unsigned columnsPerRow = 3;
    return column + columnsPerRow*row;
}
// Recovers the row number from a flattened youon index (three columns per row).
constexpr unsigned youonRow( unsigned index ) {
    constexpr unsigned columnsPerRow = 3;
    return index / columnsPerRow;
}
// Recovers the column number from a flattened youon index (three columns per row).
constexpr unsigned youonColumn( unsigned index ) {
    constexpr unsigned columnsPerRow = 3;
    return index % columnsPerRow;
}
constexpr unsigned youonRowValue( unsigned row ) { | |
constexpr unsigned indexBase = 1; | |
constexpr unsigned offsetToI = 1; | |
constexpr unsigned bias = indexBase + offsetToI; | |
return | |
row < riRow ? row*5 + bias : | |
row == riRow ? 38 + bias : | |
0; | |
} | |
// Returns the 1-based table value of the small kana of a youon column:
// column 0 maps to 36, column 1 to 37, column 2 to 38.
constexpr unsigned youonColumnValue( unsigned column ) {
    constexpr unsigned yaIndex = 35;    // zero-based offset the columns start from
    constexpr unsigned indexBase = 1;   // table values are 1-based
    return yaIndex + indexBase + column;
}
constexpr unsigned youonValue( unsigned index ) { | |
return | |
youonRowValue( youonRow(index) ) + | |
youonColumnValue( youonColumn(index) ); | |
} | |
#pragma pack(push,1) | |
struct KanaInfo { | |
static const KanaInfo g_kanaInfo[ numKana + numYouon ]; | |
char romaji[4]; | |
uint8_t consonants:4; | |
uint8_t vowels:3; | |
uint8_t youon:1; | |
KanaInfo( const char *pszRomaji, bool isYouon = false ); | |
unsigned sum() const; | |
}; | |
#pragma pack(pop) | |
// Builds a table entry from a lowercase romaji spelling of 1 to 3 letters.
//
// pszRomaji  NUL-terminated spelling; each letter is counted as a vowel
//            (a, e, i, o, u) or as a consonant (anything else).
// isYouon    stored in the youon flag.
//
// The asserts document the expected input. The copy is additionally clamped
// to the buffer so a build with asserts disabled cannot write past romaji,
// and the buffer is zero-filled so short spellings leave no indeterminate
// bytes after the terminator.
inline KanaInfo::KanaInfo( const char *pszRomaji, bool isYouon ) {
    memset( romaji, 0, sizeof(romaji) );
    consonants = 0;
    vowels = 0;
    youon = isYouon ? 1 : 0;

    assert( pszRomaji != nullptr );
    if( pszRomaji == nullptr ) {
        return;
    }

    const size_t len = strlen( pszRomaji );
    assert( len >= 1 );
    assert( len <= 3 );

    // Leave room for the terminator already placed by the zero-fill.
    const size_t maxLetters = sizeof(romaji) - 1;
    const size_t copyLen = len < maxLetters ? len : maxLetters;

    for( size_t i = 0; i < copyLen; i += 1 ) {
        const char c = pszRomaji[i];
        assert( c >= 'a' && c <= 'z' );

        romaji[i] = c;

        if( c=='a' || c=='e' || c=='i' || c=='o' || c=='u' ) {
            vowels += 1;
        } else {
            consonants += 1;
        }
    }
}
inline unsigned KanaInfo::sum() const { | |
const unsigned index = (unsigned)ptrdiff_t( this - &g_kanaInfo[0] ); | |
if( index < numKana ) { | |
return 1 + index; | |
} | |
if( index < numKana + numYouon ) { | |
return youonValue( index - numKana ); | |
} | |
assert( false && "Unhandled case" ); | |
return 0; | |
} | |
// The kana table. Entry order matters: KanaInfo::sum() derives each entry's
// value from its position, and the first numKana entries are the plain kana.
#define KANA(X_)  KanaInfo((X_),false)
#define YOUON(X_) KanaInfo((X_),true)
const KanaInfo KanaInfo::g_kanaInfo[ numKana + numYouon ] = {
    // Plain kana
    KANA("a"),  KANA("i"),   KANA("u"),   KANA("e"),  KANA("o"),
    KANA("ka"), KANA("ki"),  KANA("ku"),  KANA("ke"), KANA("ko"),
    KANA("sa"), KANA("shi"), KANA("su"),  KANA("se"), KANA("so"),
    KANA("ta"), KANA("chi"), KANA("tsu"), KANA("te"), KANA("to"),
    KANA("na"), KANA("ni"),  KANA("nu"),  KANA("ne"), KANA("no"),
    KANA("ha"), KANA("hi"),  KANA("fu"),  KANA("he"), KANA("ho"),
    KANA("ma"), KANA("mi"),  KANA("mu"),  KANA("me"), KANA("mo"),
    KANA("ya"), KANA("yu"),  KANA("yo"),
    KANA("ra"), KANA("ri"),  KANA("ru"),  KANA("re"), KANA("ro"),
    KANA("wa"), KANA("wo"),
    KANA("n"),

    // Youon, one row per base kana, columns -ya / -yu / -yo
    YOUON("kya"), YOUON("kyu"), YOUON("kyo"),
    YOUON("sha"), YOUON("shu"), YOUON("sho"),
    YOUON("cha"), YOUON("chu"), YOUON("cho"),
    YOUON("nya"), YOUON("nyu"), YOUON("nyo"),
    YOUON("hya"), YOUON("hyu"), YOUON("hyo"),
    YOUON("mya"), YOUON("myu"), YOUON("myo"),
    YOUON("rya"), YOUON("ryu"), YOUON("ryo")
};
#undef YOUON
#undef KANA
// How many consonants a single non-youon kana may contribute to a name.
enum class ConsonantLimit {
    unlimited = 0,  // count every consonant letter of the spelling
    one       = 1   // count at most one consonant per kana
};
// Totals accumulated over all kana of one candidate name; all start at zero.
struct NameAnalysis {
    unsigned consonants { 0 };  // romanized consonant count
    unsigned vowels     { 0 };  // romanized vowel count
    unsigned sum        { 0 };  // sum of the kana values
};
// Outcome of checking an analyzed name against the expected totals.
enum class NameTestResult {
    pass              = 0,
    tooFewConsonants  = 1,
    tooManyConsonants = 2,
    tooFewVowels      = 3,
    tooManyVowels     = 4,
    smallSum          = 5,
    largeSum          = 6
};
// Returns the enumerator name of r as a string literal, or a row of question
// marks when r holds a value outside the enumeration.
const char *nameTestResultToString( NameTestResult r ) {
    const char *text = "\?\?\?";

    switch( r ) {
    case NameTestResult::pass:              text = "pass";              break;
    case NameTestResult::tooFewConsonants:  text = "tooFewConsonants";  break;
    case NameTestResult::tooManyConsonants: text = "tooManyConsonants"; break;
    case NameTestResult::tooFewVowels:      text = "tooFewVowels";      break;
    case NameTestResult::tooManyVowels:     text = "tooManyVowels";     break;
    case NameTestResult::smallSum:          text = "smallSum";          break;
    case NameTestResult::largeSum:          text = "largeSum";          break;
    }

    return text;
}
// Computes the consonant count, vowel count and value sum of a name.
//
// dst            receives the totals (overwritten, not accumulated into).
// indices_begin  first table index of the name.
// indices_end    one past the last table index of the name.
// cl             consonant counting mode; youon entries always count every
//                consonant regardless of this setting.
void analyzeName( NameAnalysis &dst, const uint8_t *indices_begin, const uint8_t *indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) {
    assert( indices_begin != nullptr );
    assert( indices_end != nullptr );

    NameAnalysis totals;

    for( const uint8_t *cursor = indices_begin; cursor != indices_end; ++cursor ) {
        assert( *cursor < countof(KanaInfo::g_kanaInfo) );
        const KanaInfo &entry = KanaInfo::g_kanaInfo[*cursor];

        // Table value of the entry: 1 for "a", 2 for "i", ..., 46 for "n"
        totals.sum += entry.sum();

        // Romanized vowels always count in full
        totals.vowels += (unsigned)entry.vowels;

        // Romanized consonants depend on the counting mode
        const ConsonantLimit mode = entry.youon ? ConsonantLimit::unlimited : cl;
        if( mode == ConsonantLimit::unlimited ) {
            totals.consonants += (unsigned)entry.consonants;
        } else if( mode == ConsonantLimit::one ) {
            if( entry.consonants != 0 ) {
                totals.consonants += 1;
            }
        }
    }

    dst = totals;
}
void nameIndicesToString( char *dst, size_t dstMax, const uint8_t *indices_begin, const uint8_t *indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) { | |
assert( indices_begin != nullptr ); | |
assert( indices_end != nullptr ); | |
assert( dst != nullptr ); | |
assert( dstMax > size_t(ptrdiff_t(indices_end - indices_begin)) ); | |
size_t i = 0; | |
for( const uint8_t *p = indices_begin; p != indices_end; p += 1 ) { | |
assert( *p < countof(KanaInfo::g_kanaInfo) ); | |
const KanaInfo &k = KanaInfo::g_kanaInfo[*p]; | |
const size_t len = strlen(k.romaji); | |
assert( i + len < dstMax ); | |
for( size_t j = 0; j < len; j += 1 ) { | |
// skip middle characters if limited to one consonant | |
if( cl == ConsonantLimit::one && len > 2 && j > 0 && j < len - 1 && !k.youon ) { | |
continue; | |
} | |
dst[ i ] = k.romaji[ j ]; | |
i += 1; | |
} | |
} | |
assert( i < dstMax ); | |
dst[ i ] = '\0'; | |
} | |
// Compares analyzed totals against the expected ones (sum 134, 7 consonants,
// 8 vowels). The sum is checked first, then consonants, then vowels; the
// first mismatch decides the result.
NameTestResult testAnalyzedName( const NameAnalysis &src ) {
    const unsigned wantedSum = 134;
    const unsigned wantedConsonants = 7;
    const unsigned wantedVowels = 8;

    if( src.sum != wantedSum ) {
        return src.sum < wantedSum
            ? NameTestResult::smallSum
            : NameTestResult::largeSum;
    }

    if( src.consonants != wantedConsonants ) {
        return src.consonants < wantedConsonants
            ? NameTestResult::tooFewConsonants
            : NameTestResult::tooManyConsonants;
    }

    if( src.vowels != wantedVowels ) {
        return src.vowels < wantedVowels
            ? NameTestResult::tooFewVowels
            : NameTestResult::tooManyVowels;
    }

    return NameTestResult::pass;
}
// Analyzes the name given by [indices_begin, indices_end) under consonant
// mode cl and reports how it compares against the expected totals.
NameTestResult testName( const uint8_t *indices_begin, const uint8_t *indices_end,
ConsonantLimit cl = ConsonantLimit::unlimited ) {
    NameAnalysis analysis;
    analyzeName( analysis, indices_begin, indices_end, cl );

    const NameTestResult verdict = testAnalyzedName( analysis );
    return verdict;
}
void generateName( uint64_t index, uint8_t *buffer, size_t maxBuffer, | |
const uint8_t *&indices_begin, const uint8_t *&indices_end ) { | |
assert( buffer != nullptr ); | |
assert( maxBuffer > 16 ); // Let's be reasonable here, there shouldn't be a name larger than this that passes the test | |
auto n = index; | |
size_t i = 0; | |
while( i < maxBuffer ) { | |
buffer[i] = uint8_t( n%countof(KanaInfo::g_kanaInfo) ); | |
i += 1; | |
n /= countof(KanaInfo::g_kanaInfo); | |
if( !n ) { | |
break; | |
} | |
} | |
indices_begin = &buffer[0]; | |
indices_end = &buffer[i]; | |
} | |
// Expands name number `index` into buffer, then tests the resulting name under
// consonant mode cl. indices_begin / indices_end are set to the generated
// range so the caller can reuse it.
NameTestResult generateAndTestName( uint64_t index, uint8_t *buffer, size_t maxBuffer,
const uint8_t *&indices_begin, const uint8_t *&indices_end, ConsonantLimit cl = ConsonantLimit::unlimited ) {
    generateName( index, buffer, maxBuffer, indices_begin, indices_end );

    const NameTestResult verdict = testName( indices_begin, indices_end, cl );
    return verdict;
}
void showNameDetails( uint64_t index ) { | |
uint8_t indbuf[ 64 ]; | |
char strbuf[ 64 ]; | |
const ConsonantLimit clArr[] { | |
ConsonantLimit::unlimited, | |
ConsonantLimit::one | |
}; | |
for( const auto cl : clArr ) { | |
const uint8_t *indices_begin = nullptr; | |
const uint8_t *indices_end = nullptr; | |
NameAnalysis na; | |
generateName( index, indbuf, countof(indbuf), indices_begin, indices_end ); | |
analyzeName( na, indices_begin, indices_end, cl ); | |
const auto r = testAnalyzedName( na ); | |
nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl ); | |
printf( | |
"Name #%u mode %i: %s\n" | |
" Consonants: %u\n" | |
" Vowels: %u\n" | |
" Sum: %u\n" | |
" Result: %s (%i)\n\n", | |
(unsigned)index, int(cl), strbuf, | |
na.consonants, na.vowels, na.sum, | |
nameTestResultToString(r), int(r) ); | |
} | |
} | |
void runTests( uint64_t headIndex, uint64_t tailIndex, ConsonantLimit cl = ConsonantLimit::unlimited ) { | |
uint8_t indbuf[ 64 ]; | |
char strbuf[ 64 ]; | |
for( auto index = headIndex; index <= tailIndex; index += 1 ) { | |
const uint8_t *indices_begin = nullptr; | |
const uint8_t *indices_end = nullptr; | |
const NameTestResult r = | |
generateAndTestName( index, indbuf, countof(indbuf), | |
indices_begin, indices_end, cl ); | |
if( r == NameTestResult::pass ) { | |
nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl ); | |
printf( "%s\n", strbuf ); | |
fflush(nullptr); | |
} | |
} | |
} | |
// Inclusive bounds of the name-number range the worker threads search.
uint64_t g_rangeHead { 0 };
uint64_t g_rangeTail { 0 };

// Shared work-distribution counters: the id handed to the next worker and
// the start of the next unclaimed chunk.
static std::atomic<int> g_nextTaskId { 0 };
static std::atomic<uint64_t> g_nextRange { 0 };
static void thread_run_f() { | |
assert( g_rangeHead != 0 && g_rangeTail != 0 ); | |
assert( g_rangeTail > g_rangeHead ); | |
constexpr uint64_t rangeGranularity = 1'000'000'000; | |
const int taskId = 1 + g_nextTaskId.fetch_add( 1, std::memory_order_acq_rel ); | |
for(;;) { | |
const uint64_t rangeHead = g_nextRange.fetch_add( rangeGranularity, std::memory_order_acq_rel ); | |
const uint64_t rangeTailP = rangeHead + rangeGranularity; | |
const uint64_t rangeTail = rangeTailP <= g_rangeTail ? rangeTailP : g_rangeTail; | |
if( rangeHead >= rangeTail ) { | |
break; | |
} | |
printf( "[%.2i] Checking range %llu <= i <= %llu...\n", taskId, rangeHead, rangeTail ); | |
fflush( nullptr ); | |
runTests( rangeHead, rangeTail, ConsonantLimit::unlimited ); | |
runTests( rangeHead, rangeTail, ConsonantLimit::one ); | |
} | |
printf( "[%.2i] Done.\n", taskId ); | |
} | |
void launchTasks( unsigned n = 0 ) { | |
alignas(std::thread) union { uint8_t b[ sizeof(std::thread) ]; } threadBufs[ 64 ] {}; | |
std::thread *threads[ countof( threadBufs ) ] {}; | |
if( n < 1 ) { | |
if( ( n = std::thread::hardware_concurrency()/2 ) < 2 ) { | |
n = 2; | |
} | |
} | |
if( n > countof( threadBufs ) ) { | |
n = countof( threadBufs ); | |
} | |
printf( "Launching %u threads...\n", n ); | |
fflush( nullptr ); | |
unsigned i; | |
for( i = 0; i < n; i += 1 ) { | |
threads[i] = new((void*)&threadBufs[i].b[0]) std::thread(thread_run_f); | |
} | |
for( i = 0; i < n; i += 1 ) { | |
threads[i]->join(); | |
threads[i]->~thread(); | |
} | |
} | |
// Runs the built-in search: shows the details of the first and last name
// number of the default range, then searches that range with worker threads.
void runDefaultTests() {
    const uint64_t firstIndex = 1'242'671'128'500'000'000ULL;
    const uint64_t lastIndex  = 1'242'671'512'000'000'000ULL;

    printf( "Running default tests:\n" );

    printf( "*** Head ***\n" );
    showNameDetails( firstIndex );

    printf( "*** Tail ***\n" );
    showNameDetails( lastIndex );

    // Publish the range and reset the shared counters before any worker starts.
    g_rangeHead = firstIndex;
    g_rangeTail = lastIndex;
    g_nextTaskId.store( 0 );
    g_nextRange.store( g_rangeHead );

    launchTasks();

    printf( "Finished running tests.\n" );
    fflush( nullptr );
}
void kanaToJSON() { | |
const ConsonantLimit clArr[] { | |
ConsonantLimit::unlimited, | |
ConsonantLimit::one | |
}; | |
uint8_t indbuf[ 64 ]; | |
char strbuf[ 64 ]; | |
printf( "[\n" ); | |
const char *preobjprint = ""; | |
for( uint64_t index = 0; index < countof(KanaInfo::g_kanaInfo); index += 1 ) { | |
size_t prevlen = 0; | |
const uint8_t *indices_begin = nullptr; | |
const uint8_t *indices_end = nullptr; | |
generateName( index, indbuf, countof(indbuf), indices_begin, indices_end ); | |
assert( indices_begin != indices_end ); | |
for( const auto cl : clArr ) { | |
nameIndicesToString( strbuf, countof(strbuf), indices_begin, indices_end, cl ); | |
const size_t len = strlen( strbuf ); | |
if( prevlen != len ) { | |
prevlen = len; | |
const KanaInfo &k = KanaInfo::g_kanaInfo[*indices_begin]; | |
printf( "%s\t{\n", preobjprint ); | |
printf( "\t\t\"hiragana\": \"%s\",\n", strbuf ); | |
printf( "\t\t\"vowels\": %u,\n", (unsigned)k.vowels ); | |
printf( "\t\t\"consonants\": %u,\n", (unsigned)k.consonants ); | |
printf( "\t\t\"sum\": %u\n", (unsigned)k.sum() ); | |
printf( "\t}" ); | |
preobjprint = ",\n"; | |
} | |
} | |
} | |
printf( "\n]\n" ); | |
} | |
int main( int argc, char **argv ) { | |
if( argc <= 1 ) { | |
runDefaultTests(); | |
return EXIT_SUCCESS; | |
} | |
for( int i = 1; i < argc; i += 1 ) { | |
const char *const arg = argv[i]; | |
if( strcmp( arg, "--kana-to-json" ) == 0 ) { | |
kanaToJSON(); | |
return EXIT_SUCCESS; | |
} | |
} | |
fprintf( stderr, "Unhandled arguments\n" ); | |
return EXIT_FAILURE; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment