Last active
August 29, 2015 14:04
-
-
Save LilinYume/e443f753db98fa4200e2 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib>
#include <cstring>
#include <ios>
#include <iostream>
#include <locale>
#include <new>
#include <stdexcept>
// Size in bytes of a single-byte character unit.
#define CHAR_SIZE 1
// Size in bytes of a double-byte (wide) character unit.
#define WCHAR_SIZE 2
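/* Overview:
Makes the cp932 (JIS X 0208) character set usable with streams as a sequence of 1-byte units.
The wide_char class holds an unsigned short and an unsigned char array (2 elements) as members; they share the same storage, 2 bytes in total.
A wide_char object is always exactly 2 bytes.
Little-endian byte order is assumed. */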
/* Notes:
unsigned char must have a maximum value of 255 (1 byte), and 1 byte must be 8 bits.
CHAR_BIT : 8 UCHAR_MAX : 0xff */
/* wide_char class
   A character cell whose object size is fixed at 2 bytes, able to hold either
   a 1-byte or a 2-byte character. The same storage is visible either as one
   16-bit value (code) or as two separate bytes (letter[first],
   letter[second]). */
class wide_char
{
public:
    // Default constructor: the cell starts out as all zero bytes.
    wide_char()
    {
        this->code = 0;
        this->letter[first] = 0;
        this->letter[second] = 0;
    }

    // Copy constructor: duplicates both views of the shared storage.
    wide_char( const wide_char& l_ob )
    {
        this->code = l_ob.code;
        this->letter[first] = l_ob.letter[first];
        this->letter[second] = l_ob.letter[second];
    }

    /* Copy assignment.
       Returns *this so that assignments can be chained; the function is
       declared to return wide_char&, so falling off the end without a return
       statement would be undefined behaviour. */
    wide_char& operator = ( const wide_char& r_ob )
    {
        this->code = r_ob.code;
        this->letter[first] = r_ob.letter[first];
        this->letter[second] = r_ob.letter[second];
        return *this;
    }

    /* Assignment from a raw 16-bit code value.
       code : value stored into the cell (the parameter shadows the member,
              hence the explicit this->).
       Returns *this. */
    wide_char& operator = ( unsigned short int code )
    {
        this->code = code;
        return *this;
    }

    friend wide_char* get_wstr( const wide_char& wstr );

protected:
    /* Indices into the byte array of the anonymous union below.
       first      : index of the first byte
       second     : index of the second byte
       byte_count : number of bytes in the array (one past the last index),
                    so the array size always matches the indices */
    enum { first, second, byte_count };

    // Two views of the same storage: individual bytes or one 16-bit code.
    union
    {
        unsigned char letter[byte_count];
        unsigned short code;
    };
};
class wchr_str : private wide_char | |
{ | |
private: | |
wide_char *wchr_ptr; | |
std::locale jp; | |
int len; | |
public: | |
// デフォルト・コンストラクタ | |
wchr_str() | |
{ | |
try { | |
wchr_ptr = new wide_char[1]; | |
} | |
catch ( std::bad_alloc e ) { | |
std::cerr << "allocation failed\n"; | |
exit( EXIT_FAILURE ); | |
} | |
jp = std::locale( "Japanese_Japan" ); | |
len = 0; | |
} | |
// コピー・コンストラクタ | |
wchr_str(const wchr_str& l_ob) | |
{ | |
this->wchr_ptr = l_ob.wchr_ptr; | |
this->len = l_ob.len; | |
} | |
const wide_char* operator=( const char* src ) | |
{ | |
const char* tmp = src; | |
wchr_ptr = alloc( tmp ); | |
copy( wchr_ptr, tmp ); | |
return wchr_ptr; | |
} | |
// デストラクタ | |
~wchr_str(){ if( wchr_ptr ) delete[] wchr_ptr; } | |
private: | |
int count( const char* src ) | |
{ | |
const char* tmp = src; | |
int cnt = 0; | |
while( *tmp != '\0' ) { | |
++tmp; | |
++cnt; | |
} | |
return cnt; | |
} | |
void copy( wide_char* dst, const char* src ) | |
{ | |
const char* tmp = src; | |
int size = count(tmp); | |
int i = 0; | |
while( *tmp != '\0' && i != size ) { | |
if ( ascii( tmp ) ) { | |
letter[first] = *tmp; | |
letter[second] = 0; | |
dst[i] = code; | |
} | |
else if ( half_width() ) { | |
letter[first] = *tmp; | |
letter[second] = 0; | |
dst[i] = code; | |
} | |
else { | |
letter[first] = *tmp; | |
letter[second] = *++tmp; | |
} | |
if ( full_width() ) { | |
dst[i] = code; | |
} | |
++i; | |
++tmp; | |
} | |
} | |
wide_char* alloc( const char* src ) | |
{ | |
try { | |
int current_size = count( src ); | |
if ( current_size > len ) { delete [] wchr_ptr; } | |
if ( current_size <= 0 ) { throw std::runtime_error("empty string"); } | |
wchr_ptr = new wide_char[current_size + 1]; | |
len = current_size; | |
} | |
catch( std::runtime_error& e ) { | |
e.what(); | |
exit( EXIT_FAILURE ); | |
} | |
catch( std::bad_alloc e ) { | |
std::cerr<<"alloc fail\n"; | |
exit( EXIT_FAILURE ); | |
} | |
return wchr_ptr; | |
} | |
// // ASCII [00~7F] | |
bool ascii( const char* ch ) | |
{ | |
if ( *ch > 0 && *ch <= 0x7f ){ | |
return true; | |
} | |
return false; | |
} | |
// 半角カナ [A1~DF] | |
bool half_width() | |
{ | |
if ( letter[second] == 0 | |
&& letter[first] >= 0xa1 | |
&& letter[first] <= 0xdf ) | |
{ return true; } | |
return false; | |
} | |
//JIS X 0208 [8140 ~ 9FFC] [E040 ~ FCFC] | |
bool full_width() | |
{ | |
if ( unit_1() || unit_2() ) return true; | |
return false; | |
} | |
/* [8140 ~ 9FFC] */ | |
bool unit_1() | |
{ | |
bool fst_ok, sec_ok; | |
fst_ok = sec_ok = false; | |
if ( letter[first] >= 0x81 | |
&& letter[first] <= 0x9f ) { fst_ok = true; } | |
if ( letter[second] >= 0x40 | |
&& letter[second] <= 0xfc ) { sec_ok = true; } | |
if ( fst_ok && sec_ok ) return true; | |
return false; | |
} | |
/* [E040 ~ FCFC] */ | |
bool unit_2() | |
{ | |
bool fst_ok, sec_ok; | |
fst_ok = sec_ok = false; | |
if ( letter[first] >= 0xe0 | |
&& letter[first] <= 0xfc ) { fst_ok = true; } | |
if ( letter[second] >= 0x40 | |
&& letter[second] <= 0xfc ) { sec_ok = true; } | |
if ( fst_ok && sec_ok ) return true; | |
return false; | |
} | |
}; | |
int main() | |
{ | |
wchr_str t; | |
t = "あabいcう"; | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment