Created
September 2, 2014 03:30
-
-
Save komasaru/4eb39e3ff397f6babdb8 to your computer and use it in GitHub Desktop.
C++ source code to count the number of characters (not bytes) in a UTF-8 encoded string.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <iostream> | |
#include <string.h> | |
using namespace std; | |
/*
 * [CLASS] Proc
 *
 * Counts the characters (encoded code points) in a NUL-terminated UTF-8
 * byte string by classifying each lead byte.
 */
class Proc
{
    // Sequence length announced by a lead byte, or 0 if the byte cannot
    // start a sequence.
    int cntByte(unsigned char);

public:
    // Number of characters in a NUL-terminated UTF-8 string.
    int cntStr(const char*);
};

/*
 * Count characters
 *
 * cStr: NUL-terminated byte string, expected to be UTF-8. A null pointer
 *       is treated as an empty string.
 *
 * Returns the number of characters. For well-formed input this is the
 * number of encoded sequences. Malformed input is handled defensively:
 * a byte that cannot start a sequence, or a sequence that is cut short,
 * counts as one character, and scanning resumes at the very next byte.
 */
int Proc::cntStr(const char *cStr)
{
    if (!cStr) {
        return 0;
    }

    int iCnt = 0;
    const char *p = cStr;
    while (*p != '\0') {
        const int iByte = cntByte(static_cast<unsigned char>(*p));

        // Always consume the lead byte, even when cntByte() reports 0 for
        // an invalid one; otherwise the loop would never make progress.
        ++p;
        ++iCnt;

        // Consume at most (iByte - 1) trailing bytes, and only while they
        // really are continuation bytes (bit pattern 10xxxxxx). The NUL
        // terminator is not a continuation byte, so a truncated sequence
        // can never carry the scan past the end of the string.
        for (int k = 1;
             k < iByte && (static_cast<unsigned char>(*p) & 0xc0) == 0x80;
             ++k) {
            ++p;
        }
    }
    return iCnt;
}

/*
 * Count bytes
 *
 * cChar: first byte of a candidate UTF-8 sequence.
 *
 * Returns the total sequence length (1-6) implied by the lead byte, or 0
 * when the byte cannot start a sequence (continuation bytes 0x80-0xbf,
 * the overlong leads 0xc0/0xc1, and 0xfe/0xff).
 *
 * Note: the 5- and 6-byte forms, and the leads 0xf5-0xf7, belong to the
 * original UTF-8 definition and are no longer valid under RFC 3629. They
 * are still accepted here so that existing results do not change.
 */
int Proc::cntByte(unsigned char cChar)
{
    if (cChar <= 0x7f) {
        return 1;   // ASCII
    }
    if ((cChar >= 0xc2) && (cChar <= 0xdf)) {
        return 2;
    }
    if ((cChar >= 0xe0) && (cChar <= 0xef)) {
        return 3;
    }
    if ((cChar >= 0xf0) && (cChar <= 0xf7)) {
        return 4;
    }
    if ((cChar >= 0xf8) && (cChar <= 0xfb)) {
        return 5;
    }
    if ((cChar >= 0xfc) && (cChar <= 0xfd)) {
        return 6;
    }
    return 0;       // not a valid lead byte
}
/* | |
* Execution | |
*/ | |
int main(){ | |
const char* cStr = "これは文字数 Count の テスト です。"; | |
int iCnt; | |
try { | |
Proc objMain; | |
iCnt = objMain.cntStr(cStr); | |
cout << "* " << cStr << "\n" | |
<< " => " << strlen(cStr) << " Bytes, " | |
<< iCnt << " Strings" << endl; | |
} catch (const char* e) { | |
cerr << "[EXCEPTION] " << e << endl; | |
return 1; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment