Skip to content

Instantly share code, notes, and snippets.

@komasaru
Created September 2, 2014 03:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save komasaru/2ea923e58aa112f4122f to your computer and use it in GitHub Desktop.
Save komasaru/2ea923e58aa112f4122f to your computer and use it in GitHub Desktop.
C++ source code for extracting a substring from a UTF-8 encoded string.
#include <iostream>
#include <string.h>
using namespace std;
/*
 * [CLASS] Proc
 *
 * Extracts substrings from NUL-terminated UTF-8 strings, where positions
 * and lengths are counted in characters (UTF-8 sequences), not in bytes.
 */
class Proc
{
    int cntByte(unsigned char);             // Count bytes
public:
    char* subStr(const char*, int, int);    // Get substring
};

/*
 * Get substring
 *
 * cStr    : NUL-terminated UTF-8 string (a null pointer yields "").
 * iStart  : zero-based index of the first character to copy. A negative
 *           value is clipped to 0 and the clipped-off part is deducted
 *           from iLength, i.e. the window [iStart, iStart + iLength) is
 *           intersected with the string.
 * iLength : maximum number of characters to copy; <= 0 yields "".
 *
 * Returns a pointer to a function-local static buffer of 1024 bytes. The
 * buffer is shared by all calls, so the result is overwritten by the next
 * call and must not be freed. Output that does not fit is truncated at a
 * character boundary (at most 1023 bytes plus the terminating NUL).
 *
 * Malformed input never stalls or over-reads: a byte that is not a valid
 * lead byte counts as a one-byte character, and a sequence that is cut
 * short (by the end of the string or by a non-continuation byte) ends at
 * the last continuation byte actually present.
 */
char* Proc::subStr(const char *cStr, int iStart, int iLength)
{
    static char cRes[1024];
    const size_t nCapacity = sizeof(cRes) - 1;  // keep one byte for '\0'
    size_t nUsed = 0;

    cRes[0] = '\0';
    if (!cStr || iLength <= 0) {
        return cRes;
    }

    // Normalise the window so that iStart >= 0. Doing it here (instead of
    // evaluating iStart + iLength in the loop) avoids signed overflow.
    if (iStart < 0) {
        iLength += iStart;  // iLength > 0 and iStart < 0: cannot overflow
        iStart = 0;
        if (iLength <= 0) {
            return cRes;
        }
    }

    size_t i = 0;   // byte offset into cStr
    int iPos = 0;   // character index of the sequence starting at i

    // Both iPos and iStart are non-negative, so the subtraction is safe.
    // The loop stops as soon as the window has been passed.
    while (cStr[i] != '\0' && iPos - iStart < iLength) {
        const int iExpected = cntByte(static_cast<unsigned char>(cStr[i]));

        // Always consume the lead byte (guarantees progress even when
        // cntByte reports 0), then only as many continuation bytes
        // (10xxxxxx) as are really there. '\0' is not a continuation
        // byte, so this can never step past the terminator.
        size_t nByte = 1;
        while (nByte < static_cast<size_t>(iExpected)
               && (static_cast<unsigned char>(cStr[i + nByte]) & 0xc0) == 0x80) {
            ++nByte;
        }

        if (iPos >= iStart) {
            if (nByte > nCapacity - nUsed) {
                break;  // buffer full: drop the rest, keep whole characters
            }
            memcpy(cRes + nUsed, cStr + i, nByte);
            nUsed += nByte;
        }
        i += nByte;
        ++iPos;
    }

    cRes[nUsed] = '\0';
    return cRes;
}

/*
 * Count bytes
 *
 * Returns the length in bytes of the UTF-8 sequence introduced by the
 * lead byte cChar (1 to 6; the legacy 5- and 6-byte forms are still
 * recognised), or 0 when cChar cannot start a sequence: continuation
 * bytes 0x80-0xbf, the overlong lead bytes 0xc0/0xc1, and 0xfe/0xff.
 */
int Proc::cntByte(unsigned char cChar)
{
    if (cChar <= 0x7f) {
        return 1;
    }
    if (cChar >= 0xc2 && cChar <= 0xdf) {
        return 2;
    }
    if (cChar >= 0xe0 && cChar <= 0xef) {
        return 3;
    }
    if (cChar >= 0xf0 && cChar <= 0xf7) {
        return 4;
    }
    if (cChar >= 0xf8 && cChar <= 0xfb) {
        return 5;
    }
    if (cChar >= 0xfc && cChar <= 0xfd) {
        return 6;
    }
    return 0;
}
/*
* Execution
*/
int main(){
const char* cStr = "コレハ 部分文字列取得の TEST です。";
char* cRes;
try {
Proc objMain;
cRes = objMain.subStr(cStr, 2, 13);
cout << "* " << cStr << "\n"
<< " => " << cRes << endl;
} catch (const char* e) {
cerr << "[EXCEPTION] " << e << endl;
return 1;
}
return 0;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment