qycyfjy/WindowsConsoleUnicode.md

## WindowsConsoleUnicode.md

      
    Raw
  

              WindowsConsoleUnicode.md
            
          
源文件用UTF-8，设置编译器选项（MSVC的/utf-8），让编译后的可执行文件中的字符串用UTF-8编码；
不要用chcp（不知道是啥？那太好了），不要在区域设置里设置那个Beta的Unicode UTF-8（不知道是啥？那太好了），但要在main的最开始调用::SetConsoleOutputCP(CP_UTF8)，也就是外部输入是ANSI，输出给用户是UTF-8；
程序内纯ASCII不能处理的一切以UTF-8或者UTF-16操作；
接上条，从命令行窗口得到的用户输入统一是ANSI，先用MultiByteToWideChar把ANSI转成UTF-16，【可选】再用WideCharToMultiByte转成UTF-8；
要打开文件或者其他与系统交互怎么办，要么直接用从用户那获取的ANSI编码（const char*）或者UTF-16（std::wstring, const wchar_t*）作为参数，要么把UTF-8转成UTF-16，怎么转？再看看MultiByteToWideChar的第一个参数。

#include <Windows.h> 
#include <fstream>
#include <iostream>
#include <stdlib.h> 
#include <string.h> 
#include <string>

// Converts a UTF-16 string to UTF-8 using WideCharToMultiByte.
//
// @param wstr  UTF-16 text; its full size() is converted, so no terminator is required.
// @return      The UTF-8 encoding of wstr, or an empty string if wstr is empty,
//              too long for the int-based Win32 API, or the conversion fails.
std::string ConvertWideToUTF8(const std::wstring& wstr)
{
	// The Win32 API takes the length as an int. Reject empty input and any size
	// that does not survive the narrowing round-trip.
	const int wideLen = static_cast<int>(wstr.size());
	if (wideLen <= 0 || static_cast<size_t>(wideLen) != wstr.size())
	{
		return {};
	}

	// First call: output size 0 asks only for the required byte count.
	const int count = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLen, nullptr, 0, nullptr, nullptr);
	if (count <= 0)
	{
		return {};
	}

	// Second call: the SAME explicit length must be used. Passing -1 here would make
	// the API also emit the terminating NUL, which needs one byte more than `count`
	// and makes the call fail with ERROR_INSUFFICIENT_BUFFER.
	std::string str(static_cast<size_t>(count), '\0');
	const int written = WideCharToMultiByte(CP_UTF8, 0, wstr.data(), wideLen, &str[0], count, nullptr, nullptr);
	if (written <= 0)
	{
		return {};
	}
	str.resize(static_cast<size_t>(written));
	return str;
}

// Converts a NUL-terminated string in the system default ANSI code page (CP_ACP)
// to UTF-16 using MultiByteToWideChar.
//
// @param str  NUL-terminated ANSI text; may be null.
// @return     The UTF-16 text without a trailing NUL, or an empty string if str is
//             null/empty, too long for the int-based Win32 API, or the conversion fails.
std::wstring ConvertACPToWide(const char* str)
{
	if (!str)
	{
		return {};
	}

	// The Win32 API takes the length as an int. Reject empty input and any size
	// that does not survive the narrowing round-trip.
	const size_t byteLen = strlen(str);
	const int srcLen = static_cast<int>(byteLen);
	if (srcLen <= 0 || static_cast<size_t>(srcLen) != byteLen)
	{
		return {};
	}

	// First call: output size 0 asks only for the required wchar_t count.
	// An explicit source length (instead of -1) keeps the terminating NUL out of
	// the count, so no "count - 1" adjustment is needed.
	const int count = MultiByteToWideChar(CP_ACP, 0, str, srcLen, nullptr, 0);
	if (count <= 0)
	{
		// Conversion failed; constructing a wstring from a non-positive count would throw.
		return {};
	}

	// Second call: same explicit length, so the output fits exactly in `count` units.
	std::wstring wstr(static_cast<size_t>(count), L'\0');
	const int written = MultiByteToWideChar(CP_ACP, 0, str, srcLen, &wstr[0], count);
	if (written <= 0)
	{
		return {};
	}
	wstr.resize(static_cast<size_t>(written));
	return wstr;
}

// Re-encodes an ANSI (CP_ACP) C string as UTF-8 by going through UTF-16:
// ConvertACPToWide first, then ConvertWideToUTF8 on the result.
std::string GetUtf8String(const char* str)
{
	const std::wstring utf16 = ConvertACPToWide(str);
	return ConvertWideToUTF8(utf16);
}

// Writes the n bytes starting at c to stdout as lowercase hex values,
// each followed by a space, then ends the line.
void print(const char* c, size_t n) {
	const char* const end = c + n;
	for (const char* cursor = c; cursor != end; ++cursor) {
		// Go through uint8_t so bytes >= 0x80 print as two hex digits rather than
		// as a sign-extended negative value.
		printf("%x ", static_cast<uint8_t>(*cursor));
	}
	printf("\n");
}

// Demo driver: shows how an ANSI command-line argument and an ANSI line read from
// stdin look as raw bytes and after conversion to UTF-8, and tries to open the
// argument as a file using ANSI, UTF-16 and UTF-8 encoded paths.
int main(int argc, char* argv[])
{
	// Only the console OUTPUT code page is switched to UTF-8; input is left untouched.
	::SetConsoleOutputCP(CP_UTF8);

	// Prints up to the first 63 bytes of an opened file as a C string.
	// Does nothing when the stream failed to open.
	const auto dumpHead = [](std::ifstream& stream) {
		if (!stream.is_open()) {
			return;
		}
		char head[64]{ 0 };  // zero-filled, so the 63 bytes read are always NUL-terminated
		stream.read(head, 63);
		printf("%s", head);
	};

	if (argc > 1) {
		const char* const rawArg = argv[1];

		// Raw bytes of the argument as received, then the same text re-encoded as UTF-8.
		print(rawArg, strlen(rawArg));
		std::string utf8Arg = GetUtf8String(rawArg);
		print(utf8Arg.c_str(), utf8Arg.size());
		printf("%s\n", utf8Arg.data());

		// Path passed in its original ANSI encoding (author's note: opens normally).
		std::ifstream ansiFile(rawArg, std::ios::binary);
		dumpHead(ansiFile);
		printf("\n");

		// Path converted to UTF-16 (author's note: opens normally).
		auto widePath = ConvertACPToWide(rawArg);
		std::ifstream wideFile(widePath.c_str(), std::ios::binary);
		dumpHead(wideFile);
		printf("\n");

		// Path passed as UTF-8 bytes through the narrow-char API (author's note: cannot be opened).
		std::ifstream utf8File(utf8Arg.c_str(), std::ios::binary);
		if (!utf8File.is_open()) {
			printf("无法打开。😭\n");
		}
	}

	// Read one line from stdin and dump it before and after the ANSI -> UTF-16 -> UTF-8 round trip.
	std::string line;
	std::getline(std::cin, line);
	print(line.data(), line.size());
	auto utf8Line = ConvertWideToUTF8(ConvertACPToWide(line.c_str()));
	print(utf8Line.data(), utf8Line.size());

	// A UTF-8 literal (the notes above ask for /utf-8 on MSVC) printed straight to the console.
	const char* greeting = "你好，世界。の😀";
	printf("%s\n", greeting);

	// Wait for a character on stdin before exiting; the value read is deliberately discarded.
	static_cast<void>(getchar());
}