Created
July 18, 2017 03:10
-
-
Save yohey03518/ad99d05864993b22e4cb8bad7a39f058 to your computer and use it in GitHub Desktop.
偵測檔案是否為Big5編碼
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Ref: http://blog.darkthread.net/post-2012-04-11-detect-big5-encoding.aspx
/// <summary>
/// Detects whether the contents of a file look like Big5-encoded text.
/// </summary>
/// <param name="file">Path of the file to inspect.</param>
/// <returns>
/// <c>false</c> when the file does not exist; otherwise the result of
/// <see cref="IsBig5Encoding(byte[])"/> applied to the file's raw bytes.
/// </returns>
public static bool IsBig5Encoding(string file)
{
    // A missing file is reported as "not Big5" rather than raised as an error.
    if (!File.Exists(file))
    {
        return false;
    }

    // The whole file is read into memory; the byte[] overload performs the actual check,
    // so no encoding object is needed here.
    byte[] bytes = File.ReadAllBytes(file);
    return IsBig5Encoding(bytes);
}
/// <summary>
/// Detects whether a byte sequence looks like Big5-encoded text.
/// </summary>
/// <param name="bytes">Raw bytes to inspect.</param>
/// <returns>
/// <c>true</c> when every byte decodes under code page 950 and the decoded text
/// re-encodes to the same number of bytes; otherwise <c>false</c>.
/// </returns>
/// <exception cref="ArgumentNullException"><paramref name="bytes"/> is <c>null</c>.</exception>
/// <remarks>
/// This is a heuristic: it only proves the bytes are well-formed under code page 950,
/// so plain ASCII input (and an empty array) is also accepted.
/// NOTE: on .NET Core / .NET 5+ code page 950 is only available after
/// <c>Encoding.RegisterProvider(CodePagesEncodingProvider.Instance)</c> has been called.
/// </remarks>
public static bool IsBig5Encoding(byte[] bytes)
{
    if (bytes == null)
    {
        throw new ArgumentNullException(nameof(bytes));
    }

    // 950 is the code page identifier for Big5.
    // Ref: https://msdn.microsoft.com/zh-tw/library/windows/desktop/dd317756(v=vs.85).aspx
    // A writable clone is used so the decoder can be made strict while the encoder
    // keeps the code page's default fallback behaviour.
    Encoding big5 = (Encoding)Encoding.GetEncoding(950).Clone();
    big5.DecoderFallback = DecoderFallback.ExceptionFallback;

    string decoded;
    try
    {
        // Strict decoding: an undecodable byte sequence throws instead of silently
        // turning into '?'. With the replacement fallback, a dangling lead byte at the
        // end of the buffer becomes a one-byte '?' and would slip through the length
        // comparison below.
        decoded = big5.GetString(bytes);
    }
    catch (DecoderFallbackException)
    {
        return false;
    }

    // Round-trip check: text that decoded cleanly must re-encode to the same
    // number of bytes it came from.
    return bytes.Length == big5.GetByteCount(decoded);
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment