Skip to content

Instantly share code, notes, and snippets.

@911992
Created May 28, 2020 15:21
Show Gist options
  • Save 911992/4d60c3e91a0491eadbcfd59eea90c483 to your computer and use it in GitHub Desktop.
Save 911992/4d60c3e91a0491eadbcfd59eea90c483 to your computer and use it in GitHub Desktop.
/*
* Copyright (c) 2020, https://github.com/911992 All rights reserved.
* License BSD 3-Clause (https://opensource.org/licenses/BSD-3-Clause)
*/
/*
sample_code_java0
File: Charset_Test.java
Created on: May 28, 2020 7:19:32 PM
@author https://github.com/911992
History:
initial version: 0.1(20200528)
*/
package def;
/**
 * Small console demo showing that the number of bytes needed for a piece of text depends on
 * the charset used to encode it (UTF-8, UTF-16BE and UTF-32LE are compared).
 *
 * <p>Non-ASCII text is written with Unicode escapes so that the compiled string contents, and
 * therefore the printed lengths, do not depend on the encoding javac assumes for this file.
 *
 * @author https://github.com/911992
 */
public class Charset_Test {

    /**
     * Encodes a Japanese and an English sample text with several charsets and prints the
     * resulting character/byte counts, followed by the code point and the UTF-8 bytes of a
     * single katakana character.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        // Charset objects instead of charset names: no checked UnsupportedEncodingException,
        // and a misspelled name cannot slip through to run time for the standard ones.
        final java.nio.charset.Charset utf8 = java.nio.charset.StandardCharsets.UTF_8;
        final java.nio.charset.Charset utf16be = java.nio.charset.StandardCharsets.UTF_16BE;
        // Looked up by name because StandardCharsets does not offer a UTF-32LE constant on
        // every JDK version; forName throws an unchecked exception if it is unavailable.
        final java.nio.charset.Charset utf32le = java.nio.charset.Charset.forName("UTF-32LE");

        // "Happy Programming" in katakana (11 characters).
        String happyProgrammingJp =
                "\u30CF\u30C3\u30D4\u30FC\u30D7\u30ED\u30B0\u30E9\u30DF\u30F3\u30B0";
        byte[] jpUtf8 = happyProgrammingJp.getBytes(utf8);
        System.out.printf("JP text len:%d , text bytes len(utf8):%d\n",
                happyProgrammingJp.length(), jpUtf8.length);

        String happyProgrammingEn = "Happy Programming";
        byte[] enUtf8 = happyProgrammingEn.getBytes(utf8);
        System.out.printf("En text len:%d , text bytes len(utf8):%d\n",
                happyProgrammingEn.length(), enUtf8.length);

        byte[] enUtf16 = happyProgrammingEn.getBytes(utf16be);
        System.out.printf("Text bytes len(utf-16be):%d\n", enUtf16.length);

        byte[] enUtf32 = happyProgrammingEn.getBytes(utf32le);
        System.out.printf("Text bytes len(utf-32le):%d\n", enUtf32.length);

        // Katakana letter small TU, see https://www.compart.com/en/unicode/U+30C3
        String smallTsu = "\u30C3";
        // codePointAt yields the real code point, whereas charAt would only give a UTF-16 unit.
        int smallTsuCodePoint = smallTsu.codePointAt(0);
        byte[] smallTsuUtf8 = smallTsu.getBytes(utf8);
        System.out.printf("Tsu (local) charpoint hex:0x%x\n", smallTsuCodePoint); //30c3
        System.out.printf("Tsu (arr) bytes:%s (%s)\n",
                compactHex(smallTsuUtf8), bracketedHex(smallTsuUtf8)); //e38383
    }

    /** Renders the bytes as one contiguous lower-case hex string, two digits per byte. */
    private static String compactHex(byte[] bytes) {
        StringBuilder hex = new StringBuilder(bytes.length * 2);
        for (byte b : bytes) {
            // Mask to 0..255 so a negative byte is shown as its unsigned two-digit value.
            hex.append(String.format("%02x", b & 0xff));
        }
        return hex.toString();
    }

    /**
     * Renders every byte as {@code [0xNN] } and separates the entries with {@code ", "}.
     * The space after each closing bracket is intentional: it keeps the printed line
     * identical to the sample output recorded at the end of this class.
     */
    private static String bracketedHex(byte[] bytes) {
        StringBuilder listing = new StringBuilder();
        for (byte b : bytes) {
            if (listing.length() > 0) {
                listing.append(", ");
            }
            listing.append(String.format("[0x%02x] ", b & 0xff));
        }
        return listing.toString();
    }

    // OUTPUT:
    //
    // JP text len:11 , text bytes len(utf8):33
    // En text len:17 , text bytes len(utf8):17
    // Text bytes len(utf-16be):34
    // Text bytes len(utf-32le):68
    // Tsu (local) charpoint hex:0x30c3
    // Tsu (arr) bytes:e38383 ([0xe3] , [0x83] , [0x83] )
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment