tkanai/index.html

## index.html
<!DOCTYPE html>
<html lang="en">
<head>
<title>Text offset check sheet</title>
<meta charset="utf-8" />
<link href='http://fonts.googleapis.com/css?family=Slabo+27px' rel='stylesheet' type='text/css'>
<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
<script type="text/javascript">
	// Once the DOM is ready, fill in the "Length" column of the check sheet.
	$(function(){
		$('td.length').each(function(){
			// The sample string sits in the cell marked class="rep" in the same row.
			var sample = this.parentNode.getElementsByClassName('rep')[0];
			// Read the text via textContent only (the former innerText read was
			// immediately overwritten). String.length counts UTF-16 code units,
			// which is exactly the value this sheet is meant to expose.
			$(this).append(sample.textContent.length);
		});
	});
</script>
<style type="text/css">
	/* Page typeface: the Slabo 27px web font, with a generic serif fallback. */
	body { font-family: 'Slabo 27px', serif; }
	/* Tint the 2nd and 4th cell of each data row (the Unicode and Length columns). */
	td:nth-child(2),
	td:nth-child(4) {
		background-color: lightblue;
	}
</style>
</head>
<body>
<table>
	<thead>
		<tr><th>Category</th><th>Unicode</th><th>Representation</th><th>Length</th></tr>
	</thead>
	<tbody>
		<tr><td>Basic Latin</td><td>U+0041</td><td class="rep">A</td><td class="length"></td></tr>
		<tr><td>Basic Latin</td><td>U+0066 U+0069</td><td class="rep">fi</td><td class="length"></td></tr>
		<tr><td>Latin-1 Supplement</td><td>U+00DC</td><td class="rep">Ü</td><td class="length"></td></tr>
		<tr><td>Controls (NBSP:character entity)</td><td>U+00A0</td><td class="rep">&nbsp;</td><td class="length"></td></tr>
		<tr><td>Combining Diacritical Marks</td><td>U+0041 U+0301</td><td class="rep">Á</td><td class="length"></td></tr>
		<tr><td>CJK</td><td>U+5409</td><td class="rep">吉</td><td class="length"></td></tr>
		<tr><td>CJK (SIP)</td><td>U+20BB7 (D842<sub>16</sub> DFB7<sub>16</sub>)</td><td class="rep">𠮷</td><td class="length"></td></tr>
		<tr><td>CJK</td><td>U+53F1</td><td class="rep">叱</td><td class="length"></td></tr>
		<tr><td>CJK (IVD)</td><td>U+53F1 U+E0101 (53F1<sub>16</sub> DB40<sub>16</sub> DD01<sub>16</sub>)</td><td class="rep">叱󠄁</td><td class="length"></td></tr>
		<tr><td>EMOJI</td><td>U+1F466 (D83D<sub>16</sub> DC66<sub>16</sub>)</td><td class="rep">👦</td><td class="length"></td></tr>
	</tbody>
</table>
</body>
</html>

## result.md

      
    Raw
  

              result.md
            
          
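| Language | U+0041 | U+0066 U+0069 | U+00DC | U+00A0 | U+0041 U+0301 | U+5409 | U+20BB7 | U+53F1 | U+53F1 U+E0101 | U+1F466 |
| --- | --- | --- | --- | --- | --- | --- | --- | --- | --- | --- |
| JavaScript | 1 | 2 | 1 | 1 | 2 | 1 | 2 | 1 | 3 | 2 |
| Python 3.4.1 (Mac) | 1 | 2 | 1 | 1 | 2 | 1 | 1 | 1 | 2 | 1 |
| Python 2.7.5 (Mac) | 1 | 2 | 1 | 1 | 2 | 1 | 2 | 1 | 3 | 2 |
| Ruby 2.0.0 (Mac) | 1 | 2 | 1 | 1 | 2 | 1 | 1 | 1 | 2 | 1 |
| PHP 5.4.30 (Mac) | 1 | 2 | 1 | 1 | 2 | 1 | 1 | 1 | 2 | 1 |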


## textcheck.php
<?php
// Prints one result-table row: the length of every sample string as reported
// by iconv_strlen() when the bytes are interpreted as UTF-8.
//
// Samples whose distinguishing character is invisible are written with
// explicit UTF-8 byte escapes so an editor or copy/paste cannot silently
// replace or drop them; visible glyphs stay literal for readability.
$targets = array(
	'A',                    // U+0041
	"fi",                   // U+0066 U+0069
	"Ü",                    // U+00DC
	"\xC2\xA0",             // U+00A0 (no-break space)
	"A\xCC\x81",            // U+0041 U+0301 (combining acute accent)
	"吉",                   // U+5409
	"𠮷",                   // U+20BB7
	"叱",                   // U+53F1
	"叱\xF3\xA0\x84\x81",   // U+53F1 U+E0101 (ideographic variation selector)
	"👦",                   // U+1F466
);
foreach ($targets as $target) {
	echo " | ";
	// DEBUG: uncomment to dump the UTF-16BE code units of the sample and verify
	// that it really holds the intended code points.
	// echo bin2hex(mb_convert_encoding($target, 'UTF-16BE', 'UTF-8'));

	echo iconv_strlen($target, "UTF-8");
}
echo " |\n";
?>

## textcheck.py
#coding: UTF-8

from __future__ import print_function

# Sample strings whose len() is reported. They are spelled with escapes so
# that invisible or easily normalized characters (no-break space, combining
# accent, variation selector) cannot be altered by an editor or copy/paste.
TARGETS = [
    u"A",                  # U+0041
    u"fi",                 # U+0066 U+0069
    u"\u00dc",             # U+00DC
    u"\u00a0",             # U+00A0 (no-break space)
    u"A\u0301",            # U+0041 U+0301 (combining acute accent)
    u"\u5409",             # U+5409
    u"\U00020bb7",         # U+20BB7
    u"\u53f1",             # U+53F1
    u"\u53f1\U000e0101",   # U+53F1 U+E0101 (ideographic variation selector)
    u"\U0001f466",         # U+1F466
]


def format_row(targets):
    """Return one result-table row for *targets*.

    The row consists of `` | <len>`` for every string, in order, followed
    by a closing `` |``; no trailing newline is included.
    """
    return "".join(" | %d" % len(target) for target in targets) + " |"


def hex_dump(target):
    """Return each element of *target* as 4-digit (or longer) lowercase hex.

    Debug aid for checking that a sample really holds the intended code
    points; the values are concatenated without separators.
    """
    return "".join("%04x" % ord(c) for c in target)


if __name__ == "__main__":
    print(format_row(TARGETS))

## textcheck.rb
# encoding: utf-8
#
# Prints one result-table row: String#length of every sample string.
#
# The samples are spelled with \u escapes so that invisible or easily
# normalized characters (no-break space, combining accent, variation
# selector) cannot be altered by an editor or copy/paste.
targets = [
  'A',                  # U+0041
  'fi',                 # U+0066 U+0069
  "\u00DC",             # U+00DC
  "\u00A0",             # U+00A0 (no-break space)
  "A\u0301",            # U+0041 U+0301 (combining acute accent)
  "\u5409",             # U+5409
  "\u{20BB7}",          # U+20BB7
  "\u53F1",             # U+53F1
  "\u53F1\u{E0101}",    # U+53F1 U+E0101 (ideographic variation selector)
  "\u{1F466}"           # U+1F466
]

targets.each do |target|
  print(' | ', target.length)
  # DEBUG: uncomment to dump the UTF-16BE code units of the sample and verify
  # that it really holds the intended code points.
  #   v = target.encode('UTF-16BE')
  #   v.codepoints { |cp| print cp.to_s(16) + ' ' }
end
# Close the row the same way the PHP and Python variants do (" |" + newline).
print(" |\n")
	<!DOCTYPE html>
	<html lang="en">
	<head>
	<title>Text offset check sheet</title>
	<meta charset="utf-8" />
	<link href='http://fonts.googleapis.com/css?family=Slabo+27px' rel='stylesheet' type='text/css'>
	<script type="text/javascript" src="http://ajax.googleapis.com/ajax/libs/jquery/2.1.1/jquery.min.js"></script>
	<script type="text/javascript">
	$(function(){
	$('td.length').each(function(){
	var prev = this.parentNode.getElementsByClassName('rep')[0];
	var data = prev.innerText;
	data = prev.textContent;
	$(this).append(data.length);
	});
	});
	</script>
	<style type="text/css">
	body {font-family: 'Slabo 27px', serif;}
	td:nth-child(2){
	background-color:lightblue;
	}
	td:nth-child(4){
	background-color:lightblue;
	}
	</style>
	</head>
	<body>
	<table>
	<thead>
	<tr><th>Category</th><th>Unicode</th><th>Representation</th><th>Length</th></tr>
	</thead>
	<tbody>
	<tr><td>Basic Latin</td><td>U+0041</td><td class="rep">A</td><td class="length"></td></tr>
	<tr><td>Basic Latin</td><td>U+0066 U+0069</td><td class="rep">fi</td><td class="length"></td></tr>
	<tr><td>Latin-1 Supplement</td><td>U+00DC</td><td class="rep">Ü</td><td class="length"></td></tr>
	<tr><td>Controls (NBSP:character entity)</td><td>U+00A0</td><td class="rep"> </td><td class="length"></td></tr>
	<tr><td>Combining Diacritical Marks</td><td>U+0041 U+0301</td><td class="rep">Á</td><td class="length"></td></tr>
	<tr><td>CJK</td><td>U+5409</td><td class="rep">吉</td><td class="length"></td></tr>
	<tr><td>CJK (SIP)</td><td>U+20BB7 (D842<sub>16</sub> DFB7<sub>16</sub>)</td><td class="rep">𠮷</td><td class="length"></td></tr>
	<tr><td>CJK</td><td>U+53F1</td><td class="rep">叱</td><td class="length"></td></tr>
	<tr><td>CJK (IVD)</td><td>U+53F1 U+E0101 (53F1<sub>16</sub> DB40<sub>16</sub> DD01<sub>16</sub>)</td><td class="rep">叱󠄁</td><td class="length"></td></tr>
	<tr><td>EMOJI</td><td>U+1F466 (D83D<sub>16</sub> DC66<sub>16</sub>)</td><td class="rep">👦</td><td class="length"></td></tr>
	</tbody>
	</table>
	</body>
	</html>
Language	U+0041	U+0066 U+0069	U+00DC	U+00A0	U+0041 U+0301	U+5409	U+20BB7	U+53F1	U+53F1 U+E0101	U+1F466
Javascript	1	2	1	1	2	1	2	1	3	2
Python 3.4.1 (Mac)	1	2	1	1	2	1	1	1	2	1
Python 2.7.5 (Mac)	1	2	1	1	2	1	2	1	3	2
Ruby 2.0.0 (Mac)	1	2	1	1	2	1	1	1	2	1
PHP 5.4.30 (Mac)	1	2	1	1	2	1	1	1	2	1
	<?php
	$targets = array('A', "fi", "Ü", " ", "Á", "吉", "𠮷", "叱", "叱󠄁", "👦");
	for($i=0; $i<count($targets); $i++){
	echo " \| ";
	#DEBUG: To check whether each string is encoded in a correct Unicode code point or not.
	# echo base_convert(bin2hex(mb_convert_encoding($targets[$i], 'UTF-16BE', 'UTF-8')), 16, 16);

	echo iconv_strlen($targets[$i], "UTF-8");
	}
	echo " \|\n";
	?>
	#coding: UTF-8

	from __future__ import print_function

	if __name__ == "__main__":
	targets = [u'A', u"fi", u"Ü", u" ", u"Á", u"吉", u"𠮷", u"叱", u"叱󠄁", u"👦"];
	for target in targets:
	print(' \| %d' % len(target), end="")
	##DEBUG: To check whether each string is encoded in a correct Unicode code point or not.
	# for c in target:
	# print('%04x' % ord(c), end="")
	print(' \|');
	#encoding; utf-8
	targets = ['A', "fi", "Ü", " ", "Á", "吉", "𠮷", "叱", "叱󠄁", "👦"];

	for target in targets do
	print(" \| ", target.length)
	#DEBUG: To check whether each string is encoded in a correct Unicode code point or not.
	# v = target.encode('UTF-16BE')
	# v.codepoints {\|cp\| print cp.to_s(16) + " "}
	end
	print("\| \n")