Skip to content

Instantly share code, notes, and snippets.

@pleonex
Created September 28, 2014 01:03
Show Gist options
  • Save pleonex/c5b04eb9dcecd93628d7 to your computer and use it in GitHub Desktop.
Save pleonex/c5b04eb9dcecd93628d7 to your computer and use it in GitHub Desktop.
Encode / Decode a string with the Ninokuni default encoding table.
#!/bin/python
"""
ninostring.py: Encode / Decode a string with the Ninokuni default table.
Copyright (C) 2014 pleonex
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
"""
class TableEntry(object):
    """Represent one contiguous range of the encoding table.

    A range maps ``num_chars`` consecutive encoded integers, starting at
    ``idx_start``, onto ``num_chars`` consecutive characters, starting at
    ``first_char``.  All character arithmetic is done on Unicode code points
    (``ord`` plus an offset) after the first character has been decoded.

    NOTE(review): a range is therefore only meaningful where consecutive
    Shift-JIS characters are also consecutive in Unicode -- confirm this
    holds for every range put in the table.
    """

    # Code point -> one-character text. ``unichr`` only exists on Python 2;
    # on Python 3 ``chr`` does the same job.
    try:
        _to_char = staticmethod(unichr)
    except NameError:
        _to_char = staticmethod(chr)

    def __init__(self, first_char, idx_start, num_chars):
        """Create a new instance of the class.

        Args:
            first_char: First char in the range, either as Shift-JIS encoded
                bytes or as an already decoded unicode character.
            idx_start (int): First encoding byte in the range.
            num_chars (int): Number of chars in this range.
        """
        # Byte input is Shift-JIS and must be decoded; text is stored as is.
        if isinstance(first_char, (bytes, bytearray)):
            first_char = first_char.decode('shift-jis')
        self.first_char = first_char
        self.idx_start = idx_start
        self.num_chars = num_chars

    def sum_char(self, c, i):
        """Increment a char with a number.

        Args:
            c (char): Char to increment.
            i (int): Increment.

        Returns:
            char: The char whose code point is ``ord(c) + i``.
        """
        return self._to_char(ord(c) + i)

    def get_first_char(self):
        """Get the first char in the range.

        Returns:
            char: The first char in the range.
        """
        return self.first_char

    def get_last_char(self):
        """Get the last char in the range (inclusive).

        Returns:
            char: The last char in the range.
        """
        # The range holds num_chars chars, so the last one sits at offset
        # num_chars - 1 from the first.
        return self.sum_char(self.get_first_char(), self.get_num_chars() - 1)

    def get_idx_start(self):
        """Get the first encoding byte in the range.

        Returns:
            int: The first encoding byte in the range.
        """
        return self.idx_start

    def get_idx_end(self):
        """Get the last encoding byte in the range (inclusive).

        Returns:
            int: The last encoding byte in the range.
        """
        # Inclusive end: idx_start + num_chars would already be one past it.
        return self.get_idx_start() + self.get_num_chars() - 1

    def get_num_chars(self):
        """Get the number of chars in the range.

        Returns:
            int: The number of chars in the range.
        """
        return self.num_chars

    def contains_char(self, c):
        """Check if the decoded char is in this encoding entry range.

        Args:
            c (unicode): Char to check.

        Returns:
            bool: If the char is in the range.
        """
        return self.get_first_char() <= c <= self.get_last_char()

    def contains_idx(self, b):
        """Check if the encoded integer is in this encoding entry range.

        Args:
            b (int): Encoded integer to check.

        Returns:
            bool: If the encoded integer is in the range.
        """
        return self.get_idx_start() <= b <= self.get_idx_end()

    def decode_char(self, b):
        """Decode an encoded integer to a char using this range.

        The offset is applied without a range check; use ``contains_idx``
        first to make sure the integer belongs to this entry.

        Args:
            b (int): Encoded integer to decode.

        Returns:
            unicode: Decoded unicode char.
        """
        return self.sum_char(self.get_first_char(), b - self.get_idx_start())

    def encode_char(self, c):
        """Encode a char to an encoded integer using this range.

        The offset is applied without a range check; use ``contains_char``
        first to make sure the char belongs to this entry.

        Args:
            c (unicode): Char to encode.

        Returns:
            int: Encoded integer.
        """
        return self.get_idx_start() + (ord(c) - ord(self.get_first_char()))
def decode(bs, entries):
    """Decode a list of encoded integers to a string.

    Args:
        bs (int list): Encoded integer list.
        entries (TableEntry list): Table to use to decode.

    Returns:
        unicode: Decoded unicode string (empty when ``bs`` is empty).

    Raises:
        ValueError: If an integer is not covered by any entry of the table.
    """
    # Scan the table back to front: when several entries claim the same
    # integer, the one listed last in the table takes precedence.
    lookup_order = list(entries)[::-1]

    chars = []
    for b in bs:
        for entry in lookup_order:
            if entry.contains_idx(b):
                chars.append(entry.decode_char(b))
                break
        else:
            # No entry matched: fail with the offending value instead of
            # crashing on a missing table.
            raise ValueError("no table entry covers encoded value %r" % (b,))
    return u"".join(chars)
def encode(ss, entries):
    """Encode a string to a list of encoded integers.

    Args:
        ss (unicode): Unicode string to encode.
        entries (TableEntry list): Table to use to encode.

    Returns:
        int list: Encoded integer list (empty when ``ss`` is empty).

    Raises:
        ValueError: If a char is not covered by any entry of the table.
    """
    # Scan the table back to front: when several entries claim the same
    # char, the one listed last in the table takes precedence.
    lookup_order = list(entries)[::-1]

    encoded = []
    for c in ss:
        for entry in lookup_order:
            if entry.contains_char(c):
                encoded.append(entry.encode_char(c))
                break
        else:
            # No entry matched: fail with the offending char instead of
            # crashing on a missing table.
            raise ValueError("no table entry covers character %r" % (c,))
    return encoded
if __name__ == "__main__":
    # Encoding table, one row per range:
    # (first Shift-JIS char, first encoded integer, number of chars)
    ranges = (
        ('\x81\x40', 0x01, 0x01),
        ('\x82\x4F', 0x02, 0x0A),
        ('\x82\x9F', 0x0C, 0x53),
        ('\x83\x40', 0x5F, 0x57),
        ('\x81\x5B', 0xB6, 0x01),
        ('\x81\x60', 0xB7, 0x01),
        ('\x81\x48', 0xB8, 0x02),
        ('\x81\x41', 0xBA, 0x01),
        ('\x81\x42', 0xBB, 0x01),
    )
    entries = [TableEntry(first, start, count)
               for first, start, count in ranges]

    # Sample unicode text used for the round trip
    s = u'\u30ce\u30fc\u30b3\u30e1\u30f3\u30c8'
    print(s.encode('utf-8'))

    # Encode it and show the integers in hex between brackets
    enc = encode(s, entries)
    print(" ".join(["["] + [hex(e) for e in enc] + ["]"]))

    # Decode it back and show the recovered text
    dec = decode(enc, entries)
    print(dec.encode('utf-8'))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment