pleonex/ninostring.py

## ninostring.py
#!/bin/python
"""
    ninostring.py: Encode / Decode a string with the Ninokuni default table.
    Copyright (C) 2014  pleonex

    This program is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 2 of the License, or
    (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""


class TableEntry:
    """ Represent one contiguous range of the encoding table.

    An entry maps `num_chars` consecutive encoded integers, starting at
    `idx_start`, onto `num_chars` consecutive unicode code points, starting
    at `first_char`.
    """

    def __init__(self, first_char, idx_start, num_chars):
        """ Create a new instance of the class.

        Args:
          first_char (bytes or unicode): First char in the range. A byte
            string is decoded as shift-jis; any other value (for example an
            already decoded unicode char) is stored unchanged.
          idx_start (int): First encoded integer in the range.
          num_chars (int): Number of chars in this range.
        """
        # Only raw bytes need decoding; text passed by the caller is kept.
        if isinstance(first_char, (bytes, bytearray)):
            first_char = first_char.decode('shift-jis')
        self.first_char = first_char
        self.idx_start = idx_start
        self.num_chars = num_chars

    def sum_char(self, c, i):
        """ Increments a char with a number.

        Args:
          c (unicode): Char to increment.
          i (int): Increment, applied to the code point of `c`.

        Returns:
          unicode: The char with the increment.
        """
        try:
            to_char = unichr  # Python 2 builtin
        except NameError:
            to_char = chr  # No unichr available: chr returns unicode text
        return to_char(ord(c) + i)

    def get_first_char(self):
        """ Gets the first char in the range.

        Returns:
          unicode: The first char in the range.
        """
        return self.first_char

    def get_last_char(self):
        """ Gets the last char in the range (inclusive).

        Returns:
          unicode: The last char in the range.
        """
        # The range holds num_chars chars counting the first one, so the
        # last char is (num_chars - 1) positions after the first.
        return self.sum_char(self.get_first_char(), self.get_num_chars() - 1)

    def get_idx_start(self):
        """ Gets the first encoded integer in the range.

        Returns:
          int: The first encoded integer in the range.
        """
        return self.idx_start

    def get_idx_end(self):
        """ Gets the last encoded integer in the range (inclusive).

        Returns:
          int: The last encoded integer in the range.
        """
        # Same reasoning as get_last_char: the start index is counted too.
        return self.get_idx_start() + self.get_num_chars() - 1

    def get_num_chars(self):
        """ Gets the number of chars in the range.

        Returns:
          int: The number of chars in the range.
        """
        return self.num_chars

    def contains_char(self, c):
        """ Checks if the decoded char is in this encoding entry range.

        Args:
          c (unicode): Char to check.

        Returns:
          bool: If the char is in the range (both ends included).
        """
        return self.get_first_char() <= c <= self.get_last_char()

    def contains_idx(self, b):
        """ Checks if the encoded integer is in this encoding entry range.

        Args:
          b (int): Encoded integer to check.

        Returns:
          bool: If the encoded integer is in the range (both ends included).
        """
        return self.get_idx_start() <= b <= self.get_idx_end()

    def decode_char(self, b):
        """ Decode an encoded integer to a char using this range.

        No range check is done here; use contains_idx() first.

        Args:
          b (int): Encoded integer to decode.

        Returns:
          unicode: Decoded unicode char.
        """
        return self.sum_char(self.get_first_char(), (b - self.get_idx_start()))

    def encode_char(self, c):
        """ Encode a char to an encoded integer using this range.

        No range check is done here; use contains_char() first.

        Args:
          c (unicode): Char to encode.

        Returns:
          int: Encoded integer.
        """
        return self.get_idx_start() + (ord(c) - ord(self.get_first_char()))


def decode(bs, entries):
    """ Decode a list of encoded integers to a string.

    Each integer is decoded with the table entry whose index range contains
    it. When several entries match, the one that appears last in `entries`
    is used.

    Args:
      bs (int list): Encoded integer list.
      entries (TableEntry list): Table to use to decode.

    Returns:
      unicode: Decoded unicode string (empty if `bs` is empty).

    Raises:
      ValueError: If an integer is not covered by any entry of the table.
    """
    chars = []
    for b in bs:
        # Search the table. The scan does not stop at the first hit, so the
        # last matching entry in `entries` is the one used.
        table = None
        for t in entries:
            if t.contains_idx(b):
                table = t

        # Fail with a clear message instead of calling a method on None.
        if table is None:
            raise ValueError("No table entry for encoded value %r" % (b,))

        # Decode the integer
        chars.append(table.decode_char(b))

    # Join once at the end instead of growing the string char by char.
    return u''.join(chars)


def encode(ss, entries):
    """ Encode a string to a list of encoded integers.

    Each char is encoded with the table entry whose char range contains it.
    When several entries match, the one that appears last in `entries` is
    used.

    Args:
      ss (unicode): Unicode string to encode.
      entries (TableEntry list): Table to use to encode.

    Returns:
      int list: Encoded integer list (empty if `ss` is empty).

    Raises:
      ValueError: If a char is not covered by any entry of the table.
    """
    encoded = []
    for c in ss:
        # Search the table. The scan does not stop at the first hit, so the
        # last matching entry in `entries` is the one used.
        table = None
        for t in entries:
            if t.contains_char(c):
                table = t

        # Fail with a clear message instead of calling a method on None.
        if table is None:
            raise ValueError("No table entry for char %r" % (c,))

        # Encode the char
        encoded.append(table.encode_char(c))

    return encoded

if __name__ == "__main__":
    # Build the encoding table in one literal. Each entry is
    # (first shift-jis char, first encoded integer, number of chars).
    entries = [
        TableEntry('\x81\x40', 0x01, 0x01),
        TableEntry('\x82\x4F', 0x02, 0x0A),
        TableEntry('\x82\x9F', 0x0C, 0x53),
        TableEntry('\x83\x40', 0x5F, 0x57),
        TableEntry('\x81\x5B', 0xB6, 0x01),
        TableEntry('\x81\x60', 0xB7, 0x01),
        TableEntry('\x81\x48', 0xB8, 0x02),
        TableEntry('\x81\x41', 0xBA, 0x01),
        TableEntry('\x81\x42', 0xBB, 0x01),
    ]

    # Sample unicode text used for the round trip
    text = u'\u30ce\u30fc\u30b3\u30e1\u30f3\u30c8'
    print(text.encode('utf-8'))

    # Encode it and show the integers in hex, space separated, in brackets
    encoded = encode(text, entries)
    print(" ".join(["["] + [hex(e) for e in encoded] + ["]"]))

    # Decode it back and show the result
    decoded = decode(encoded, entries)
    print(decoded.encode('utf-8'))
	#!/bin/python
	"""
	ninostring.py: Encode / Decode a string with the Ninokuni default table.
	Copyright (C) 2014 pleonex

	This program is free software: you can redistribute it and/or modify
	it under the terms of the GNU General Public License as published by
	the Free Software Foundation, either version 2 of the License, or
	(at your option) any later version.

	This program is distributed in the hope that it will be useful,
	but WITHOUT ANY WARRANTY; without even the implied warranty of
	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
	GNU General Public License for more details.

	You should have received a copy of the GNU General Public License
	along with this program. If not, see <http://www.gnu.org/licenses/>.
	"""


	class TableEntry:
	    """ Represent one contiguous range of the encoding table.

	    An entry maps `num_chars` consecutive encoded integers, starting at
	    `idx_start`, onto `num_chars` consecutive unicode code points,
	    starting at `first_char`.
	    """

	    def __init__(self, first_char, idx_start, num_chars):
	        """ Create a new instance of the class.

	        Args:
	          first_char (bytes or unicode): First char in the range. A byte
	            string is decoded as shift-jis; any other value (for example
	            an already decoded unicode char) is stored unchanged.
	          idx_start (int): First encoded integer in the range.
	          num_chars (int): Number of chars in this range.
	        """
	        # Only raw bytes need decoding; text passed by the caller is kept.
	        if isinstance(first_char, (bytes, bytearray)):
	            first_char = first_char.decode('shift-jis')
	        self.first_char = first_char
	        self.idx_start = idx_start
	        self.num_chars = num_chars

	    def sum_char(self, c, i):
	        """ Increments a char with a number.

	        Args:
	          c (unicode): Char to increment.
	          i (int): Increment, applied to the code point of `c`.

	        Returns:
	          unicode: The char with the increment.
	        """
	        try:
	            to_char = unichr  # Python 2 builtin
	        except NameError:
	            to_char = chr  # No unichr available: chr returns unicode text
	        return to_char(ord(c) + i)

	    def get_first_char(self):
	        """ Gets the first char in the range.

	        Returns:
	          unicode: The first char in the range.
	        """
	        return self.first_char

	    def get_last_char(self):
	        """ Gets the last char in the range (inclusive).

	        Returns:
	          unicode: The last char in the range.
	        """
	        # The range holds num_chars chars counting the first one, so the
	        # last char is (num_chars - 1) positions after the first.
	        return self.sum_char(self.get_first_char(),
	                             self.get_num_chars() - 1)

	    def get_idx_start(self):
	        """ Gets the first encoded integer in the range.

	        Returns:
	          int: The first encoded integer in the range.
	        """
	        return self.idx_start

	    def get_idx_end(self):
	        """ Gets the last encoded integer in the range (inclusive).

	        Returns:
	          int: The last encoded integer in the range.
	        """
	        # Same reasoning as get_last_char: the start index is counted too.
	        return self.get_idx_start() + self.get_num_chars() - 1

	    def get_num_chars(self):
	        """ Gets the number of chars in the range.

	        Returns:
	          int: The number of chars in the range.
	        """
	        return self.num_chars

	    def contains_char(self, c):
	        """ Checks if the decoded char is in this encoding entry range.

	        Args:
	          c (unicode): Char to check.

	        Returns:
	          bool: If the char is in the range (both ends included).
	        """
	        return self.get_first_char() <= c <= self.get_last_char()

	    def contains_idx(self, b):
	        """ Checks if the encoded integer is in this encoding entry range.

	        Args:
	          b (int): Encoded integer to check.

	        Returns:
	          bool: If the encoded integer is in the range (both ends
	            included).
	        """
	        return self.get_idx_start() <= b <= self.get_idx_end()

	    def decode_char(self, b):
	        """ Decode an encoded integer to a char using this range.

	        No range check is done here; use contains_idx() first.

	        Args:
	          b (int): Encoded integer to decode.

	        Returns:
	          unicode: Decoded unicode char.
	        """
	        return self.sum_char(self.get_first_char(),
	                             (b - self.get_idx_start()))

	    def encode_char(self, c):
	        """ Encode a char to an encoded integer using this range.

	        No range check is done here; use contains_char() first.

	        Args:
	          c (unicode): Char to encode.

	        Returns:
	          int: Encoded integer.
	        """
	        return self.get_idx_start() + (ord(c) - ord(self.get_first_char()))


	def decode(bs, entries):
	    """ Decode a list of encoded integers to a string.

	    Each integer is decoded with the table entry whose index range
	    contains it. When several entries match, the one that appears last
	    in `entries` is used.

	    Args:
	      bs (int list): Encoded integer list.
	      entries (TableEntry list): Table to use to decode.

	    Returns:
	      unicode: Decoded unicode string (empty if `bs` is empty).

	    Raises:
	      ValueError: If an integer is not covered by any entry of the table.
	    """
	    chars = []
	    for b in bs:
	        # Search the table. The scan does not stop at the first hit, so
	        # the last matching entry in `entries` is the one used.
	        table = None
	        for t in entries:
	            if t.contains_idx(b):
	                table = t

	        # Fail with a clear message instead of calling a method on None.
	        if table is None:
	            raise ValueError("No table entry for encoded value %r" % (b,))

	        # Decode the integer
	        chars.append(table.decode_char(b))

	    # Join once at the end instead of growing the string char by char.
	    return u''.join(chars)


	def encode(ss, entries):
	    """ Encode a string to a list of encoded integers.

	    Each char is encoded with the table entry whose char range contains
	    it. When several entries match, the one that appears last in
	    `entries` is used.

	    Args:
	      ss (unicode): Unicode string to encode.
	      entries (TableEntry list): Table to use to encode.

	    Returns:
	      int list: Encoded integer list (empty if `ss` is empty).

	    Raises:
	      ValueError: If a char is not covered by any entry of the table.
	    """
	    encoded = []
	    for c in ss:
	        # Search the table. The scan does not stop at the first hit, so
	        # the last matching entry in `entries` is the one used.
	        table = None
	        for t in entries:
	            if t.contains_char(c):
	                table = t

	        # Fail with a clear message instead of calling a method on None.
	        if table is None:
	            raise ValueError("No table entry for char %r" % (c,))

	        # Encode the char
	        encoded.append(table.encode_char(c))

	    return encoded

	if __name__ == "__main__":
	    # Build the encoding table in one literal. Each entry is
	    # (first shift-jis char, first encoded integer, number of chars).
	    entries = [
	        TableEntry('\x81\x40', 0x01, 0x01),
	        TableEntry('\x82\x4F', 0x02, 0x0A),
	        TableEntry('\x82\x9F', 0x0C, 0x53),
	        TableEntry('\x83\x40', 0x5F, 0x57),
	        TableEntry('\x81\x5B', 0xB6, 0x01),
	        TableEntry('\x81\x60', 0xB7, 0x01),
	        TableEntry('\x81\x48', 0xB8, 0x02),
	        TableEntry('\x81\x41', 0xBA, 0x01),
	        TableEntry('\x81\x42', 0xBB, 0x01),
	    ]

	    # Sample unicode text used for the round trip
	    s = u'\u30ce\u30fc\u30b3\u30e1\u30f3\u30c8'
	    print(s.encode('utf-8'))

	    # Encode it and show the integers in hex, space separated, in
	    # brackets (same output as printing each item with a trailing comma)
	    enc = encode(s, entries)
	    print(" ".join(["["] + [hex(e) for e in enc] + ["]"]))

	    # Decode it back and show the result
	    dec = decode(enc, entries)
	    print(dec.encode('utf-8'))