quoll/CodePoint.java

## CodePoint.java
package util;

import java.util.*;

/**
 * This class fills in where java.lang.Character can't manage 21 bit Unicode.
 * Represents Unicode code points, U+0000 to U+10FFFF.
 * <p>
 * Instances are immutable. Note that the range check applied by {@link #CodePoint(int)} is
 * {@link Character#isValidCodePoint(int)}, so values in the surrogate range are accepted
 * as well as true Unicode scalar values.
 *
 * @author Paul Gearon
 */
public class CodePoint implements java.io.Serializable, Comparable<CodePoint> {

  /** The serialization ID. */
  private static final long serialVersionUID = 3212781738993088980L;

  /** The internal representation. */
  private final int data;

  /** The maximum value of a Unicode code point. */
  public static final CodePoint MAX_VALUE = new CodePoint(Character.MAX_CODE_POINT);

  /** The minimum value of a Unicode code point. */
  public static final CodePoint MIN_VALUE = new CodePoint(Character.MIN_CODE_POINT);


  /**
   * Constructs a new CodePoint for a given Character value.
   * @param value The value to be represented by the CodePoint.
   */
  public CodePoint(Character value) {
    data = value.charValue();
  }


  /**
   * Constructs a new CodePoint for a given char value.
   * @param value The value to be represented by the CodePoint.
   */
  public CodePoint(char value) {
    data = value;
  }


  /**
   * Constructs a new CodePoint for a given Unicode code point value.
   * @param value The Unicode code point value of this CodePoint.
   * @throws IllegalArgumentException if value is not accepted by
   *         {@link Character#isValidCodePoint(int)}.
   */
  public CodePoint(int value) {
    if (!Character.isValidCodePoint(value)) throw new IllegalArgumentException("Code point out of range");
    data = value;
  }


  /**
   * Constructs a new codepoint for a given char array. The code point is read
   * from the start of the array.
   * @param value The value to be represented by the codepoint in UTF-16.
   */
  public CodePoint(char[] value) {
    data = Character.codePointAt(value, 0);
  }


  /**
   * Constructs a new codepoint for a given Character array. The code point is read
   * from the start of the array.
   * @param value The value to be represented by the codepoint in UTF-16.
   * @throws IllegalArgumentException if the first element is a high surrogate
   *         and there is no following element.
   */
  public CodePoint(Character[] value) {
    // identical to reading at offset 0, so share the implementation
    this(value, 0);
  }


  /**
   * Constructs a new codepoint from an offset into a given char array.
   * @param chars The UTF-16 array containing the CodePoints.
   * @param offset The offset into the array to read from.
   */
  public CodePoint(char[] chars, int offset) {
    data = Character.codePointAt(chars, offset);
  }


  /**
   * Constructs a new codepoint from an offset into a given Character array.
   * @param characters The UTF-16 array containing the CodePoints.
   * @param offset The offset into the array to read from.
   * @throws IllegalArgumentException if the element at offset is a high surrogate
   *         and there is no following element.
   */
  public CodePoint(Character[] characters, int offset) {
    // The surrogate test must look at the element being decoded, not at element 0.
    char first = characters[offset];
    char[] tmpValue;
    if (Character.isHighSurrogate(first)) {
      if (characters.length - offset < 2) throw new IllegalArgumentException("Malformed UniCode character array");
      tmpValue = new char[] { first, characters[offset + 1] };
    } else {
      tmpValue = new char[] { first };
    }
    data = Character.codePointAt(tmpValue, 0);
  }


  /**
   * Gets a Unicode code point value for use with java.lang.Character methods.
   * @return The Unicode code point value of this CodePoint.
   */
  public int intValue() {
    return data;
  }


  /**
   * Convert this code point to its UTF-16 representation stored in a char array.
   * @return An array containing the UTF-16 representation of the current code point.
   */
  public char[] toChars() {
    return Character.toChars(data);
  }


  /**
   * Convert this code point to its UTF-16 representation stored in a provided char array.
   * @param dst An array of char in which the codePoint's UTF-16 value is stored.
   * @param dstIndex The start index into the dst array where the converted value is stored.
   * @return 1 if the code point is a BMP code point, 2 if the code point is a supplementary code point.
   */
  public int toChars(char[] dst, int dstIndex) {
    return Character.toChars(data, dst, dstIndex);
  }


  /**
   * Convert this code point to its UTF-16 representation stored in a Character array.
   * @return An array containing the UTF-16 representation of the current code point.
   */
  public Character[] toCharacters() {
    return toCharacters(Character.toChars(data));
  }


  /**
   * Convert this code point to its UTF-16 representation stored in a provided Character array.
   * @param dst An array of Character in which the codePoint's UTF-16 value is stored.
   * @param dstIndex The start index into the dst array where the converted value is stored.
   * @return 1 if the code point is a BMP code point, 2 if the code point is a supplementary code point.
   */
  public int toCharacters(Character[] dst, int dstIndex) {
    // encode into a scratch char array first, then box into the destination
    char[] tmpDst = new char[2];
    int size = Character.toChars(data, tmpDst, 0);
    dst[dstIndex] = tmpDst[0];
    if (size == 2) dst[dstIndex + 1] = tmpDst[1];
    return size;
  }


  /**
   * Tests for equality.
   * @param o The object to compare to.
   * @return <code>true</code> if o is a CodePoint holding the same value as this object.
   */
  @Override
  public boolean equals(Object o) {
    return (o instanceof CodePoint) && ((CodePoint)o).data == data;
  }


  /**
   * Gets a hashcode for this object.
   * @return An integer hash code, which is the code point value itself.
   */
  @Override
  public int hashCode() {
    return data;
  }


  /**
   * Performs a comparison on another CodePoint object.
   * @param c The CodePoint to compare to.
   * @return A negative integer, zero, or a positive integer as this object is less than, equal to,
   * or greater than the specified object.
   */
  @Override
  public int compareTo(CodePoint c) {
    // bit ranges guarantee that we could take data-c.data, but that is poor practice in general
    if (data == c.data) return 0;
    if (data < c.data) return -1;
    return 1;
  }


  /**
   * Returns a String object representing this codepoint's value.
   * @return A String version of this unicode character.
   */
  @Override
  public String toString() {
    return new String(toChars());
  }


  /**
   * Determines if the specified codepoint is lowercase.
   * <p>
   * A codepoint is lowercase if its general category type, provided
   * by <code>CodePoint.getType()</code>, is
   * <code>LOWERCASE_LETTER</code>.
   * <p>
   * The following are examples of lowercase codepoints:
   * <p><blockquote><pre>
   * a b c d e f g h i j k l m n o p q r s t u v w x y z
   * '&#92;u00DF' '&#92;u00E0' '&#92;u00E1' '&#92;u00E2' '&#92;u00E3' '&#92;u00E4' '&#92;u00E5' '&#92;u00E6'
   * '&#92;u00E7' '&#92;u00E8' '&#92;u00E9' '&#92;u00EA' '&#92;u00EB' '&#92;u00EC' '&#92;u00ED' '&#92;u00EE'
   * '&#92;u00EF' '&#92;u00F0' '&#92;u00F1' '&#92;u00F2' '&#92;u00F3' '&#92;u00F4' '&#92;u00F5' '&#92;u00F6'
   * '&#92;u00F8' '&#92;u00F9' '&#92;u00FA' '&#92;u00FB' '&#92;u00FC' '&#92;u00FD' '&#92;u00FE' '&#92;u00FF'
   * </pre></blockquote>
   * <p> Many other Unicode codepoints are lowercase too.
   * <p>
   *
   * @return  <code>true</code> if the codepoint is lowercase;
   *          <code>false</code> otherwise.
   */
  public boolean isLowerCase() {
    return Character.isLowerCase(data);
  }


  /**
   * Determines if the codepoint is an uppercase codepoint.
   * <p>
   * A codepoint is uppercase if its general category type, provided by
   * <code>CodePoint.getType()</code>, is <code>UPPERCASE_LETTER</code>.
   * <p>
   * The following are examples of uppercase codepoints:
   * <p><blockquote><pre>
   * A B C D E F G H I J K L M N O P Q R S T U V W X Y Z
   * '&#92;u00C0' '&#92;u00C1' '&#92;u00C2' '&#92;u00C3' '&#92;u00C4' '&#92;u00C5' '&#92;u00C6' '&#92;u00C7'
   * '&#92;u00C8' '&#92;u00C9' '&#92;u00CA' '&#92;u00CB' '&#92;u00CC' '&#92;u00CD' '&#92;u00CE' '&#92;u00CF'
   * '&#92;u00D0' '&#92;u00D1' '&#92;u00D2' '&#92;u00D3' '&#92;u00D4' '&#92;u00D5' '&#92;u00D6' '&#92;u00D8'
   * '&#92;u00D9' '&#92;u00DA' '&#92;u00DB' '&#92;u00DC' '&#92;u00DD' '&#92;u00DE'
   * </pre></blockquote>
   * <p> Many other Unicode codepoints are uppercase too.<p>
   *
   * @return  <code>true</code> if the codepoint is uppercase;
   *          <code>false</code> otherwise.
   */
  public boolean isUpperCase() {
    return Character.isUpperCase(data);
  }


  /**
   * Determines if the codepoint is a titlecase codepoint.
   * <p>
   * A codepoint is a titlecase codepoint if its general
   * category type, provided by <code>CodePoint.getType()</code>,
   * is <code>TITLECASE_LETTER</code>.
   * <p>
   * Some codepoints look like pairs of Latin letters. For example, there
   * is an uppercase letter that looks like "LJ" and has a corresponding
   * lowercase letter that looks like "lj". A third form, which looks like "Lj",
   * is the appropriate form to use when rendering a word in lowercase
   * with initial capitals, as for a book title.
   * <p>
   * These are some of the Unicode codepoints for which this method returns
   * <code>true</code>:
   * <ul>
   * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON</code>
   * <li><code>LATIN CAPITAL LETTER L WITH SMALL LETTER J</code>
   * <li><code>LATIN CAPITAL LETTER N WITH SMALL LETTER J</code>
   * <li><code>LATIN CAPITAL LETTER D WITH SMALL LETTER Z</code>
   * </ul>
   * <p> Many other Unicode codepoints are titlecase too.<p>
   *
   * @return  <code>true</code> if the codepoint is titlecase;
   *          <code>false</code> otherwise.
   */
  public boolean isTitleCase() {
    return Character.isTitleCase(data);
  }


  /**
   * Determines if the codepoint is a digit.
   * <p>
   * A codepoint is a digit if its general category type, provided
   * by <code>CodePoint.getType()</code>, is
   * <code>DECIMAL_DIGIT_NUMBER</code>.
   * <p>
   * Some Unicode codepoint ranges that contain digits:
   * <ul>
   * <li><code>'&#92;u0030'</code> through <code>'&#92;u0039'</code>,
   *     ISO-LATIN-1 digits (<code>'0'</code> through <code>'9'</code>)
   * <li><code>'&#92;u0660'</code> through <code>'&#92;u0669'</code>,
   *     Arabic-Indic digits
   * <li><code>'&#92;u06F0'</code> through <code>'&#92;u06F9'</code>,
   *     Extended Arabic-Indic digits
   * <li><code>'&#92;u0966'</code> through <code>'&#92;u096F'</code>,
   *     Devanagari digits
   * <li><code>'&#92;uFF10'</code> through <code>'&#92;uFF19'</code>,
   *     Fullwidth digits
   * </ul>
   *
   * Many other codepoint ranges contain digits as well.
   *
   * @return  <code>true</code> if the codepoint is a digit;
   *          <code>false</code> otherwise.
   */
  public boolean isDigit() {
    return Character.isDigit(data);
  }


  /**
   * Determines if a codepoint is defined in Unicode.
   * <p>
   * A codepoint is defined if at least one of the following is true:
   * <ul>
   * <li>It has an entry in the UnicodeData file.
   * <li>It has a value in a range defined by the UnicodeData file.
   * </ul>
   *
   * @return  <code>true</code> if the codepoint has a defined meaning
   *          in Unicode; <code>false</code> otherwise.
   */
  public boolean isDefined() {
    return Character.isDefined(data);
  }


  /**
   * Determines if the codepoint is a letter.
   * <p>
   * A codepoint is considered to be a letter if its general
   * category type, provided by <code>CodePoint.getType()</code>,
   * is any of the following:
   * <ul>
   * <li> <code>UPPERCASE_LETTER</code>
   * <li> <code>LOWERCASE_LETTER</code>
   * <li> <code>TITLECASE_LETTER</code>
   * <li> <code>MODIFIER_LETTER</code>
   * <li> <code>OTHER_LETTER</code>
   * </ul>
   *
   * Not all letters have case. Many codepoints are
   * letters but are neither uppercase nor lowercase nor titlecase.
   *
   * @return  <code>true</code> if the codepoint is a letter;
   *          <code>false</code> otherwise.
   */
  public boolean isLetter() {
    return Character.isLetter(data);
  }


  /**
   * Determines if the codepoint is a letter or digit.
   * <p>
   * A codepoint is considered to be a letter or digit if either
   * <code>CodePoint.isLetter()</code> or
   * <code>CodePoint.isDigit()</code> returns
   * <code>true</code> for the codepoint.
   *
   * @return  <code>true</code> if the codepoint is a letter or digit;
   *          <code>false</code> otherwise.
   */
  public boolean isLetterOrDigit() {
    return Character.isLetterOrDigit(data);
  }


  /**
   * Determines if the codepoint is
   * permissible as the first codepoint in a Java identifier.
   * <p>
   * A codepoint may start a Java identifier if and only if
   * one of the following conditions is true:
   * <ul>
   * <li> {@link #isLetter() isLetter()} returns <code>true</code>
   * <li> {@link #getType() getType()} returns <code>LETTER_NUMBER</code>
   * <li> codepoint is a currency symbol (such as "$")
   * <li> codepoint is a connecting punctuation codepoint (such as "_").
   * </ul>
   *
   * @return  <code>true</code> if the codepoint may start a Java identifier;
   *          <code>false</code> otherwise.
   */
  public boolean isJavaIdentifierStart() {
    return Character.isJavaIdentifierStart(data);
  }


  /**
   * Determines if the codepoint may be part of a Java
   * identifier as other than the first codepoint.
   * <p>
   * A codepoint may be part of a Java identifier if any of the following
   * are true:
   * <ul>
   * <li>  it is a letter
   * <li>  it is a currency symbol (such as <code>'$'</code>)
   * <li>  it is a connecting punctuation codepoint (such as <code>'_'</code>)
   * <li>  it is a digit
   * <li>  it is a numeric letter (such as a Roman numeral codepoint)
   * <li>  it is a combining mark
   * <li>  it is a non-spacing mark
   * <li> <code>isIdentifierIgnorable</code> returns
   * <code>true</code> for the codepoint
   * </ul>
   *
   * @return <code>true</code> if the codepoint may be part of a
   *         Java identifier; <code>false</code> otherwise.
   */
  public boolean isJavaIdentifierPart() {
    return Character.isJavaIdentifierPart(data);
  }


  /**
   * Determines if the codepoint is permissible as the
   * first codepoint in a Unicode identifier.
   * <p>
   * A codepoint may start a Unicode identifier if and only if
   * one of the following conditions is true:
   * <ul>
   * <li> {@link #isLetter() isLetter()} returns <code>true</code>
   * <li> {@link #getType() getType()} returns
   *      <code>LETTER_NUMBER</code>.
   * </ul>
   * @return  <code>true</code> if the codepoint may start a Unicode
   *          identifier; <code>false</code> otherwise.
   */
  public boolean isUnicodeIdentifierStart() {
    return Character.isUnicodeIdentifierStart(data);
  }


  /**
   * Determines if the codepoint may be part of a Unicode
   * identifier as other than the first codepoint.
   * <p>
   * A codepoint may be part of a Unicode identifier if and only if
   * one of the following statements is true:
   * <ul>
   * <li>  it is a letter
   * <li>  it is a connecting punctuation codepoint (such as <code>'_'</code>)
   * <li>  it is a digit
   * <li>  it is a numeric letter (such as a Roman numeral codepoint)
   * <li>  it is a combining mark
   * <li>  it is a non-spacing mark
   * <li> <code>isIdentifierIgnorable</code> returns
   * <code>true</code> for this codepoint.
   * </ul>
   *
   * @return  <code>true</code> if the codepoint may be part of a
   *          Unicode identifier; <code>false</code> otherwise.
   */
  public boolean isUnicodeIdentifierPart() {
    return Character.isUnicodeIdentifierPart(data);
  }


  /**
   * Determines if the codepoint should be regarded as
   * an ignorable codepoint in a Java identifier or a Unicode identifier.
   * <p>
   * The following Unicode codepoints are ignorable in a Java identifier
   * or a Unicode identifier:
   * <ul>
   * <li>ISO control codepoints that are not whitespace
   * <ul>
   * <li><code>'&#92;u0000'</code> through <code>'&#92;u0008'</code>
   * <li><code>'&#92;u000E'</code> through <code>'&#92;u001B'</code>
   * <li><code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>
   * </ul>
   *
   * <li>all codepoints that have the <code>FORMAT</code> general
   * category value
   * </ul>
   *
   * @return  <code>true</code> if the codepoint is an ignorable control
   *          codepoint that may be part of a Java or Unicode identifier;
   *          <code>false</code> otherwise.
   */
  public boolean isIdentifierIgnorable() {
    return Character.isIdentifierIgnorable(data);
  }


  /**
   * Converts the codepoint to lowercase using case
   * mapping information from the UnicodeData file.
   * <p>
   * Note that
   * <code>CodePoint.isLowerCase(CodePoint.toLowerCase())</code>
   * does not always return <code>true</code> for some ranges of
   * codepoint, particularly those that are symbols or ideographs.
   *
   * @return  the lowercase equivalent of the codepoint, if any;
   *          otherwise, the codepoint itself.
   */
  public CodePoint toLowerCase() {
    int lc = Character.toLowerCase(data);
    // instances are immutable, so this object can be reused when nothing changes
    return lc == data ? this : new CodePoint(lc);
  }


  /**
   * Converts the codepoint to uppercase using case mapping
   * information from the UnicodeData file.
   * <p>
   * Note that
   * <code>CodePoint.isUpperCase(CodePoint.toUpperCase())</code>
   * does not always return <code>true</code> for some ranges of
   * codepoints, particularly those that are symbols or ideographs.
   *
   * @return  the uppercase equivalent of the codepoint, if any;
   *          otherwise, the codepoint itself.
   */
  public CodePoint toUpperCase() {
    int uc = Character.toUpperCase(data);
    // instances are immutable, so this object can be reused when nothing changes
    return uc == data ? this : new CodePoint(uc);
  }


  /**
   * Converts the codepoint to titlecase using case mapping
   * information from the UnicodeData file. If a codepoint has no
   * explicit titlecase mapping and is not itself a titlecase codepoint
   * according to UnicodeData, then the uppercase mapping is
   * returned as an equivalent titlecase mapping. If the
   * codepoint is already titlecase, the same codepoint value will be
   * returned.
   * <p>
   * Note that
   * <code>codepoint.isTitleCase(codepoint.toTitleCase())</code>
   * does not always return <code>true</code> for some ranges of
   * codepoints.
   *
   * @return  the titlecase equivalent of the codepoint, if any;
   *          otherwise, the codepoint itself.
   */
  public CodePoint toTitleCase() {
    int tc = Character.toTitleCase(data);
    // instances are immutable, so this object can be reused when nothing changes
    return tc == data ? this : new CodePoint(tc);
  }


  /**
   * Returns the numeric value of the codepoint in the specified radix.
   * <p>
   * If the radix is not in the range <code>MIN_RADIX</code>&nbsp;&lt;=
   * <code>radix</code>&nbsp;&lt;= <code>MAX_RADIX</code> or if the
   * value of the codepoint is not a valid digit in the specified
   * radix, <code>-1</code> is returned. A codepoint is a valid digit
   * if at least one of the following is true:
   * <ul>
   * <li>The method <code>isDigit</code> is <code>true</code> of the codepoint
   *     and the Unicode decimal digit value of the codepoint (or its
   *     single-codepoint decomposition) is less than the specified radix.
   *     In this case the decimal digit value is returned.
   * <li>The codepoint is one of the uppercase Latin letters
   *     <code>'A'</code> through <code>'Z'</code> and its code is less than
   *     <code>radix&nbsp;+ 'A'&nbsp;-&nbsp;10</code>.
   *     In this case, <code>code&nbsp;- 'A'&nbsp;+&nbsp;10</code>
   *     is returned.
   * <li>The codepoint is one of the lowercase Latin letters
   *     <code>'a'</code> through <code>'z'</code> and its code is less than
   *     <code>radix&nbsp;+ 'a'&nbsp;-&nbsp;10</code>.
   *     In this case, <code>code&nbsp;- 'a'&nbsp;+&nbsp;10</code>
   *     is returned.
   * </ul>
   *
   * @param   radix   the radix.
   * @return  the numeric value represented by the codepoint in the
   *          specified radix.
   */
  public int digit(int radix) {
    return Character.digit(data, radix);
  }


  /**
   * Returns the <code>int</code> value that the Unicode
   * codepoint represents. For example, the codepoint
   * <code>'&#92;u216C'</code> (the roman numeral fifty) will return
   * an int with a value of 50.
   * <p>
   * The letters A-Z in their uppercase (<code>'&#92;u0041'</code> through
   * <code>'&#92;u005A'</code>), lowercase
   * (<code>'&#92;u0061'</code> through <code>'&#92;u007A'</code>), and
   * full width variant (<code>'&#92;uFF21'</code> through
   * <code>'&#92;uFF3A'</code> and <code>'&#92;uFF41'</code> through
   * <code>'&#92;uFF5A'</code>) forms have numeric values from 10
   * through 35. This is independent of the Unicode specification,
   * which does not assign numeric values to these codepoint
   * values.
   * <p>
   * If the codepoint does not have a numeric value, then -1 is returned.
   * If the codepoint has a numeric value that cannot be represented as a
   * nonnegative integer (for example, a fractional value), then -2
   * is returned.
   *
   * @return  the numeric value of the codepoint, as a nonnegative <code>int</code>
   *          value; -2 if the codepoint has a numeric value that is not a
   *          nonnegative integer; -1 if the codepoint has no numeric value.
   */
  public int getNumericValue() {
    return Character.getNumericValue(data);
  }


  /**
   * Determines if the codepoint is a Unicode space codepoint.
   * A codepoint is considered to be a space codepoint if and only if
   * it is specified to be a space codepoint by the Unicode standard. This
   * method returns true if the codepoint's general category type is any of
   * the following:
   * <ul>
   * <li> <code>SPACE_SEPARATOR</code>
   * <li> <code>LINE_SEPARATOR</code>
   * <li> <code>PARAGRAPH_SEPARATOR</code>
   * </ul>
   *
   * @return  <code>true</code> if the codepoint is a space codepoint;
   *          <code>false</code> otherwise.
   */
  public boolean isSpaceChar() {
    return Character.isSpaceChar(data);
  }


  /**
   * Determines if the codepoint is white space according to Java.
   * A codepoint is a Java whitespace codepoint if and only if it satisfies
   * one of the following criteria:
   * <ul>
   * <li> It is a Unicode space codepoint (<code>SPACE_SEPARATOR</code>,
   *      <code>LINE_SEPARATOR</code>, or <code>PARAGRAPH_SEPARATOR</code>)
   *      but is not also a non-breaking space (<code>'&#92;u00A0'</code>,
   *      <code>'&#92;u2007'</code>, <code>'&#92;u202F'</code>).
   * <li> It is <code>'&#92;u0009'</code>, HORIZONTAL TABULATION.
   * <li> It is <code>'&#92;u000A'</code>, LINE FEED.
   * <li> It is <code>'&#92;u000B'</code>, VERTICAL TABULATION.
   * <li> It is <code>'&#92;u000C'</code>, FORM FEED.
   * <li> It is <code>'&#92;u000D'</code>, CARRIAGE RETURN.
   * <li> It is <code>'&#92;u001C'</code>, FILE SEPARATOR.
   * <li> It is <code>'&#92;u001D'</code>, GROUP SEPARATOR.
   * <li> It is <code>'&#92;u001E'</code>, RECORD SEPARATOR.
   * <li> It is <code>'&#92;u001F'</code>, UNIT SEPARATOR.
   * </ul>
   *
   * @return  <code>true</code> if the codepoint is a Java whitespace
   *          codepoint; <code>false</code> otherwise.
   */
  public boolean isWhitespace() {
    return Character.isWhitespace(data);
  }


  /**
   * Determines if the codepoint is an ISO control
   * codepoint.  A codepoint is considered to be an ISO control
   * codepoint if its code is in the range <code>'&#92;u0000'</code>
   * through <code>'&#92;u001F'</code> or in the range
   * <code>'&#92;u007F'</code> through <code>'&#92;u009F'</code>.
   *
   * @return  <code>true</code> if the codepoint is an ISO control codepoint;
   *          <code>false</code> otherwise.
   */
  public boolean isISOControl() {
    return Character.isISOControl(data);
  }


  /**
   * Returns a value indicating a codepoint's general category.
   *
   * @return  a value of type <code>int</code> representing the
   *          codepoint's general category.
   */
  public int getType() {
    return Character.getType(data);
  }


  /**
   * Returns the Unicode directionality property for this
   * codepoint.  Codepoint directionality is used to calculate the
   * visual ordering of text. The directionality value of undefined
   * codepoints is <code>DIRECTIONALITY_UNDEFINED</code>.
   *
   * @return the directionality property of the codepoint.
   */
  public byte getDirectionality() {
    return Character.getDirectionality(data);
  }


  /**
   * Determines whether the codepoint is mirrored according to the
   * Unicode specification.  Mirrored codepoints should have their
   * glyphs horizontally mirrored when displayed in text that is
   * right-to-left.  For example, <code>'&#92;u0028'</code> LEFT
   * PARENTHESIS is semantically defined to be an <i>opening
   * parenthesis</i>.  This will appear as a "(" in text that is
   * left-to-right but as a ")" in text that is right-to-left.
   *
   * @return <code>true</code> if the codepoint is mirrored, <code>false</code>
   *         if the codepoint is not mirrored or is not defined.
   */
  public boolean isMirrored() {
    return Character.isMirrored(data);
  }


  /**
   * Determines the number of char values needed to represent this codepoint.
   * If the codepoint is equal to or greater than 0x10000, then
   * the method returns 2. Otherwise, the method returns 1.
   *
   * @return 2 for a supplementary codepoint, 1 otherwise.
   */
  public int charCount() {
    return Character.charCount(data);
  }


  /**
   * Determines whether this codepoint is in the supplementary
   * character range. The method call is equivalent to the expression:
   * <pre><code>  codePoint &gt;= 0x10000 &amp;&amp; codePoint &lt;= 0x10ffff</code></pre>
   *
   * @return <code>true</code> if this codepoint is in the Unicode
   *         supplementary character range; <code>false</code> otherwise.
   */
  public boolean isSupplementaryCodePoint() {
    return Character.isSupplementaryCodePoint(data);
  }


  ////////////////////////////////////////////////////////////////////////////////
  /////// The following are array methods, for handling char and Character strings
  ////////////////////////////////////////////////////////////////////////////////

  /**
   * Convert an array of code points to an array of char in UTF-16.
   * @param codePoints The CodePoint array.
   * @return A UTF-16 array of char.
   */
  public static char[] toChars(CodePoint[] codePoints) {
    char[] utf = new char[charCount(codePoints)];
    // u tracks the write position, which advances by 1 or 2 chars per code point
    for (int c = 0, u = 0; c < codePoints.length; c++) u += Character.toChars(codePoints[c].data, utf, u);
    return utf;
  }


  /**
   * Convert an array of code points to an array of Characters in UTF-16.
   * @param codePoints The CodePoint array.
   * @return A UTF-16 array of Character.
   */
  public static Character[] toCharacters(CodePoint[] codePoints) {
    Character[] utf = new Character[charCount(codePoints)];
    // iterate through copying to a 2 element char array, before autoboxing into the correct Character elements
    char[] tmpUtf = new char[2];
    for (int c = 0, u = 0; c < codePoints.length; c++) {
      int len = Character.toChars(codePoints[c].data, tmpUtf, 0);
      utf[u] = tmpUtf[0];
      if (len == 2) utf[u + 1] = tmpUtf[1];
      u += len;
    }
    return utf;
  }


  /**
   * Converts an array of char to an array of CodePoints. Surrogate pairs are
   * decoded into a single supplementary CodePoint; an unpaired surrogate becomes
   * a CodePoint of its own.
   * @param chars The char array in UTF-16.
   * @return A CodePoint array, with the decoded unicode characters.
   */
  public static CodePoint[] toCodePoints(char[] chars) {
    CodePoint[] codePoints = new CodePoint[Character.codePointCount(chars, 0, chars.length)];
    // The read position moves by 1 or 2 chars per code point, so it is tracked
    // separately from the index into the result array.
    for (int i = 0, offset = 0; i < codePoints.length; i++) {
      int value = Character.codePointAt(chars, offset);
      codePoints[i] = new CodePoint(value);
      offset += Character.charCount(value);
    }
    return codePoints;
  }


  /**
   * Converts an array of characters to an array of CodePoints.
   * @param characters The character array in UTF-16.
   * @return A CodePoint array, with the decoded unicode characters.
   */
  public static CodePoint[] toCodePoints(Character[] characters) {
    return toCodePoints(toChars(characters));
  }


  /**
   * Converts a String to an array of CodePoints.
   * @param str The String to convert.
   * @return A CodePoint array, with the decoded unicode characters.
   */
  public static CodePoint[] toCodePoints(String str) {
    return toCodePoints(str.toCharArray());
  }


  /**
   * Returns a String object representing a CodePoint array.
   * @param codePoints An array of CodePoints to convert to a string.
   * @return A String version of a unicode codepoint array.
   */
  public static String toString(CodePoint[] codePoints) {
    return new String(toChars(codePoints));
  }


  /**
   * Returns a list of CodePoints backed by the supplied CodePoint array.
   * @param codePoints An array of CodePoints to convert to a List.
   * @return A {@link java.util.List} of CodePoints.
   */
  public static java.util.List<CodePoint> toList(CodePoint[] codePoints) {
    return java.util.Arrays.asList(codePoints);
  }


  /**
   * Returns a list of CodePoints which is the equivalent of a Unicode String.
   * @param str A String to convert to a list of CodePoints.
   * @return A {@link java.util.List} of CodePoints, in the same order as the
   *         characters in <code>str</code>.
   */
  public static java.util.List<CodePoint> toList(String str) {
    return java.util.Arrays.asList(toCodePoints(str));
  }


  /**
   * Converts a {@link java.util.Collection} of CodePoints into an array.
   * @param codePointCollection The collection of CodePoints to copy into an array.
   * @return An array of CodePoints.
   */
  public static CodePoint[] toArray(java.util.Collection<CodePoint> codePointCollection) {
    return codePointCollection.toArray(new CodePoint[codePointCollection.size()]);
  }


  /**
   * Gets the number of char values needed to represent an array of CodePoints.
   * @param codePoints The array of CodePoints to measure.
   * @return The total number of UTF-16 char values needed for all the CodePoints.
   */
  public static int charCount(CodePoint[] codePoints) {
    int total = 0;
    for (int i = 0; i < codePoints.length; i++) total += Character.charCount(codePoints[i].data);
    return total;
  }


  ////////////////////////////////////////////////////////////////////////////////
  /////// The following are private helper methods, for converting char and
  /////// Character strings to the other type.
  ////////////////////////////////////////////////////////////////////////////////


  /**
   * Converts an array of java.lang.Character to an array of char.
   * @param characters The character array to convert.
   * @return An equivalent array of char.
   */
  private static final char[] toChars(Character[] characters) {
    char[] chars = new char[characters.length];
    for (int i = 0; i < characters.length; i++) chars[i] = characters[i];
    return chars;
  }


  /**
   * Converts an array of char to an array of java.lang.Character.
   * @param chars The char array to convert.
   * @return An equivalent array of Character.
   */
  private static final Character[] toCharacters(char[] chars) {
    Character[] characters = new Character[chars.length];
    for (int i = 0; i < chars.length; i++) characters[i] = chars[i];
    return characters;
  }

}