Skip to content

Instantly share code, notes, and snippets.

Embed
What would you like to do?
Iterating over the "letters" (codepoints, not chars) of a string.
package com.emmanueloga.cracking.arrays;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.Objects;
/**
 * {@link Iterable} view over the Unicode code points of a string.
 *
 * <p>Each element produced by the iterator is a {@code String} holding exactly one
 * code point: one {@code char} for BMP characters, two {@code char}s (a surrogate
 * pair) for supplementary characters. An unpaired surrogate in the source string is
 * returned on its own as a one-{@code char} string, because that is what
 * {@link String#codePointAt(int)} reports for it.
 */
class StringCodepointsIterable implements Iterable<String> {

    /**
     * Iterator that walks the enclosing iterable's string one code point at a time.
     * Removal is not supported.
     */
    public class StringCodepointsIterator implements Iterator<String> {
        /** Offset, in {@code char} units, of the next code point to return. */
        private int index = 0;

        /**
         * Always fails: the underlying string is immutable.
         *
         * @throws UnsupportedOperationException always
         */
        @Override
        public void remove() {
            throw new UnsupportedOperationException();
        }

        /**
         * @return {@code true} while at least one code point has not been returned yet
         */
        @Override
        public boolean hasNext() {
            return index < StringCodepointsIterable.this.string.length();
        }

        /**
         * Returns the next code point as a string and advances past it.
         *
         * @return a string containing exactly one code point
         * @throws NoSuchElementException if the string has been fully consumed
         */
        @Override
        public String next() {
            // Honour the Iterator contract instead of leaking the
            // StringIndexOutOfBoundsException that codePointAt would raise.
            if (!hasNext()) {
                throw new NoSuchElementException();
            }
            final int codePoint = StringCodepointsIterable.this.string.codePointAt(index);
            // Advance by 1 or 2 chars depending on whether a surrogate pair was read.
            index += Character.charCount(codePoint);
            return new String(Character.toChars(codePoint));
        }
    }

    /** The text whose code points are iterated. Never {@code null}. */
    private final String string;

    /**
     * Creates an iterable over the code points of {@code string}.
     *
     * @param string the text to iterate over
     * @throws NullPointerException if {@code string} is {@code null}
     */
    public StringCodepointsIterable(final String string) {
        // Fail at construction rather than on the first hasNext()/next() call.
        this.string = Objects.requireNonNull(string, "string");
    }

    /**
     * @return a fresh iterator positioned at the first code point of the string
     */
    @Override
    public Iterator<String> iterator() {
        return new StringCodepointsIterator();
    }
}
/**
 * Demo program contrasting {@code char}-based and code-point-based traversal of a
 * string that contains a supplementary (non-BMP) character.
 */
public class IterateCodepoints {

    /**
     * Prints the sample string with its {@code char} length and code point count,
     * then walks it five ways: {@code charAt}, Guava's {@code Lists.charactersOf},
     * a manual {@code codePointAt} loop, {@code String.codePoints()}, and
     * {@code StringCodepointsIterable}. Every element is followed by {@code "_"}.
     *
     * @param args not used
     */
    public static void main(String... args) {
        // The sample starts with the Mathematical Script Capital C (U+1D49E),
        // which needs two chars (a surrogate pair) in a Java String:
        // http://unicode-table.com/en/1D49E/
        final String unicode = new String(Character.toChars(0x1d49e)) + ":Hi";
        final int charLength = unicode.length();
        final int codePointTotal = unicode.codePointCount(0, charLength);

        System.out.print("String: " + unicode + "(length " + charLength
                + ", " + codePointTotal + " codepoints)");

        // char-by-char: the surrogate pair is split into two separate prints.
        System.out.print("\ncharAt for each index in (0...str.length() - 1): ");
        int pos = 0;
        while (pos < charLength) {
            System.out.print(unicode.charAt(pos++));
            System.out.print("_");
        }

        // Guava's view is also char-based, so it splits the pair the same way.
        System.out.print("\nGuava's Lists.charactersOf: ");
        ImmutableList<Character> chars = Lists.charactersOf(unicode);
        chars.forEach(boxed -> {
            System.out.print(boxed);
            System.out.print("_");
        });

        // Manual code point walk: step by however many chars each code point used.
        System.out.print("\nCODEPOINTS: ");
        int offset = 0;
        while (offset < charLength) {
            final int cp = unicode.codePointAt(offset);
            System.out.print(Character.toChars(cp));
            System.out.print("_");
            offset += Character.charCount(cp);
        }

        // Same walk via the IntStream of code points.
        System.out.print("\nCODEPOINTS INTSTREAM: ");
        unicode.codePoints()
                .mapToObj(Character::toChars)
                .forEach(units -> {
                    System.out.print(units);
                    System.out.print("_");
                });

        // Same walk via the custom Iterable, which yields one String per code point.
        System.out.print("\nStringCodepointsIterable: ");
        for (String letter : new StringCodepointsIterable(unicode)) {
            System.out.print(letter);
            System.out.print("_");
        }
    }
}
// Output:
// String: 𝒞:Hi(length 5, 4 codepoints)
// charAt for each index in (0...str.length() - 1): ?_?_:_H_i_
// Guava's Lists.charactersOf: ?_?_:_H_i_
// CODEPOINTS: 𝒞_:_H_i_
// CODEPOINTS INTSTREAM: 𝒞_:_H_i_
// StringCodepointsIterable: 𝒞_:_H_i_
You can’t perform that action at this time.