Created
July 10, 2012 23:13
-
-
Save grignaak/3086847 to your computer and use it in GitHub Desktop.
Lexicographically ordered numbers
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.deardeuff.ids.ordered; | |
import com.google.common.base.Preconditions; | |
import com.google.common.io.ByteArrayDataOutput; | |
import com.google.common.io.ByteStreams; | |
public final class Ordered64Binary { | |
private static final char TRAILING_CHAR = '.'; | |
private static final Ordered64EncodeMap encoding = | |
new Ordered64EncodeMap("-_").changeDecodingTo(TRAILING_CHAR, (byte)0); | |
public static byte[] decodeOrdered64Binary(final CharSequence in) { | |
final int len = in.length(); | |
final ByteArrayDataOutput out = ByteStreams.newDataOutput(computeOutputLength(in)); | |
for (int i = 0; i < len; i += 4) { | |
final int a = decode(in, i + 0), | |
b = decode(in, i + 1), | |
c = decode(in, i + 2), | |
d = decode(in, i + 3); | |
out.writeByte(a << 2 | b >> 4); | |
out.writeByte(b << 4 | c >> 2); | |
out.writeByte(c << 6 | d); | |
} | |
return out.toByteArray(); | |
} | |
private static int decode(final CharSequence chars, final int index) { | |
return encoding.decodeByte(chars.charAt(index)); | |
} | |
private static void checkValidRepresentation(final boolean condition, final CharSequence stringRep) { | |
Preconditions.checkArgument(condition, "Not an ordered64 string", stringRep); | |
} | |
public static String encodeOrdered64Binary(final byte[] val) { | |
final int inSize = val.length; | |
final int outSize = ((inSize + 2) / 3) * 4; | |
final StringBuilder out = new StringBuilder(outSize); | |
int pos = 0; | |
for (; pos < inSize - 2; pos += 3) { | |
encodeTriplet(out, val[pos], val[pos + 1], val[pos + 2]); | |
} | |
final int remaining = inSize - pos; | |
if (remaining == 1) { | |
encodeTriplet(out, val[pos], 0, 0); | |
out.setCharAt(outSize - 2, TRAILING_CHAR); | |
out.setCharAt(outSize - 1, TRAILING_CHAR); | |
} else if (remaining == 2) { | |
encodeTriplet(out, val[pos], val[pos + 1], 0); | |
out.setCharAt(outSize - 1, TRAILING_CHAR); | |
} | |
return out.toString(); | |
} | |
private static void encodeTriplet(final StringBuilder out, | |
final int a, final int b, final int c) { | |
out.append(encode(a >> 2)); | |
out.append(encode(a << 4 | (b >> 4) & 0xF)); | |
out.append(encode(b << 2 | (c >> 6) & 0x3)); | |
out.append(encode(c)); | |
} | |
private static char encode(final int i) { | |
return encoding.encodeByte(i & 0x3F); | |
} | |
private static int computeOutputLength(final CharSequence in) { | |
final int len = in.length(); | |
checkValidRepresentation((len & 3) == 0, in); | |
final int encodedLen = lastIndexOfAnyBut(in, TRAILING_CHAR) + 1; | |
checkValidRepresentation(encodedLen > 0, in); | |
final int padSize = len - encodedLen; | |
return len / 4 * 3 - padSize; | |
} | |
private static int lastIndexOfAnyBut(final CharSequence haystack, final char needle) { | |
for (int index = haystack.length() - 1; index >= 0; index--) { | |
if (haystack.charAt(index) != needle) | |
return index; | |
} | |
return -1; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.deardeuff.ids.ordered; | |
import java.util.Arrays; | |
import com.google.common.base.Preconditions; | |
final class Ordered64EncodeMap { | |
private final char[] encoding; | |
private final byte[] decoding; | |
Ordered64EncodeMap(final String specialChars) { | |
encoding = createEncodeMap(specialChars); | |
decoding = createDecodeMap(encoding); | |
} | |
private char[] createEncodeMap(final String specialChars) { | |
final char[] map = ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789" + specialChars).toCharArray(); | |
Preconditions.checkArgument(map.length == 64); | |
Arrays.sort(map); | |
return map; | |
} | |
private byte[] createDecodeMap(final char[] encoding) { | |
final byte[] map = new byte[128]; | |
Arrays.fill(map, (byte) -1); | |
for (byte i = 0; i < 64; i++) { | |
map[encoding[i]] = i; | |
} | |
return map; | |
} | |
Ordered64EncodeMap changeDecodingTo(int toDecode, byte decoded) { | |
decoding[toDecode] = decoded; | |
return this; | |
} | |
char encodeByte(final int b) { | |
return encoding[b]; | |
} | |
byte decodeByte(final char c) { | |
final byte decoded = decoding[c]; | |
Preconditions.checkArgument(c != -1, "Illegal input character"); | |
return decoded; | |
} | |
} | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.deardeuff.ids.ordered; | |
import static org.hamcrest.Matchers.lessThan; | |
import static org.junit.Assert.assertEquals; | |
import static org.junit.Assert.assertThat; | |
import java.io.IOException; | |
import java.net.URI; | |
import java.net.URL; | |
import java.util.Arrays; | |
import org.junit.Assume; | |
import org.junit.Test; | |
import org.junit.experimental.theories.DataPoint; | |
import org.junit.experimental.theories.DataPoints; | |
import org.junit.experimental.theories.Theories; | |
import org.junit.experimental.theories.Theory; | |
import org.junit.runner.RunWith; | |
import com.google.common.base.Preconditions; | |
import com.google.common.io.ByteArrayDataOutput; | |
import com.google.common.io.ByteStreams; | |
import com.google.common.primitives.Longs; | |
@RunWith(Theories.class) | |
public class OrderedIdTest { | |
// This class tests two different encodings | |
enum Encoder { | |
VariableLength { | |
String encode(final long val) { | |
return OrderedLongEncoder.encode(val); | |
} | |
@Override | |
long decode(final String val) { | |
return OrderedLongEncoder.decode(val); | |
} | |
}, | |
FixedLength { | |
String encode(final long val) { | |
return Ordered64Binary.encodeOrdered64Binary( | |
Longs.toByteArray(val ^ Long.MIN_VALUE)); | |
} | |
@Override | |
long decode(final String val) { | |
return Longs.fromByteArray(Ordered64Binary.decodeOrdered64Binary(val)) | |
^ Long.MIN_VALUE; | |
} | |
}; | |
abstract String encode(final long val); | |
abstract long decode(final String val); | |
} | |
@DataPoint | |
public static Encoder variable = Encoder.VariableLength; | |
@DataPoint | |
public static Encoder fixed = Encoder.FixedLength; | |
@DataPoints | |
public static long[] cases = new long[] { | |
Long.MIN_VALUE, | |
Long.MIN_VALUE + 1, | |
Long.MAX_VALUE >> 1, | |
Integer.MIN_VALUE, | |
Short.MIN_VALUE, | |
Byte.MIN_VALUE, | |
-10, | |
-1, | |
0, | |
1, | |
10, | |
32, | |
63, | |
64, | |
65, | |
Byte.MAX_VALUE, | |
Short.MAX_VALUE - 1, | |
Short.MAX_VALUE, | |
Short.MAX_VALUE + 1, | |
Integer.MAX_VALUE, | |
Long.MAX_VALUE >> 1, | |
Long.MAX_VALUE - 1, | |
Long.MAX_VALUE, | |
System.nanoTime(), | |
-System.nanoTime(), | |
System.currentTimeMillis(), | |
-System.currentTimeMillis(), | |
}; | |
@Test | |
public void shouldCalculateCorrectLength() { | |
assertEquals("1~", variable.encode(63L)); | |
assertEquals("210", variable.encode(64L)); | |
} | |
@Theory | |
public void shouldRemainOrdered(final Encoder encoder, long low, long high) { | |
Assume.assumeThat(low, lessThan(high)); | |
assertThat( | |
String.format("should be lower low:%x, high:%x", low, high), | |
encoder.encode(low), lessThan(encoder.encode(high))); | |
} | |
@Theory | |
public void doPrint(final long num) { | |
System.out.format("%16x : %s : %s\n", num, | |
fixed.encode(num), variable.encode(num)); | |
} | |
@Theory | |
public void shouldBeUrlEncodable(final Encoder encoder, final long num) throws Exception { | |
final String encoded = encoder.encode(num); | |
// Note: java.net.URLEncoder does not correctly encode URLs! :( | |
final URL url = new URI("http", "grant-id", "/" + encoded, encoded).toURL(); | |
assertEquals("/" + encoded, url.getPath()); | |
assertEquals(encoded, url.getRef()); | |
} | |
@Theory | |
public void shouldDecodeWhatWasEncoded(final Encoder encoder, final long num) throws IOException { | |
final String encoded = encoder.encode(num); | |
assertEquals(num, encoder.decode(encoded)); | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package net.deardeuff.ids.ordered; | |
public final class OrderedLongEncoder { | |
private static final Ordered64EncodeMap encoding = new Ordered64EncodeMap("_~"); | |
private enum SignedEncoder { | |
POSITIVE { | |
protected long adjustSign(final long result) { return result; } | |
protected int sizeOfHeader() { return 1; } | |
}, | |
NEGATIVE { | |
protected long adjustSign(final long result) { return -result; } | |
protected int sizeOfHeader() { return 2; } | |
protected char encodeByte(final int b) { return super.encodeByte(63 - b); } | |
protected int decodeByte(final char c) { return 63 - super.decodeByte(c); } | |
}; | |
protected abstract long adjustSign(long result); | |
protected abstract int sizeOfHeader(); | |
protected char encodeByte(final int b) { return encoding.encodeByte(b); } | |
protected int decodeByte(final char c) { return encoding.decodeByte(c); } | |
String encode(final long num) { | |
final long abs = adjustSign(num); | |
final int len = encodedLength(abs); | |
final int sizeOfHeader = sizeOfHeader(); | |
final char[] out = new char[len + sizeOfHeader]; | |
out[0] = '-'; // will be replaced if positive | |
printTo(out, len, sizeOfHeader); | |
printTo(out, abs, out.length); | |
return new String(out); | |
} | |
private void printTo(final char[] buffer, final long num, final int bufferLength) { | |
int index = bufferLength - 1; | |
for (long n = num; n != 0; n >>>= 6, index--) { | |
final int remainder = (int)n & 63; | |
buffer[index] = encodeByte(remainder); | |
} | |
} | |
long decode(final CharSequence string) { | |
int index = sizeOfHeader() - 1; | |
final int length = decodeByte(string.charAt(index++)); | |
long result = 0; | |
for (final int end = index + length; index < end; index++) { | |
result = (result << 6) | decodeByte(string.charAt(index)); | |
} | |
return adjustSign(result); | |
} | |
static SignedEncoder encoderFromSign(final boolean isPositive) { | |
return isPositive ? SignedEncoder.POSITIVE : SignedEncoder.NEGATIVE; | |
} | |
} | |
public static String encode(final long num) { | |
if (num == 0) return "0"; | |
return SignedEncoder.encoderFromSign(num >= 0).encode(num); | |
} | |
public static long decode(final CharSequence string) { | |
return SignedEncoder.encoderFromSign(string.charAt(0) != '-').decode(string); | |
} | |
private static int encodedLength(final long num) { | |
int r = 0; | |
for (long n = num; n != 0; n >>>= 6) | |
r++; | |
return r; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment