Created
June 16, 2018 09:57
-
-
Save zhanghai/57ab1232405435fb1ef6e6af06b5dc36 to your computer and use it in GitHub Desktop.
Java Comparator for Natural Sort Order
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/* | |
* Copyright (c) 2018 Zhang Hai <Dreaming.in.Code.ZH@Gmail.com> | |
* | |
* Licensed under the Apache License, Version 2.0 (the "License"); | |
* you may not use this file except in compliance with the License. | |
* You may obtain a copy of the License at | |
* | |
* http://www.apache.org/licenses/LICENSE-2.0 | |
* | |
* Unless required by applicable law or agreed to in writing, software | |
* distributed under the License is distributed on an "AS IS" BASIS, | |
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |
* See the License for the specific language governing permissions and | |
* limitations under the License. | |
*/ | |
import java.util.Comparator; | |
public class NaturalOrderComparator implements Comparator<String> { | |
private static final int DIGIT_RADIX = 10; | |
@Override | |
public int compare(String string1, String string2) { | |
int start1 = 0; | |
int start2 = 0; | |
int leadingZeroCompareResult = 0; | |
while (start1 < string1.length() && start2 < string2.length()) { | |
int codePoint1 = string1.codePointAt(start1); | |
int codePoint2 = string2.codePointAt(start2); | |
// Check if both code points are digits. | |
if (!Character.isDigit(codePoint1) || !Character.isDigit(codePoint2)) { | |
if (!codePointEqualsIgnoreCase(codePoint1, codePoint2)) { | |
return codePointCompareToIgnoreCase(codePoint1, codePoint2); | |
} | |
start1 = string1.offsetByCodePoints(start1, 1); | |
start2 = string2.offsetByCodePoints(start2, 1); | |
continue; | |
} | |
// Get end of current number. | |
int end1 = start1; | |
do { | |
end1 = string1.offsetByCodePoints(end1, 1); | |
} while (end1 < string1.length() && Character.isDigit(string1.codePointAt(end1))); | |
int end2 = start2; | |
do { | |
end2 = string2.offsetByCodePoints(end2, 1); | |
} while (end2 < string2.length() && Character.isDigit(string2.codePointAt(end2))); | |
// Get start of current number without leading zeros. | |
int noLeadingZeroStart1 = start1; | |
while (noLeadingZeroStart1 < end1 && Character.digit(string1.codePointAt( | |
noLeadingZeroStart1), DIGIT_RADIX) == 0) { | |
noLeadingZeroStart1 = string1.offsetByCodePoints(noLeadingZeroStart1, 1); | |
} | |
int noLeadingZeroStart2 = start2; | |
while (noLeadingZeroStart2 < end2 && Character.digit(string2.codePointAt( | |
noLeadingZeroStart2), DIGIT_RADIX) == 0) { | |
noLeadingZeroStart2 = string2.offsetByCodePoints(noLeadingZeroStart2, 1); | |
} | |
// If the two lengths of numbers (without leading zeros) differ, the shorter one comes | |
// first. | |
int noLeadingZeroLength1 = string1.codePointCount(noLeadingZeroStart1, end1); | |
int noLeadingZeroLength2 = string2.codePointCount(noLeadingZeroStart2, end2); | |
if (noLeadingZeroLength1 != noLeadingZeroLength2) { | |
return noLeadingZeroLength1 - noLeadingZeroLength2; | |
} | |
// If any two digits starting from the first non-zero ones differs, the less one comes | |
// first. | |
for (int digitIndex1 = noLeadingZeroStart1, digitIndex2 = noLeadingZeroStart2; | |
digitIndex1 < end1; digitIndex1 = string1.offsetByCodePoints(digitIndex1, 1), | |
digitIndex2 = string2.offsetByCodePoints(digitIndex2, 1)) { | |
int digit1 = Character.digit(string1.codePointAt(digitIndex1), DIGIT_RADIX); | |
int digit2 = Character.digit(string2.codePointAt(digitIndex2), DIGIT_RADIX); | |
if (digit1 != digit2) { | |
return digit1 - digit2; | |
} | |
} | |
// If the two numbers are the same, the one with less leading zeros (shorter) comes | |
// first. | |
int leadingZeroLength1 = string1.codePointCount(start1, noLeadingZeroStart1); | |
int leadingZeroLength2 = string2.codePointCount(start2, noLeadingZeroStart2); | |
if (leadingZeroLength1 != leadingZeroLength2) { | |
if (leadingZeroCompareResult == 0) { | |
leadingZeroCompareResult = leadingZeroLength1 - leadingZeroLength2; | |
} | |
} | |
start1 = end1; | |
start2 = end2; | |
} | |
// If one of the two strings is exhausted first, it comes first. | |
int remainingLength1 = string1.codePointCount(start1, string1.length()); | |
int remainingLength2 = string2.codePointCount(start2, string2.length()); | |
if (remainingLength1 != remainingLength2) { | |
return remainingLength1 - remainingLength2; | |
} | |
// The one with less leading zeros (shorter) comes first if others are the same. | |
if (leadingZeroCompareResult != 0) { | |
return leadingZeroCompareResult; | |
} | |
// Fall back to plain comparison. | |
return string1.compareTo(string2); | |
} | |
// @see String#regionMatches(boolean, int, String, int, int) | |
private static boolean codePointEqualsIgnoreCase(int codePoint1, int codePoint2) { | |
codePoint1 = Character.toUpperCase(codePoint1); | |
codePoint2 = Character.toUpperCase(codePoint2); | |
if (codePoint1 == codePoint2) { | |
return true; | |
} | |
codePoint1 = Character.toLowerCase(codePoint1); | |
codePoint2 = Character.toLowerCase(codePoint2); | |
return codePoint1 == codePoint2; | |
} | |
// @see String.CaseInsensitiveComparator#compare(String, String) | |
private static int codePointCompareToIgnoreCase(int codePoint1, int codePoint2) { | |
if (codePoint1 != codePoint2) { | |
codePoint1 = Character.toUpperCase(codePoint1); | |
codePoint2 = Character.toUpperCase(codePoint2); | |
if (codePoint1 != codePoint2) { | |
codePoint1 = Character.toUpperCase(codePoint1); | |
codePoint2 = Character.toUpperCase(codePoint2); | |
if (codePoint1 != codePoint2) { | |
return codePoint1 - codePoint2; | |
} | |
} | |
} | |
return 0; | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment