Created
July 12, 2021 15:29
-
-
Save flysand7/5b0d06171fd7a9cf42f68379b4dd4c97 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# This script correctly sorts strings that contain mix of characters | |
# and numbers. | |
# For example it is common for files in filesystems to | |
# have the following names: | |
# "file1" | |
# "file2" | |
# ... | |
# "file10" | |
# Though most operating systems sort blindly by comparing | |
# the values of code points, resulting in this pattern: | |
# "file1" | |
# "file10" | |
# ... | |
# "file2" | |
# ... | |
# This script is an implementation of a correct alpha-numeric sorting | |
# algorithm that avoids the problems above. The key difference is in | |
# `sorted` function, which checks whether a char is a digit, and if | |
# it is, it parses the number. | |
def is_digit(ch): | |
return '0' <= ch and ch <= '9' | |
# Note: letters always occur later than the numbers | |
# e.g. 'a0' < 'a10' < 'ab' < 'ac' | |
def sorted(L, R): | |
li = 0 | |
ri = 0 | |
while li < len(L) and ri < len(R): | |
leftIsDigit = is_digit(L[li]) | |
rightIsDigit = is_digit(R[ri]) | |
# If both chars are digits | |
if leftIsDigit and rightIsDigit: | |
# Parse the integers | |
leftNum = 0 | |
rightNum = 0 | |
while li < len(L) and is_digit(L[li]): | |
leftNum = 10*leftNum + int(L[li]) | |
li += 1 | |
while ri < len(R) and is_digit(R[ri]): | |
rightNum = 10*rightNum + int(R[ri]) | |
ri += 1 | |
# If the numbers aren't equal check if they're in order | |
# otherwise continue checking the strings | |
if leftNum != rightNum: | |
return leftNum < rightNum | |
# Letters occur later than digits | |
elif leftIsDigit and not rightIsDigit: | |
return True | |
elif not leftIsDigit and rightIsDigit: | |
return False | |
# Otherwise do it the classical way | |
elif L[li] != R[ri]: | |
return L[li] < R[ri] | |
else: | |
li += 1 | |
ri += 1 | |
# If one is a substring of another, shorter sequences | |
# must occur before longer sequences | |
return len(L) < len(R) | |
# Bubble sorting | |
def alphanumSort(A): | |
AL = len(A) | |
for i in range(AL): | |
for j in range(0, (AL-1)-i): | |
adjLeft = A[j] | |
adjRight = A[j+1] | |
if not sorted(adjLeft, adjRight): | |
A[j] = adjRight | |
A[j+1] = adjLeft | |
return A | |
## Usage | |
testArray = [ | |
'b', | |
'a', | |
'alpha', | |
'date2020-01-02', | |
'file2', | |
'file10', | |
'date2021-02-01', | |
'file1', | |
'beta', | |
'date2020-01-29', | |
] | |
ret = alphanumSort(testArray) | |
for el in testArray: | |
print(el) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment