Skip to content

Instantly share code, notes, and snippets.

@flysand7
Created July 12, 2021 15:29
Show Gist options
  • Save flysand7/5b0d06171fd7a9cf42f68379b4dd4c97 to your computer and use it in GitHub Desktop.
Save flysand7/5b0d06171fd7a9cf42f68379b4dd4c97 to your computer and use it in GitHub Desktop.
# This script correctly sorts strings that contain mix of characters
# and numbers.
# For example it is common for files in filesystems to
# have the following names:
# "file1"
# "file2"
# ...
# "file10"
# Though most operating systems sort blindly by comparing
# the values of code points, resulting in this pattern:
# "file1"
# "file10"
# ...
# "file2"
# ...
# This script is an implementation of a correct alpha-numeric sorting
# algorithm that avoids the problems above. The key difference is in
# `sorted` function, which checks whether a char is a digit, and if
# it is, it parses the number.
def is_digit(ch):
return '0' <= ch and ch <= '9'
# Note: letters always occur later than the numbers
# e.g. 'a0' < 'a10' < 'ab' < 'ac'
def sorted(L, R):
li = 0
ri = 0
while li < len(L) and ri < len(R):
leftIsDigit = is_digit(L[li])
rightIsDigit = is_digit(R[ri])
# If both chars are digits
if leftIsDigit and rightIsDigit:
# Parse the integers
leftNum = 0
rightNum = 0
while li < len(L) and is_digit(L[li]):
leftNum = 10*leftNum + int(L[li])
li += 1
while ri < len(R) and is_digit(R[ri]):
rightNum = 10*rightNum + int(R[ri])
ri += 1
# If the numbers aren't equal check if they're in order
# otherwise continue checking the strings
if leftNum != rightNum:
return leftNum < rightNum
# Letters occur later than digits
elif leftIsDigit and not rightIsDigit:
return True
elif not leftIsDigit and rightIsDigit:
return False
# Otherwise do it the classical way
elif L[li] != R[ri]:
return L[li] < R[ri]
else:
li += 1
ri += 1
# If one is a substring of another, shorter sequences
# must occur before longer sequences
return len(L) < len(R)
# Bubble sorting
def alphanumSort(A):
AL = len(A)
for i in range(AL):
for j in range(0, (AL-1)-i):
adjLeft = A[j]
adjRight = A[j+1]
if not sorted(adjLeft, adjRight):
A[j] = adjRight
A[j+1] = adjLeft
return A
## Usage
testArray = [
'b',
'a',
'alpha',
'date2020-01-02',
'file2',
'file10',
'date2021-02-01',
'file1',
'beta',
'date2020-01-29',
]
ret = alphanumSort(testArray)
for el in testArray:
print(el)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment