Skip to content

Instantly share code, notes, and snippets.

@luccabb
Last active April 5, 2020 19:14
Show Gist options
  • Save luccabb/9548f7ef3c3a3215eb88f3b496ea37ce to your computer and use it in GitHub Desktop.
Save luccabb/9548f7ef3c3a3215eb88f3b496ea37ce to your computer and use it in GitHub Desktop.
Doing a merge sort breakable in 'k' possible ways to find what is the best 'k' to reduce time complexity
def Right(i):
# right child index
return 2*i + 2
def Left(i):
# left child index
return 2*i +1
def Parent(i):
# parent index
return (i-1)//2
def maxHeapify(A,i):
# it takes the node with index i and puts it in the right position
# in the heap to make it a maxHeap
l = Left(i)
r = Right(i)
# since we already know that element in index i will be: smaller
# than the right/left child or higher than both
# we are storing the index of the higher child in 'largest'
# which will be the substitution that we need to make, in case that we need to do any substitution
if l < (len(A)) and A[l] > A[i]:
largest = l
else:
largest = i
if r < (len(A)) and A[r] > A[largest]:
largest = r
# now we already have 'largest' in the index that we want to do the substitution
# we change values if 'largest' is not our current value 'i'
# ultimately we recursivelly call maxHeapify in 'largest' to make sure
# that our element will buble down to the right place it should be
if largest != i:
h = A[i]
A[i] = A[largest]
A[largest]= h
maxHeapify(A, largest)
return A
def buildMaxHeap(A):
# calling maxHeapify for every node except the leaves
# so we build an array with the max heap property
for a in range(int(len(A)//2),-1,-1):
maxHeapify(A,a)
def heapSort(A):
# building the max heap
buildMaxHeap(A)
# getting the first element (it will always be the higher one)
for i in range(len(A)-1, -1,-1):
# putting it in the last position of the list
A[i],A[0] = A[0], A[i]
# max-heapifying the whole list, except the sorted part
# So the next highest value comes to the top
# For loop running for all the elements in the array
A[:i] = maxHeapify(A[:i], 0)
return A
def kWayMergeSort(array, k):
start = time.time()
# getting the place where we should split the array in k elements
h = len(array)//k
# starting the array used to save the splits
merged =[]
# returning array if len(array) = 1
if len(array) == 1:
end = time.time()
return array, end-start
# in case the number of divisions is equal or higher than the array
# we are splitting it into every possible element so that every element
# has their 'len' = 1
elif k >= len(array):
for e in range(0,len(array)):
merged.extend(kWayMergeSort(array[e:e+1],k)[0])
# if we need to split our original array
elif h != 0:
# splitting the array in the number of parts we need it to be splitted
for i in range(0,len(array),h):
# getting 'h' elements at a time (which is the number of elements in one split
# of the original array)
merged.extend(kWayMergeSort(array[i:i+h], k)[0])
# getting the splits in a single array and sending it to be merged
# all the 'array's will be already sorted since the function is recursive
# and they already passed by this process.
# 'merged' will contain 'k' arrays sorted and 'kMerge' will do the sort
end = time.time()
return kMerge(merged), end-start
def kMerge(array):
# heapSorting the array received, so it does not matter
# how many arrays and their number of elements to be sorted
# it will always be merged 'k' arrays at a single time
return heapSort(array)
# Test cases
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],3)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly'
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],4)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly'
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],2)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly'
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 2)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly'
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 7)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly'
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 13)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly'
# Best Value of K hypothesis -------------------
def creatingArray(n):
# Function to create a non-sorted array of size 'n'
array = []
# creating a list of range specified by the first argument in the function
for a in range(n):
array.append(a)
# Now we are going to make the 'n' size divided by 2 swaps
# so the array is not sorted correctly
for _ in range(int(len(array)/2)):
i1, i2 = random.sample(array, 2)
array[i1], array[i2] = array[i2], array[i1]
# returning an unsorted array of the size that was specified
return array
# P.S: To try different input sizes please change this variable
siz = 10
# creating array of size specified
array1 = creatingArray(siz)
# creating arrays to plot the graph
timeSort = []
kvalue=[]
# for loop to run for all the possible values of 'k'
for k in range(2,siz+1):
# getting the time and the 'k' in an array so it can be graphed
timeSort.append(kWayMergeSort(array1, k)[1])
kvalue.append(k)
#Graphing the results
plt.plot(kvalue,timeSort)
plt.legend(['K-way Merge Sort'])
plt.title('Time X K For input of size '+ str(siz))
plt.xlabel('K')
plt.ylabel('Time in seconds')
plt.show()
@luccabb
Copy link
Author

luccabb commented Apr 5, 2020

image
image
image
image
image
image

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment