Last active
April 5, 2020 19:14
-
-
Save luccabb/9548f7ef3c3a3215eb88f3b496ea37ce to your computer and use it in GitHub Desktop.
Doing a merge sort breakable in 'k' possible ways to find what is the best 'k' to reduce time complexity
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def Right(i): | |
# right child index | |
return 2*i + 2 | |
def Left(i): | |
# left child index | |
return 2*i +1 | |
def Parent(i): | |
# parent index | |
return (i-1)//2 | |
def maxHeapify(A,i): | |
# it takes the node with index i and puts it in the right position | |
# in the heap to make it a maxHeap | |
l = Left(i) | |
r = Right(i) | |
# since we already know that element in index i will be: smaller | |
# than the right/left child or higher than both | |
# we are storing the index of the higher child in 'largest' | |
# which will be the substitution that we need to make, in case that we need to do any substitution | |
if l < (len(A)) and A[l] > A[i]: | |
largest = l | |
else: | |
largest = i | |
if r < (len(A)) and A[r] > A[largest]: | |
largest = r | |
# now we already have 'largest' in the index that we want to do the substitution | |
# we change values if 'largest' is not our current value 'i' | |
# ultimately we recursivelly call maxHeapify in 'largest' to make sure | |
# that our element will buble down to the right place it should be | |
if largest != i: | |
h = A[i] | |
A[i] = A[largest] | |
A[largest]= h | |
maxHeapify(A, largest) | |
return A | |
def buildMaxHeap(A): | |
# calling maxHeapify for every node except the leaves | |
# so we build an array with the max heap property | |
for a in range(int(len(A)//2),-1,-1): | |
maxHeapify(A,a) | |
def heapSort(A): | |
# building the max heap | |
buildMaxHeap(A) | |
# getting the first element (it will always be the higher one) | |
for i in range(len(A)-1, -1,-1): | |
# putting it in the last position of the list | |
A[i],A[0] = A[0], A[i] | |
# max-heapifying the whole list, except the sorted part | |
# So the next highest value comes to the top | |
# For loop running for all the elements in the array | |
A[:i] = maxHeapify(A[:i], 0) | |
return A | |
def kWayMergeSort(array, k): | |
start = time.time() | |
# getting the place where we should split the array in k elements | |
h = len(array)//k | |
# starting the array used to save the splits | |
merged =[] | |
# returning array if len(array) = 1 | |
if len(array) == 1: | |
end = time.time() | |
return array, end-start | |
# in case the number of divisions is equal or higher than the array | |
# we are splitting it into every possible element so that every element | |
# has their 'len' = 1 | |
elif k >= len(array): | |
for e in range(0,len(array)): | |
merged.extend(kWayMergeSort(array[e:e+1],k)[0]) | |
# if we need to split our original array | |
elif h != 0: | |
# splitting the array in the number of parts we need it to be splitted | |
for i in range(0,len(array),h): | |
# getting 'h' elements at a time (which is the number of elements in one split | |
# of the original array) | |
merged.extend(kWayMergeSort(array[i:i+h], k)[0]) | |
# getting the splits in a single array and sending it to be merged | |
# all the 'array's will be already sorted since the function is recursive | |
# and they already passed by this process. | |
# 'merged' will contain 'k' arrays sorted and 'kMerge' will do the sort | |
end = time.time() | |
return kMerge(merged), end-start | |
def kMerge(array): | |
# heapSorting the array received, so it does not matter | |
# how many arrays and their number of elements to be sorted | |
# it will always be merged 'k' arrays at a single time | |
return heapSort(array) | |
# Test cases | |
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],3)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly' | |
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],4)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly' | |
assert kWayMergeSort([9,3,2,1,7,5,4,8,6],2)[0] == [1, 2, 3, 4, 5, 6, 7, 8, 9], 'Array not merged correctly' | |
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 2)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly' | |
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 7)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly' | |
assert kWayMergeSort([89,71,26,19,3,79,44,70,1,52,43,35,64,72,66,32,59,81,21,16,50,37,58,24,68,6,7,45,76,34], 13)[0] == [1, 3, 6, 7, 16, 19, 21, 24, 26, 32, 34, 35, 37, 43, 44, 45, 50, 52, 58, 59, 64, 66, 68, 70, 71, 72, 76, 79, 81, 89], 'Array not merged correctly' | |
# Best Value of K hypothesis ------------------- | |
def creatingArray(n): | |
# Function to create a non-sorted array of size 'n' | |
array = [] | |
# creating a list of range specified by the first argument in the function | |
for a in range(n): | |
array.append(a) | |
# Now we are going to make the 'n' size divided by 2 swaps | |
# so the array is not sorted correctly | |
for _ in range(int(len(array)/2)): | |
i1, i2 = random.sample(array, 2) | |
array[i1], array[i2] = array[i2], array[i1] | |
# returning an unsorted array of the size that was specified | |
return array | |
# P.S: To try different input sizes please change this variable | |
siz = 10 | |
# creating array of size specified | |
array1 = creatingArray(siz) | |
# creating arrays to plot the graph | |
timeSort = [] | |
kvalue=[] | |
# for loop to run for all the possible values of 'k' | |
for k in range(2,siz+1): | |
# getting the time and the 'k' in an array so it can be graphed | |
timeSort.append(kWayMergeSort(array1, k)[1]) | |
kvalue.append(k) | |
#Graphing the results | |
plt.plot(kvalue,timeSort) | |
plt.legend(['K-way Merge Sort']) | |
plt.title('Time X K For input of size '+ str(siz)) | |
plt.xlabel('K') | |
plt.ylabel('Time in seconds') | |
plt.show() |
Author
luccabb
commented
Apr 5, 2020
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment