Skip to content

Instantly share code, notes, and snippets.

@lironsade
Last active November 29, 2018 15:02
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save lironsade/4426f4f821a875b8115c4fc489eab64d to your computer and use it in GitHub Desktop.
Save lironsade/4426f4f821a875b8115c4fc489eab64d to your computer and use it in GitHub Desktop.
####################################################
# Measure times of executions for DB's External Sort
# Assumptions:
# - Your ExternalMemory.jar is in /tmp
# - fileGenerator.jar in /tmp
# What does it do?
# Generates the files 250, 500, 750, 1000 in /tmp.
# Each file should be roughly as many MB as its name says.
# Creates the "/tmp/sortemp" directory if it does not exist.
# Measures running time of your program for each
# type (B,C), substring, file size and generates
# appropriate graphs.
# It selects and sorts col 1.
####################################################
import os
import subprocess as sp
import time
import matplotlib.pyplot as plt
# Jar under test and the scratch directory created for it; both names are
# relative to the working directory.
EX3_JAR = 'ExternalMemory.jar'
TMP_FOLDER = 'sortemp'

# Row count requested from the file generator for every input file.
ROWS_PER_RANDOM = 12500000

# Names of the generated input files (also the x-axis of the graphs):
# 250, 500, 750, 1000.
FILE_SIZES = list(range(250, 1001, 250))

# Selection substrings of length 1, 4 and 7; one graph is produced per entry.
SUBSTRS = ['a', 'xyzw', 'lironsa']
SUBSTR1, SUBSTR4, SUBSTR7 = SUBSTRS

# Column used for sorting and column used for selection.
COLSORT = 1
COLSEL = 1
def genRandomFile(outFile, numCols, numRows):
    """Create an input file by running ``fileGenerator.jar``.

    The jar is referenced by a relative name, so it is looked up in the
    current working directory.

    Args:
        outFile: Name of the file to generate (converted with ``str``).
        numCols: Forwarded to the generator as its second argument.
        numRows: Forwarded to the generator as its third argument.

    Raises:
        subprocess.CalledProcessError: If the generator exits with a
            non-zero status.
    """
    command = ['java', '-jar', 'fileGenerator.jar',
               str(outFile), str(numCols), str(numRows)]
    # check=True: a failed generation must stop the benchmark rather than
    # let later runs be timed against a missing or partial input file.
    sp.run(command, check=True)
def runProg(ex, input_file, outFile, colNumSort, tmpFolder, colNumSel, substrSelect):
    """Run the jar named by ``EX3_JAR`` once, with the JVM heap fixed at 50 MB.

    Args:
        ex: Program variant to run; callers in this file pass 'B' or 'C'.
        input_file: Name of the input file (converted with ``str``).
        outFile: Name of the output file (converted with ``str``).
        colNumSort: Sort column number (converted with ``str``).
        tmpFolder: Temporary folder handed to the program.
        colNumSel: Selection column number (converted with ``str``).
        substrSelect: Selection substring handed to the program.

    Raises:
        subprocess.CalledProcessError: If the program exits with a
            non-zero status.
    """
    jvm_command = ['java', '-jar', '-Xmx50m', '-Xms50m', EX3_JAR]
    program_args = [ex, str(input_file), str(outFile), str(colNumSort),
                    tmpFolder, str(colNumSel), substrSelect]
    # check=True: a crashed run must not be recorded as a valid timing.
    sp.run(jvm_command + program_args, check=True)
def generateFiles():
    """Generate the four benchmark input files named 250, 500, 750 and 1000.

    File number k (1 through 4) is named ``str(k * 250)`` and is requested
    with k columns and ``ROWS_PER_RANDOM`` rows.
    """
    for multiplier in range(1, 5):
        size_name = str(multiplier * 250)
        print("Generating file of size {} MB".format(size_name))
        genRandomFile(size_name, multiplier, ROWS_PER_RANDOM)
def _timedRun(ex, size, substr):
    """Run variant *ex* on the input file named *size*; return elapsed seconds."""
    print('Generating time for {}, size {}'.format(ex, size))
    # perf_counter is monotonic, so the measurement cannot be skewed by
    # wall-clock adjustments that happen during a long run.
    startTime = time.perf_counter()
    # Use the module constants so the runs always match the graph title.
    runProg(ex, str(size), substr + str(size) + '.' + ex,
            COLSORT, TMP_FOLDER, COLSEL, substr)
    runTime = time.perf_counter() - startTime
    print('Time was {}.'.format(runTime))
    return runTime


def timesOf(substr):
    """Measure the B and C variants on every input file for one substring.

    Args:
        substr: Selection substring passed to the program; it also prefixes
            the output file names ('<substr><size>.B' / '<substr><size>.C').

    Returns:
        A pair ``(b_times, c_times)`` of lists holding elapsed seconds, in
        the same order as ``FILE_SIZES``.
    """
    print('Generating times of substr {}'.format(substr))
    b_times = []
    c_times = []
    for size in FILE_SIZES:
        b_times.append(_timedRun('B', size, substr))
        c_times.append(_timedRun('C', size, substr))
    return b_times, c_times
def genGraph(substr):
    """Time both variants for *substr* and save the plot as '<substr>.png'.

    The B and C timings are drawn against ``FILE_SIZES`` on the current
    figure, with a legend and a title naming the sort column, the selection
    column and the substring.
    """
    b_times, c_times = timesOf(substr)
    for timings in (b_times, c_times):
        plt.plot(FILE_SIZES, timings, marker='o')
    plt.legend(['B', 'C'])
    heading = 'ColNumSort = {}, ColNumSel = {}, SubstrSel = {}'.format(
        str(COLSORT), str(COLSEL), substr)
    plt.title(heading)
    plt.savefig(substr + '.png')
def genGraphs():
    """Produce one timing graph for every entry of ``SUBSTRS``."""
    for pattern in SUBSTRS:
        genGraph(pattern)
        # Open a new figure so the next pattern's curves are not drawn on
        # top of the ones just saved.
        plt.figure()
if __name__ == '__main__':
    # Every file and jar name in this script is relative to /tmp.
    os.chdir('/tmp')
    generateFiles()
    # exist_ok=True replaces the separate existence test and avoids the
    # check-then-create race.
    os.makedirs(TMP_FOLDER, exist_ok=True)
    genGraphs()
@nadavvitri
Copy link

Very helpful! thanks!

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment