Read files in a folder and measure speed
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
"""Create text files of different sizes with random content.""" | |
import random | |
import string | |
import logging | |
import sys | |
# Route timestamped log records of every level (DEBUG and up) to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(message)s',
)
def create_file(filename, size):
    """
    Create a file of a given size with random characters in it.

    The characters are drawn at random from the ASCII letters and the
    whitespace characters. The complete content is built in memory before it
    is written, so use values over 100,000,000 with caution as it might render
    your PC useless for a couple of minutes.

    @param filename: The path to the filename
    @param size: Size in bytes (number of characters written; every character
                 is ASCII. NOTE: the file is opened in text mode, so platform
                 newline translation may change the on-disk size on Windows)
    """
    # ``string.letters`` only exists on Python 2; ``string.ascii_letters`` is
    # available on both Python 2 and Python 3.
    symbols = string.ascii_letters + string.whitespace
    random_string = ''.join(random.choice(symbols) for _ in range(size))
    with open(filename, 'w') as f:
        f.write(random_string)
def main():
    """Create five random text files, from 10 KiB up to 100 MiB.

    Each file is named after its size in bytes (``<size>.txt``) and is
    written relative to the current working directory.
    """
    for kib in (10, 100, 1024, 10 * 1024, 100 * 1024):
        n_bytes = kib * 1024
        logging.info("Start creation of %i Byte file.", n_bytes)
        target = "%i.txt" % n_bytes
        create_file(target, n_bytes)


# Only generate the files when executed as a script, not on import.
if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
""" | |
Read all files in current directory 100 times and measure the time it takes. | |
""" | |
from os import listdir | |
from os.path import isfile, join | |
import logging | |
import sys | |
import time | |
import random | |
import natsort | |
from numpy import average, median | |
# Route timestamped log records of every level (DEBUG and up) to stdout.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format='%(asctime)s %(levelname)s %(message)s',
)
def read_file(path):
    """Return the entire content of the file at ``path`` as one string.

    The file is opened with the default (text, read-only) mode and is closed
    again before the function returns.
    """
    with open(path) as source:
        return source.read()
def print_results(times):
    """
    Print a table with the min, max, median and average time per file.

    The header and a separator line are always printed. Rows follow in the
    natural sort order of the (file name, timings) items; a file without any
    recorded timing gets ``n/a`` in every statistics column.

    :param times: A dictionary which maps file paths to lists of execution
                  times
    """
    name_label = 'file_name'
    # The name column has to fit the header label as well as the longest
    # file name; including the label also keeps max() from failing when
    # ``times`` is empty.
    name_width = max([len(name) for name in times] + [len(name_label)]) + 2
    row_format = (u"{0:<" + str(name_width) + u"}"
                  u"{1:>6}{2:>10}{3:>10}{4:>10}")
    header = row_format.format(name_label, 'min', 'max', 'median', 'average')
    print(header)
    print("-" * len(header))
    if not times:
        # Nothing was measured: the header alone is the complete table.
        return
    for filename, t in natsort.natsorted(times.items()):
        if len(t) == 0:
            # min()/max() raise on an empty sequence, so show a placeholder.
            cells = ['n/a'] * 4
        else:
            cells = ["%0.4f" % stat(t)
                     for stat in (min, max, median, average)]
        print(row_format.format(filename, *cells))
def main():
    """
    Read every file in the current directory 100 times and report timings.

    All regular files directly inside the directory are read in a random
    order. The time of each single read is logged and collected, and a
    summary table is printed at the end.
    """
    mypath = '.'
    repetitions = 100
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    times = {filename: [] for filename in onlyfiles}

    # Make every filename occur ``repetitions`` times in the list, then
    # randomize the order in which the reads happen.
    readlist = [name for name in onlyfiles for _ in range(repetitions)]
    random.shuffle(readlist)

    # Intervals should be measured with a monotonic, high-resolution clock.
    # ``time.perf_counter`` does not exist on Python 2, so fall back to the
    # wall clock there.
    timer = getattr(time, 'perf_counter', time.time)

    # Execute the test
    for file_path in readlist:
        start_time = timer()
        # Open the file relative to the directory that was listed.
        content = read_file(join(mypath, file_path))
        read_time = timer() - start_time
        # Echo the first and last four characters of what was read.
        print(content[:4] + content[-4:])
        logging.info("%s needed %0.2f seconds to read.", file_path, read_time)
        times[file_path].append(read_time)

    # Print the results
    print_results(times)


# Only run the benchmark when executed as a script, not on import.
if __name__ == '__main__':
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment