Skip to content

Instantly share code, notes, and snippets.

@MartinThoma
Created February 13, 2015 06:15
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 1 You must be signed in to fork a gist
  • Save MartinThoma/eb1e56405009839804e7 to your computer and use it in GitHub Desktop.
Save MartinThoma/eb1e56405009839804e7 to your computer and use it in GitHub Desktop.
Read files in a folder and measure speed
#!/usr/bin/env python
"""Create text files of different sizes with random content."""
import random
import string
import logging
import sys
# Send log records of level DEBUG and above to stdout, each line prefixed with
# a timestamp and the level name.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
stream=sys.stdout)
def create_file(filename, size):
    """
    Create a file of a given size filled with random characters.

    The content is drawn from ASCII letters and whitespace characters and is
    written in binary mode, so no newline translation can alter the number of
    bytes that end up on disk.

    Use values over 100,000,000 with caution as it might render your PC
    useless for a couple of minutes: the whole content is built in memory
    before it is written.

    @param filename: The path of the file to create (truncated if it exists)
    @param size: Size in bytes
    """
    # string.letters exists only in Python 2 (and is locale dependent there);
    # string.ascii_letters is available in both Python 2 and Python 3.
    symbols = string.ascii_letters + string.whitespace
    random_string = ''.join(random.choice(symbols) for _ in range(size))
    # Every symbol is ASCII, so each character encodes to exactly one byte.
    with open(filename, 'wb') as f:
        f.write(random_string.encode('ascii'))
def main():
    """Create one random text file for each of the predefined sizes."""
    # The tuple holds sizes in multiples of 1024 bytes; each file is named
    # after its size in bytes.
    for factor in (10, 100, 1024, 10 * 1024, 100 * 1024):
        n_bytes = factor * 1024
        logging.info("Start creation of %i Byte file.", n_bytes)
        target = "%i.txt" % n_bytes
        create_file(target, n_bytes)


if __name__ == '__main__':
    main()
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Read all files in current directory 100 times and measure the time it takes.
"""
from os import listdir
from os.path import isfile, join
import logging
import sys
import time
import random
import natsort
from numpy import average, median
# Send log records of level DEBUG and above to stdout, each line prefixed with
# a timestamp and the level name.
logging.basicConfig(format='%(asctime)s %(levelname)s %(message)s',
level=logging.DEBUG,
stream=sys.stdout)
def read_file(path):
    """
    Read a file in one go and return everything it contains.

    :param path: Path of the file to read (opened in the default text mode)
    :returns: The complete content of the file
    """
    with open(path) as handle:
        return handle.read()
def print_results(times):
    """
    Print a table with the min, max, median and average time of every file.

    Rows are ordered by ``natsort.natsorted`` applied to the dictionary
    items. An empty dictionary prints just the header and the separator
    line.

    :param times: A dictionary which maps file paths to lists of execution
                  times. Every list must contain at least one value,
                  because ``min``/``max`` raise ValueError on an empty list.
    """
    label = 'file_name'
    rows = natsort.natsorted(times.items())
    # The first column has to fit the longest file name *and* the column
    # label. Including the label keeps the header aligned when all file names
    # are short and avoids calling max() on an empty sequence.
    width = max([len(f) for f in times] + [len(label)]) + 2
    fformatter = "{0:<" + str(width) + "}"
    fheader = u"" + fformatter + u"{1:>6}{2:>10}{3:>10}{4:>10}"
    header = fheader.format(label, 'min', 'max', 'median', 'average')
    print(header)
    print("-"*len(header))
    for filename, t in rows:
        min_str = "%0.4f" % min(t)
        max_str = "%0.4f" % max(t)
        med_str = "%0.4f" % median(t)
        avg_str = "%0.4f" % average(t)
        print(fheader.format(filename, min_str, max_str, med_str, avg_str))
def main():
    """
    Read every file in the current directory 100 times and report timings.

    All reads are executed in one randomly shuffled sequence. The duration of
    each read is logged and collected per file name; a summary table is
    printed at the end.
    """
    # Local import so this function carries its own dependency.
    # timeit.default_timer selects the clock the standard library recommends
    # for timing on the running platform/Python version; time.time() can be
    # too coarse to time a single read and may jump if the system clock is
    # adjusted.
    from timeit import default_timer

    mypath = '.'
    onlyfiles = [f for f in listdir(mypath) if isfile(join(mypath, f))]
    times = {filename: [] for filename in onlyfiles}

    # Make every filename occur 100 times in the list, then shuffle so the
    # reads of one file are not executed back to back.
    readlist = [filename for filename in onlyfiles for _ in range(100)]
    random.shuffle(readlist)

    # Execute the test
    for file_path in readlist:
        # Read through the same joined path that was used for the isfile()
        # check above.
        full_path = join(mypath, file_path)
        start_time = default_timer()
        content = read_file(full_path)
        read_time = default_timer() - start_time
        # Show the first and the last four characters of what was read.
        print(content[:4] + content[-4:])
        logging.info("%s needed %0.2f seconds to read.", file_path, read_time)
        times[file_path].append(read_time)

    # Print the results
    print_results(times)


if __name__ == '__main__':
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment