Created
October 2, 2019 20:21
-
-
Save ohaddahan/ac1140f4a7b8b159defb0cd9b6af8f46 to your computer and use it in GitHub Desktop.
Algorithm Optimisation: A Real Example - My insights
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /usr/bin/env ruby | |
require 'fileutils' | |
require 'benchmark/ips' | |
DATA_DIR = "tmp_dir" | |
FileUtils.mkdir_p DATA_DIR | |
FILE_SIZES = [ | |
100000, | |
1000000, | |
10000000, | |
100000000 | |
] | |
def create_input_file(data_dir, file_name) | |
system("base64 /dev/urandom | head -c #{file_name} > #{data_dir}/#{file_name}.txt") | |
end | |
puts "Creating test data files..." | |
FILE_SIZES.each do |i| | |
puts "#{i} bytes" | |
create_input_file DATA_DIR, i | |
end | |
results = {} | |
FILE_SIZES.each_with_index do |i, idx| | |
puts "Running for #{i} #{idx} / #{FILE_SIZES.size}" | |
results[i] = Benchmark.ips do |x| | |
x.report("org") do | |
system("./read_file org #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null") | |
end | |
x.report("new") do | |
system("./read_file new #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null") | |
end | |
x.report("new2") do | |
system("./read_file new2 #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null") | |
end | |
end | |
end | |
def traverse_results(output_file, results) | |
fh = File.new(output_file, "wb") | |
methods = %w(org new new2) | |
results.each_pair do |k, v| | |
tmp = {} | |
v.entries.each do |entry| | |
tmp[entry.label] = (entry.microseconds / entry.iterations).to_f.round(1) | |
end | |
out = "#{k}" | |
methods.each do |m| | |
out << " , #{tmp[m]}" | |
end | |
fh.puts(out) | |
end | |
fh.close | |
end | |
output_file = "#{Time.now.to_s.gsub(/\s+/,'_')}.csv" | |
traverse_results(output_file, results) | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/zsh | |
set -x | |
gcc -g -o read_file main.c |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <stdio.h> | |
#include <sys/stat.h> | |
#include <stdlib.h> | |
#include <unistd.h> | |
#include <fcntl.h> | |
#include <string.h> | |
#include <stdlib.h> | |
#include <sys/stat.h> | |
#include <fcntl.h> | |
#include <time.h> | |
#define BUFSIZE 4096 | |
#define NANO 1000000000 | |
struct ret { | |
int len; | |
time_t tv_sec; /* seconds */ | |
long tv_nsec; /* nanoseconds */ | |
long long tv_total; | |
}; | |
size_t read_file(int fd, char **data); | |
size_t read_file_improved(int fd, char **data); | |
size_t read_file_improved_2(int fd, char **data); | |
void print_data(char* data); | |
int main(int argc, char *argv[]) | |
{ | |
size_t len; | |
char *data = NULL; | |
if (argc != 3) { | |
fprintf(stderr, "provide type and file\n"); | |
return 1; | |
} | |
printf("argc = %d , argv[1] = %s, argv[2] = '%s'\n", argc, argv[1], argv[2]); | |
int fd = open(argv[2], O_RDONLY); | |
struct ret ret_val; | |
struct timespec ts_start; | |
struct timespec ts_end; | |
clock_gettime(CLOCK_MONOTONIC, &ts_start); | |
if (strcmp(argv[1], "org") == 0) { | |
len = read_file(fd, &data); | |
} else if (strcmp(argv[1], "new") == 0) { | |
len = read_file_improved(fd, &data); | |
} else if (strcmp(argv[1], "new2") == 0) { | |
len = read_file_improved_2(fd, &data); | |
} else { | |
printf("Error in args\n"); | |
return 1; | |
} | |
clock_gettime(CLOCK_MONOTONIC, &ts_end); | |
ret_val.len = len; | |
ret_val.tv_sec = ts_end.tv_sec - ts_start.tv_sec; | |
ret_val.tv_nsec = ts_end.tv_nsec - ts_start.tv_nsec; | |
ret_val.tv_total = ret_val.tv_sec * NANO + ret_val.tv_nsec; | |
printf("%ld , %llu\n", len, ret_val.tv_total); | |
return 0; | |
} | |
size_t read_file(int fd, char **data) | |
{ | |
char buf[BUFSIZE]; | |
size_t len = 0; | |
ssize_t read_len; | |
char *tmp; | |
/* chunked read from fd into data */ | |
while ((read_len = read(fd, buf, BUFSIZE)) > 0) | |
{ | |
tmp = malloc(len + read_len); | |
memcpy(tmp, *data, len); | |
memcpy(tmp + len, buf, read_len); | |
free(*data); | |
*data = tmp; | |
len += read_len; | |
} | |
print_data(*data); | |
return len; | |
} | |
typedef struct node_t | |
{ | |
char *data; | |
int len; | |
struct node_t *next; | |
} node_t; | |
size_t read_file_improved(int fd, char **data) | |
{ | |
char buf[BUFSIZE]; | |
size_t read_len; | |
size_t len = 0; | |
/* build linked list containing chunks of data read from fd */ | |
node_t *head = NULL; | |
node_t *prev = NULL; | |
while ((read_len = read(fd, buf, BUFSIZE)) > 0) | |
{ | |
node_t *node = malloc(sizeof(node_t)); | |
node->data = malloc(read_len); | |
memcpy(node->data, buf, read_len); | |
node->len = read_len; | |
node->next = NULL; | |
/* first chunk */ | |
if (head == NULL) | |
{ | |
head = node; | |
} | |
/* second chunk onwards */ | |
if (prev != NULL) | |
{ | |
prev->next = node; | |
} | |
prev = node; | |
len += read_len; | |
} | |
/* copy each chunk into data once only */ | |
*data = malloc(len); | |
char *p = *data; | |
node_t *cur = head; | |
while (cur != NULL) | |
{ | |
memcpy(p, cur->data, cur->len); | |
p += cur->len; | |
cur = cur->next; | |
} | |
print_data(*data); | |
return len; | |
} | |
size_t read_file_improved_2(int fd, char **data) | |
{ | |
char buf[BUFSIZE]; | |
size_t read_len; | |
size_t len = 0; | |
size_t size; | |
struct stat st; | |
fstat(fd, &st); | |
size = st.st_size; | |
*data = malloc(size); | |
while ((read_len = read(fd, *data + len, BUFSIZE)) > 0) { | |
len += read_len; | |
} | |
print_data(*data); | |
return len; | |
} | |
void print_data(char *data) { | |
if (getenv("VERBOSE") != NULL) { | |
printf("\n%s\n", data); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment