Skip to content

Instantly share code, notes, and snippets.

@ohaddahan
Created October 2, 2019 20:21
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save ohaddahan/ac1140f4a7b8b159defb0cd9b6af8f46 to your computer and use it in GitHub Desktop.
Save ohaddahan/ac1140f4a7b8b159defb0cd9b6af8f46 to your computer and use it in GitHub Desktop.
Algorithm Optimisation: A Real Example - My insights
#! /usr/bin/env ruby
require 'fileutils'
require 'benchmark/ips'
DATA_DIR = "tmp_dir"
FileUtils.mkdir_p DATA_DIR
FILE_SIZES = [
100000,
1000000,
10000000,
100000000
]
def create_input_file(data_dir, file_name)
system("base64 /dev/urandom | head -c #{file_name} > #{data_dir}/#{file_name}.txt")
end
puts "Creating test data files..."
FILE_SIZES.each do |i|
puts "#{i} bytes"
create_input_file DATA_DIR, i
end
results = {}
FILE_SIZES.each_with_index do |i, idx|
puts "Running for #{i} #{idx} / #{FILE_SIZES.size}"
results[i] = Benchmark.ips do |x|
x.report("org") do
system("./read_file org #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null")
end
x.report("new") do
system("./read_file new #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null")
end
x.report("new2") do
system("./read_file new2 #{DATA_DIR}/#{i}.txt 2>&1 1>/dev/null")
end
end
end
def traverse_results(output_file, results)
fh = File.new(output_file, "wb")
methods = %w(org new new2)
results.each_pair do |k, v|
tmp = {}
v.entries.each do |entry|
tmp[entry.label] = (entry.microseconds / entry.iterations).to_f.round(1)
end
out = "#{k}"
methods.each do |m|
out << " , #{tmp[m]}"
end
fh.puts(out)
end
fh.close
end
output_file = "#{Time.now.to_s.gsub(/\s+/,'_')}.csv"
traverse_results(output_file, results)
#!/bin/zsh
set -x
gcc -g -o read_file main.c
#include <stdio.h>
#include <sys/stat.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>
#include <string.h>
#include <stdlib.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <time.h>
#define BUFSIZE 4096
#define NANO 1000000000
struct ret {
int len;
time_t tv_sec; /* seconds */
long tv_nsec; /* nanoseconds */
long long tv_total;
};
size_t read_file(int fd, char **data);
size_t read_file_improved(int fd, char **data);
size_t read_file_improved_2(int fd, char **data);
void print_data(char* data);
int main(int argc, char *argv[])
{
size_t len;
char *data = NULL;
if (argc != 3) {
fprintf(stderr, "provide type and file\n");
return 1;
}
printf("argc = %d , argv[1] = %s, argv[2] = '%s'\n", argc, argv[1], argv[2]);
int fd = open(argv[2], O_RDONLY);
struct ret ret_val;
struct timespec ts_start;
struct timespec ts_end;
clock_gettime(CLOCK_MONOTONIC, &ts_start);
if (strcmp(argv[1], "org") == 0) {
len = read_file(fd, &data);
} else if (strcmp(argv[1], "new") == 0) {
len = read_file_improved(fd, &data);
} else if (strcmp(argv[1], "new2") == 0) {
len = read_file_improved_2(fd, &data);
} else {
printf("Error in args\n");
return 1;
}
clock_gettime(CLOCK_MONOTONIC, &ts_end);
ret_val.len = len;
ret_val.tv_sec = ts_end.tv_sec - ts_start.tv_sec;
ret_val.tv_nsec = ts_end.tv_nsec - ts_start.tv_nsec;
ret_val.tv_total = ret_val.tv_sec * NANO + ret_val.tv_nsec;
printf("%ld , %llu\n", len, ret_val.tv_total);
return 0;
}
size_t read_file(int fd, char **data)
{
char buf[BUFSIZE];
size_t len = 0;
ssize_t read_len;
char *tmp;
/* chunked read from fd into data */
while ((read_len = read(fd, buf, BUFSIZE)) > 0)
{
tmp = malloc(len + read_len);
memcpy(tmp, *data, len);
memcpy(tmp + len, buf, read_len);
free(*data);
*data = tmp;
len += read_len;
}
print_data(*data);
return len;
}
typedef struct node_t
{
char *data;
int len;
struct node_t *next;
} node_t;
size_t read_file_improved(int fd, char **data)
{
char buf[BUFSIZE];
size_t read_len;
size_t len = 0;
/* build linked list containing chunks of data read from fd */
node_t *head = NULL;
node_t *prev = NULL;
while ((read_len = read(fd, buf, BUFSIZE)) > 0)
{
node_t *node = malloc(sizeof(node_t));
node->data = malloc(read_len);
memcpy(node->data, buf, read_len);
node->len = read_len;
node->next = NULL;
/* first chunk */
if (head == NULL)
{
head = node;
}
/* second chunk onwards */
if (prev != NULL)
{
prev->next = node;
}
prev = node;
len += read_len;
}
/* copy each chunk into data once only */
*data = malloc(len);
char *p = *data;
node_t *cur = head;
while (cur != NULL)
{
memcpy(p, cur->data, cur->len);
p += cur->len;
cur = cur->next;
}
print_data(*data);
return len;
}
size_t read_file_improved_2(int fd, char **data)
{
char buf[BUFSIZE];
size_t read_len;
size_t len = 0;
size_t size;
struct stat st;
fstat(fd, &st);
size = st.st_size;
*data = malloc(size);
while ((read_len = read(fd, *data + len, BUFSIZE)) > 0) {
len += read_len;
}
print_data(*data);
return len;
}
void print_data(char *data) {
if (getenv("VERBOSE") != NULL) {
printf("\n%s\n", data);
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment