Skip to content

Instantly share code, notes, and snippets.

@framp
Created October 28, 2012 20:29
Show Gist options
  • Save framp/9342d967ef6dc5f4d756 to your computer and use it in GitHub Desktop.
Save framp/9342d967ef6dc5f4d756 to your computer and use it in GitHub Desktop.
Testing different solutions for counting the occurrences of a given character inside a 100 MB file (comparing the performance of ack, STL streams reading from disk, and STL streams reading a file stored on a ramdisk).
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <ctime>
using namespace std;
/**
 * Write a file filled with pseudo-random characters.
 *
 * Every position holds either a newline or a character picked at random from
 * `range`. A newline is written whenever the position index is divisible by a
 * freshly drawn random value in 60..69, so position 0 is always a newline.
 *
 * @param filename  path of the file to create (an existing file is overwritten)
 * @param range     characters to choose from; must not be empty
 * @param size      number of characters to write; defaults to 104857600 (100 MiB)
 *
 * On an empty `range` or a file that cannot be opened, a message is printed on
 * stderr and nothing is written.
 */
void generate(char * filename, string range, int size = 104857600)
{
    // rand() % 0 would divide by zero, so reject an empty alphabet up front.
    if (range.empty()) {
        cerr << "generate: character range must not be empty\n";
        return;
    }

    ofstream out(filename);
    if (!out) {
        cerr << "generate: cannot open " << filename << " for writing\n";
        return;
    }

    const int rangeLength = range.length();
    srand(time(NULL));
    for (int i = 0; i < size; i++) {
        if (i % (rand() % 10 + 60) == 0)
            out << "\n";
        else
            out << range[rand() % rangeLength];
    }
    // `out` is flushed and closed by its destructor.
}
/**
 * Count how many times `search` occurs in a file and print the total,
 * followed by a newline, on stdout.
 *
 * @param filename  path of the file to scan
 * @param search    character to look for
 *
 * If the file cannot be opened, a message is printed on stderr and nothing is
 * written to stdout.
 */
void count(char * filename, char search)
{
    ifstream in(filename);
    if (!in) {
        cerr << "count: cannot open " << filename << "\n";
        return;
    }

    unsigned long counter = 0;
    char current;
    // get(char&) fails at end of file without producing a value, so the
    // end-of-file marker is never compared against `search`.
    while (in.get(current))
        if (current == search)
            counter++;

    cout << counter << "\n";
}
/**
 * Read a file one character at a time and throw the data away.
 *
 * @param filename  path of the file to read
 *
 * Nothing is reported; a file that cannot be opened simply results in no reads.
 */
void read(char * filename)
{
    ifstream source(filename);
    for (;;) {
        if (!source.good())
            break;
        source.get();   // value intentionally discarded
    }
    source.close();
}
/**
 * Print the command-line help on stdout.
 *
 * @param filename  name the program was invoked with; shown in every example line
 */
void usage(char * filename)
{
    // Synopsis, followed by a blank line.
    cout << "Usage:\t" << filename << " command file [char]\n\n";

    // One line per supported command.
    cout << "\t" << filename << " generate file\t" << "Generate a 100mb file\n";
    cout << "\t" << filename << " read file\t" << "Read a file\n";
    cout << "\t" << filename << " count file char\t" << "Count the occurrences of char inside file\n";
}
int main(int argc, char *argv[])
{
string range = "0123456789abcdefghijklmnopqrstuvwxyz (){}[]*+-/=<>#;.,";
switch(argc) {
case 3:
if (argv[1]==string("generate"))
generate(argv[2], range);
else if (argv[1]==string("read"))
read(argv[2]);
else
usage(argv[0]);
break;
case 4:
if (argv[1]==string("count"))
count(argv[2], argv[3][0]);
else
usage(argv[0]);
break;
default:
usage(argv[0]);
break;
}
}
[framp@a8 100mb]$ sh test.sh
Compiling tool...
Generating file...
Mounting a ramdisk...
Occurences:
FGREP+WC: 1913217
ACK: 899832
COUNT: 1913217
Testing...
ACK
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
899832
Samples: 50
Mean Avg: 3.6552
READ
Samples: 50
Mean Avg: 3.1254
COUNT
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
Samples: 50
Mean Avg: 3.0966
READRAMDISK
Samples: 50
Mean Avg: 1.3938
COUNTRAMDISK
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
1913217
Samples: 50
Mean Avg: 1.5296
#!/bin/bash
#Calculate the mean average of wall clock time from multiple /usr/bin/time results.
#http://stackoverflow.com/questions/8215482/mean-running-time-over-a-number-of-runs
function timeAverage {
file=${1}
cnt=0
if [ ${#file} -lt 1 ]; then
echo "you must specify a file containing output of /usr/bin/time results"
exit 1
elif [ ${#file} -gt 1 ]; then
samples=(`grep --color=never real ${file} | awk '{print $2}' | cut -dm -f2 | cut -ds -f1`)
for sample in `grep --color=never real ${file} | awk '{print $2}' | cut -dm -f2 | cut -ds -f1`; do
cnt=$(echo ${cnt}+${sample} | bc -l)
done
# Calculate the 'Mean' average (sum / samples).
mean_avg=$(echo ${cnt}/${#samples[@]} | bc -l)
mean_avg=$(echo ${mean_avg} | cut -b1-6)
printf "\tSamples:\t%s \n\tMean Avg:\t%s\n\n" ${#samples[@]} ${mean_avg}
fi
}
#Do 50 tests and calculate the average
function timeTest {
    # Run a command 50 times and print the mean of its wall-clock time.
    #
    # $1     label printed before the runs
    # $2...  the command to time, with its arguments
    #
    # The page cache is dropped before every run (requires sudo). Timings are
    # collected in times.log in the current directory, which is removed afterwards.
    local label=$1
    local i
    shift
    echo "${label}"

    # A log left behind by an interrupted run would be mixed into the average.
    rm -f times.log

    for i in {1..50}; do
        echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
        # `command` bypasses the shell's own `time` reserved word, which does not
        # accept -a/-o; the external time utility appends its report to times.log.
        # "$@" is quoted so arguments such as '\)' reach the command unchanged.
        command time -a -o times.log -p "$@"
    done

    timeAverage times.log
    rm times.log
}
# File and directory names used by every step below.
tool_src=100mb.cpp
tool_bin=100mb
data_file=100mb.txt
ramdisk_dir=ramdisk
ramdisk_file="${ramdisk_dir}/${data_file}"

# Rebuild the tool only when the source is newer than the binary.
if test "${tool_src}" -nt "${tool_bin}"; then
    echo "Compiling tool..."
    g++ "${tool_src}" -o "${tool_bin}"
    chmod +x "${tool_bin}"
fi

# Create the test data on disk.
echo "Generating file..."
"./${tool_bin}" generate "${data_file}"

# Put a second copy of the data on a tmpfs mount (requires sudo).
echo "Mounting a ramdisk..."
mkdir -p "${ramdisk_dir}"
sudo mount -t tmpfs -o nodev,nosuid,noexec,nodiratime,size=150M none "${ramdisk_dir}"
cp "${data_file}" "${ramdisk_file}"

# Show what each tool reports for ')' before timing anything.
# NOTE(review): `ack -c` presumably counts matching lines rather than individual
# matches, so its number need not agree with the other two - confirm.
echo ""
echo "Occurrences:"
echo "FGREP+WC:" $(fgrep -o ')' "${data_file}" | wc -l)
echo "ACK:" $(ack -c '\)' "${data_file}")
echo "COUNT:" $("./${tool_bin}" count "${data_file}" ')')

# Timed runs: ack, then the tool on disk, then the tool on the ramdisk copy.
echo ""
echo "Testing..."
timeTest "ACK" ack -c '\)' "${data_file}"
timeTest "READ" "./${tool_bin}" read "${data_file}"
timeTest "COUNT" "./${tool_bin}" count "${data_file}" ')'
timeTest "READRAMDISK" "./${tool_bin}" read "${ramdisk_file}"
timeTest "COUNTRAMDISK" "./${tool_bin}" count "${ramdisk_file}" ')'

# Clean up the data files and the ramdisk.
rm -f "${ramdisk_file}" "${data_file}"
sudo umount "${ramdisk_dir}"
rm -rf "${ramdisk_dir}"
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment