Created
October 28, 2012 20:29
-
-
Save framp/9342d967ef6dc5f4d756 to your computer and use it in GitHub Desktop.
Testing different solutions for counting the occurrences of a given character inside a 100 MB file (comparing the performance of ack, STL streams reading from disk, and STL streams reading a file stored on a ramdisk).
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <cstdlib>
#include <ctime>
#include <fstream>
#include <iostream>
#include <string>

using namespace std;
// Write a file of pseudo-random characters broken into lines.
//
// Every position holds either a newline or a character drawn at random from
// `range`. A newline is written whenever the position is a multiple of a
// freshly drawn number in [60, 69]; position 0 therefore always holds a
// newline.
//
// filename  path of the file to create
// range     characters to draw from; must not be empty
// size      number of characters to write (default: 104857600, i.e. 100 MiB)
//
// Problems (empty range, file that cannot be opened or written) are reported
// on stderr.
void generate(const char * filename, const std::string & range, long size = 104857600)
{
    // rand() % 0 below would be undefined behaviour, so reject an empty range.
    if (range.empty()) {
        std::cerr << "generate: the character range must not be empty\n";
        return;
    }

    std::ofstream out(filename);
    if (!out) {
        std::cerr << "generate: cannot open " << filename << " for writing\n";
        return;
    }

    const int rangeLength = static_cast<int>(range.length());
    std::srand(static_cast<unsigned>(std::time(NULL)));

    for (long i = 0; i < size; i++) {
        if (i % (std::rand() % 10 + 60) == 0)
            out.put('\n');
        else
            out.put(range[std::rand() % rangeLength]);
    }

    out.close();
    if (!out)
        std::cerr << "generate: error while writing " << filename << "\n";
}
// Count how many times `search` occurs in a file and print the total,
// followed by a newline, on stdout.
//
// The file is read one character at a time through the stream on purpose:
// this is the code path the benchmark measures.
//
// filename  path of the file to scan
// search    character to count
//
// If the file cannot be opened a message goes to stderr and 0 is printed.
void count(const char * filename, char search)
{
    typedef std::ifstream::traits_type traits;

    std::ifstream in(filename);
    if (!in)
        std::cerr << "count: cannot open " << filename << "\n";

    long counter = 0;
    // Compare against the EOF sentinel *before* narrowing to char: casting
    // EOF to char yields '\xff', which would be miscounted as a match.
    for (traits::int_type c = in.get(); c != traits::eof(); c = in.get())
        if (traits::to_char_type(c) == search)
            counter++;

    std::cout << counter << "\n";
}
// Read a file from start to end, one character at a time, discarding the
// data. Serves as the baseline for the cost of streaming the file without
// any per-character work.
//
// filename  path of the file to read
//
// If the file cannot be opened a message goes to stderr and nothing is read.
void read(const char * filename)
{
    typedef std::ifstream::traits_type traits;

    std::ifstream in(filename);
    if (!in) {
        std::cerr << "read: cannot open " << filename << "\n";
        return;
    }

    // Stop on the EOF sentinel returned by get() rather than polling good().
    while (in.get() != traits::eof()) {
    }
}
// Print the command-line help on stdout.
//
// filename  name the program was invoked with; repeated in every example line
void usage(const char * filename)
{
    std::cout << "Usage:\t" << filename << " command file [char]\n\n"
              << "\t" << filename << " generate file\t" << "Generate a 100mb file\n"
              << "\t" << filename << " read file\t" << "Read a file\n"
              << "\t" << filename << " count file char\t"
              << "Count the occurrences of char inside file\n";
}
int main(int argc, char *argv[]) | |
{ | |
string range = "0123456789abcdefghijklmnopqrstuvwxyz (){}[]*+-/=<>#;.,"; | |
switch(argc) { | |
case 3: | |
if (argv[1]==string("generate")) | |
generate(argv[2], range); | |
else if (argv[1]==string("read")) | |
read(argv[2]); | |
else | |
usage(argv[0]); | |
break; | |
case 4: | |
if (argv[1]==string("count")) | |
count(argv[2], argv[3][0]); | |
else | |
usage(argv[0]); | |
break; | |
default: | |
usage(argv[0]); | |
break; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[framp@a8 100mb]$ sh test.sh | |
Compiling tool... | |
Generating file... | |
Mounting a ramdisk... | |
Occurrences: | |
FGREP+WC: 1913217 | |
ACK: 899832 | |
COUNT: 1913217 | |
Testing... | |
ACK | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
899832 | |
Samples: 50 | |
Mean Avg: 3.6552 | |
READ | |
Samples: 50 | |
Mean Avg: 3.1254 | |
COUNT | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
Samples: 50 | |
Mean Avg: 3.0966 | |
READRAMDISK | |
Samples: 50 | |
Mean Avg: 1.3938 | |
COUNTRAMDISK | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
1913217 | |
Samples: 50 | |
Mean Avg: 1.5296 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/bin/bash | |
#Calculate the mean average of wall clock time from multiple /usr/bin/time results. | |
#http://stackoverflow.com/questions/8215482/mean-running-time-over-a-number-of-runs | |
function timeAverage { | |
file=${1} | |
cnt=0 | |
if [ ${#file} -lt 1 ]; then | |
echo "you must specify a file containing output of /usr/bin/time results" | |
exit 1 | |
elif [ ${#file} -gt 1 ]; then | |
samples=(`grep --color=never real ${file} | awk '{print $2}' | cut -dm -f2 | cut -ds -f1`) | |
for sample in `grep --color=never real ${file} | awk '{print $2}' | cut -dm -f2 | cut -ds -f1`; do | |
cnt=$(echo ${cnt}+${sample} | bc -l) | |
done | |
# Calculate the 'Mean' average (sum / samples). | |
mean_avg=$(echo ${cnt}/${#samples[@]} | bc -l) | |
mean_avg=$(echo ${mean_avg} | cut -b1-6) | |
printf "\tSamples:\t%s \n\tMean Avg:\t%s\n\n" ${#samples[@]} ${mean_avg} | |
fi | |
} | |
# Run a command 50 times, timing every run, then print the mean wall-clock time.
#
# Arguments:
#   $1   - label printed before the runs start
#   $2.. - command (with its arguments) to time
function timeTest {
    local i

    echo "$1"
    shift

    # time -a appends, so a log left behind by an aborted run would skew the mean.
    rm -f times.log

    for i in {1..50}; do
        # Write dirty pages back first, then ask the kernel to drop its caches
        # so reads from disk are not served from memory.
        sync
        echo 3 | sudo tee /proc/sys/vm/drop_caches > /dev/null
        # `command` selects the external time program: the bash keyword of the
        # same name does not accept -a/-o outside POSIX mode.
        command time -a -o times.log -p "$@"
    done

    timeAverage times.log
    rm times.log
}
# Remove the generated files and release the ramdisk.
function cleanup {
    rm -f "ramdisk/100mb.txt" "100mb.txt"
    sudo umount ramdisk
    rm -rf ramdisk
}

# Rebuild the tool when the source is newer than the binary.
if [ 100mb.cpp -nt 100mb ]; then
    echo "Compiling tool..."
    g++ 100mb.cpp -o 100mb || exit 1
    chmod +x 100mb
fi

echo "Generating file..."
./100mb generate 100mb.txt || exit 1

echo "Mounting a ramdisk..."
mkdir -p ramdisk
if ! sudo mount -t tmpfs -o nodev,nosuid,noexec,nodiratime,size=150M none ramdisk; then
    echo "could not mount the ramdisk" >&2
    rm -f "100mb.txt"
    exit 1
fi

# From here on always clean up, including when the run is interrupted.
trap cleanup EXIT
trap 'exit 1' INT TERM

cp 100mb.txt ramdisk/100mb.txt || exit 1

echo ""
echo "Occurrences:"
echo "FGREP+WC:" $(grep -F -o ')' "100mb.txt" | wc -l)
# NOTE: ack -c reports the number of matching lines, not of matching
# characters, so this figure is lower than the other two.
echo "ACK:" $(ack -c '\)' "100mb.txt")
echo "COUNT:" $(./100mb count "100mb.txt" ')')

echo ""
echo "Testing..."
timeTest "ACK" ack -c '\)' 100mb.txt
timeTest "READ" ./100mb read "100mb.txt"
timeTest "COUNT" ./100mb count "100mb.txt" ')'
timeTest "READRAMDISK" ./100mb read "ramdisk/100mb.txt"
timeTest "COUNTRAMDISK" ./100mb count "ramdisk/100mb.txt" ')'
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment