Skip to content

Instantly share code, notes, and snippets.

@morpheby
Last active August 29, 2015 14:01
Show Gist options
  • Save morpheby/b2f15a31dc733929a782 to your computer and use it in GitHub Desktop.
Save morpheby/b2f15a31dc733929a782 to your computer and use it in GitHub Desktop.
/*
* histogram.cpp
* Creates histogram of byte distribution in file
*
* Created by Илья Михальцов on 2014-05-27.
* Copyright 2014 Илья Михальцов. All rights reserved.
*/
// g++ -fopenmp -std=gnu++11 -O2 histogram.cpp -o histogram
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>

#include <algorithm>
#include <functional>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <vector>
/* Element type whose values are tallied; main() sizes the histogram as
 * 1 << (8 * sizeof(readValue_t)) buckets. */
using readValue_t = unsigned char;
/* Number of readValue_t elements in the read buffer (main() requests
 * READ_BLOCK * sizeof(readValue_t) bytes per read() call).
 * Parenthesized so the macro keeps its value when expanded next to
 * operators such as % or /. */
#define READ_BLOCK (2048*1024)
int main (int argc, const char **argv) {
int fd = 0;
if (argc == 2) {
fd = open(argv[1], O_RDONLY);
if (fd == -1)
return errno;
} else if (argc == 1) {
fd = STDIN_FILENO;
} else {
std::cerr << "Usage: " << argv[0] << " [filename]" << std::endl;
return 1;
}
readValue_t buf[READ_BLOCK];
std::vector<long long int> h (1 << (8 * sizeof(readValue_t)));
int columns;
int termWidth;
{
struct winsize w;
ioctl(STDOUT_FILENO, TIOCGWINSZ, &w);
termWidth = columns = w.ws_col;
columns -= sizeof(readValue_t) * 2 + 3 + 11;
}
int readBytes = 0;
long long int fileSize = 0;
long long int accumulator = 0;
struct stat fileStat;
fstat(fd, &fileStat);
fileSize = fileStat.st_size;
std::cout << std::endl;
do {
std::cout << "\r" << (double) accumulator / fileSize * 100 << "%" << " ";
std::cout.flush();
accumulator += readBytes = read(fd, buf, READ_BLOCK * sizeof(readValue_t));
if (!readBytes)
continue;
#pragma omp parallel for
for (int i = 0; i < readBytes/sizeof(readValue_t); ++i) {
readValue_t c = buf[i];
auto &v = h.at(c);
#pragma omp atomic
++v;
}
} while(readBytes == READ_BLOCK * sizeof(readValue_t));
std::cout << '\r';
long long int max = *std::max_element(h.begin(), h.end());
double coeff = (double)columns / max;
for (int i = 0; i < h.size(); ++i) {
int numAsterisks = coeff * h[i];
std::cout << std::hex << std::setw(sizeof(readValue_t) * 2) << std::right << i << ": " << std::dec;
std::fill_n(std::ostream_iterator<char>(std::cout, ""), numAsterisks, '*');
std::fill_n(std::ostream_iterator<char>(std::cout, ""), columns - numAsterisks + 1, ' ');
std::cout << '(' << std::fixed << std::setprecision(4) << std::setw(8) << std::right << (double)h[i]/fileSize * 100.0 << "%)";
std::cout << std::endl;
}
return 0;
}
@morpheby
Copy link
Author

OpenMP only makes it slower… And clang with -O2 produces an even faster result. So what's the point of OpenMP here, anyone?

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment