Skip to content

Instantly share code, notes, and snippets.

@marioroy
Last active July 17, 2023 02:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save marioroy/b06e741bd2685fae3c1fea94808bd1e3 to your computer and use it in GitHub Desktop.
Save marioroy/b06e741bd2685fae3c1fea94808bd1e3 to your computer and use it in GitHub Desktop.
Grep Count C++ OpenMP demonstrations for website
// Grep PCRE2 C++ OpenMP demonstration for website:
// https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
// License: GNU General Public License.
// See http://www.gnu.org/licenses/ for license information.
// SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2b.cpp:
// https://perlmonks.org/?node_id=11152225
//
// Obtain the fast_io library (required dependency):
// git clone --depth=1 https://github.com/cppfastio/fast_io
//
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
// clang++ -o grep-count-pcre2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-pcre2.cc -lpcre2-8
//
// Running:
// Usage: grep-count-pcre2 regular-expression file1 [ file2... ]
// time ./grep-count-pcre2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
#include <cstring>
#include <string>
#include <thread>
// C++ wrapper for PCRE2 Library
// https://github.com/jpcre2/jpcre2
// installation: sudo dnf install jpcre2-devel for RPM-based systems
#include <jpcre2.hpp>
// Select the basic data type (char, wchar_t, char16_t or char32_t)
typedef jpcre2::select<char> jp;
#ifdef _OPENMP
#include <omp.h>
#endif
#include <fast_io.h>
#include <fast_io_legacy.h>
// Helper function to get the next up value for integer division
static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
{
if (dividend % divisor)
return dividend / divisor + 1;
else
return dividend / divisor;
}
#define MIN_CHUNK_SIZE 2097152
// Grep file, count matching lines
static void grep_file(
std::string_view fname, // in: file name
jp::Regex& re, // in: regular expression
int nthds // in: number of threads
)
{
try {
// Load entire file to memory through memory mapping.
using file_loader_type = fast_io::native_file_loader;
file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow);
// Loop contiguous file container. Each thread process approximately 32 chunks.
std::size_t chunk_size = divide_up(loader.size(), nthds * 32);
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
std::size_t num_chunks = divide_up(loader.size(), chunk_size);
char const *end{loader.data() + loader.size() - 1};
std::size_t lines = 0, matched_lines = 0;
std::string mode = (nthds > 1) ? "parallel" : "serial";
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
char const *last{loader.data() + loader.size() - 1};
char const *first{loader.data()};
if (chunk_id < num_chunks - 1) {
last = first + (chunk_size * (chunk_id + 1) - 1);
if (*last != '\n') last = fast_io::find_lf(last, end);
}
if (chunk_id > 0) {
first += (chunk_size * chunk_id - 1);
if (*first != '\n') first = fast_io::find_lf(first, end);
++first;
}
#if 1
// Match all and tally the match count - faster than matching per line.
// This requires anchoring in the event a line has multiple matches.
// E.g. "^.*" prepended to pattern during regex construction.
matched_lines += re.match(std::basic_string(first, last - first + 1), "g");
while (first <= last) {
first = fast_io::find_lf(first, last);
++lines;
++first;
}
#else
while (first <= last) {
auto start_ptr{first}; first = fast_io::find_lf(first, last);
auto end_ptr{first};
// include trailing '\n'
if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
++matched_lines;
++lines;
++first;
}
#endif
}
fast_io::io::println(
fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
", Matching Lines: ", matched_lines );
}
catch (fast_io::error e) {
fast_io::io::perrln("Error opening '", fname, "' : ", e);
};
}
int main(int argc, char* argv[])
{
if (argc < 3) {
if (argc > 0)
fast_io::io::perrln("Usage: grep-count-pcre2 regular-expression file...");
return 1;
}
#ifdef _OPENMP
// Determine the number of threads.
int nthds = std::thread::hardware_concurrency();
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
const char* env_nthds2 = std::getenv("NUM_THREADS");
if (env_nthds1 && strlen(env_nthds1))
nthds = ::atoi(env_nthds1);
else if (env_nthds2 && strlen(env_nthds2))
nthds = ::atoi(env_nthds2);
omp_set_dynamic(false);
omp_set_num_threads(nthds);
#else
int nthds = 1;
#endif
jp::Regex re(std::string{"^.*"} + argv[1], "mS");
// anchor pattern to allow searching once per chunk
// 'm' modifier enables multi-line mode for the regex
// 'S' modifier is an optimization mode
if (!re) {
fast_io::io::perrln("Invalid regular expression: ", std::string_view(re.getErrorMessage()));
return 1;
}
int nfiles = argc - 2;
char** fname = &argv[2];
for (int i = 0; i < nfiles; ++i)
grep_file(fname[i], re, nthds);
return 0;
}
// Grep PCRE2 and Portable MemoryMap C++ OpenMP demonstration for website:
// https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
// License: GNU General Public License.
// See http://www.gnu.org/licenses/ for license information.
// SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2c.cpp:
// https://perlmonks.org/?node_id=11152306
//
// Obtain the Portable Memory Mapping C++ Class (required dependency):
// git clone --depth=1 https://github.com/stbrumme/portable-memory-mapping
// Copy two files to your source directory: *.cpp and *.h
// cp portable-memory-mapping/MemoryMapped.* .
//
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
// g++ -o grep-count-pmap -std=c++20 -fopenmp -Wall -O3 MemoryMapped.cpp grep-count-pmap.cc -lpcre2-8
//
// Running:
// Usage: grep-count-pmap regular-expression file1 [ file2... ]
// time ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
#include <iostream>
#include <cstring>
#include <string>
#include <thread>
// C++ wrapper for PCRE2 Library
// https://github.com/jpcre2/jpcre2
// installation: sudo dnf install jpcre2-devel for RPM-based systems
#include <jpcre2.hpp>
// Select the basic data type (char, wchar_t, char16_t or char32_t)
typedef jpcre2::select<char> jp;
#ifdef _OPENMP
#include <omp.h>
#endif
#include "MemoryMapped.h"
// Helper function to find '\n'.
inline constexpr char const* find_lf(char const* first, char const* last)
{
while (first != last) {
if (*first == '\n') break;
++first;
}
return first;
}
// Helper function to get the next up value for integer division.
static long int divide_up(long int dividend, long int divisor)
{
if (dividend % divisor)
return dividend / divisor + 1;
else
return dividend / divisor;
}
#define MIN_CHUNK_SIZE 2097152
// Grep file, count matching lines
static void grep_file(
std::string_view fname, // in: file name
jp::Regex& re, // in: regular expression
int nthds // in: number of threads
)
{
// Map file to memory.
MemoryMapped loader(fname.data(), MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
if (!loader.isValid()) {
std::cerr << "Error opening '" << fname << "' : " << strerror(errno) << '\n';
return;
}
// Get raw pointer to mapped memory.
char* data = (char*) loader.getData();
long int filesize = loader.size();
// Loop contiguous file container. Each thread process approximately 32 chunks.
long int chunk_size = divide_up(filesize, nthds * 32);
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
long int num_chunks = divide_up(filesize, chunk_size);
char const *end{data + filesize - 1};
long int lines = 0, matched_lines = 0;
std::string mode = (nthds > 1) ? "parallel" : "serial";
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
for (long int chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
char const *last{data + filesize - 1};
char const *first{data};
if (chunk_id < num_chunks - 1) {
last = first + (chunk_size * (chunk_id + 1) - 1);
if (*last != '\n') last = find_lf(last, end);
}
if (chunk_id > 0) {
first += (chunk_size * chunk_id - 1);
if (*first != '\n') first = find_lf(first, end);
++first;
}
#if 1
// Match all and tally the match count - faster than matching per line.
// This requires anchoring in the event a line has multiple matches.
// E.g. "^.*" prepended to pattern during regex construction.
matched_lines += re.match(std::basic_string(first, last - first + 1), "g");
// Count the number of lines in this chunk.
for (long int i = first - data; i < last - data + 1; i++) {
lines += (data[i] == '\n');
}
#else
// Process line-by-line.
while (first <= last) {
auto start_ptr{first}; first = find_lf(first, last);
auto end_ptr{first};
// include trailing '\n'
if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
++matched_lines;
++lines;
++first;
}
#endif
}
loader.close();
std::cout \
<< fname << ": " << mode << " (" << nthds << ") Total Lines: " << lines \
<< ", Matching Lines: " << matched_lines << '\n';
}
int main(int argc, char* argv[])
{
if (argc < 3) {
if (argc > 0)
std::cerr << "Usage: grep-count-pmap regular-expression file..." << '\n';
return 1;
}
#ifdef _OPENMP
// Determine the number of threads.
int nthds = std::thread::hardware_concurrency();
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
const char* env_nthds2 = std::getenv("NUM_THREADS");
if (env_nthds1 && strlen(env_nthds1))
nthds = ::atoi(env_nthds1);
else if (env_nthds2 && strlen(env_nthds2))
nthds = ::atoi(env_nthds2);
omp_set_dynamic(false);
omp_set_num_threads(nthds);
#else
int nthds = 1;
#endif
jp::Regex re(std::string{"^.*"} + argv[1], "mS");
// anchor pattern to allow searching once per chunk
// 'm' modifier enables multi-line mode for the regex
// 'S' modifier is an optimization mode
if (!re) {
std::cerr << "Invalid regular expression: " << re.getErrorMessage() << '\n';
return 1;
}
int nfiles = argc - 2;
char** fname = &argv[2];
for (int i = 0; i < nfiles; ++i)
grep_file(fname[i], re, nthds);
return 0;
}
// Grep RE2 C++ OpenMP demonstration for website:
// https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
// License: GNU General Public License.
// See http://www.gnu.org/licenses/ for license information.
// SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2b.cpp:
// https://perlmonks.org/?node_id=11152225
//
// Obtain the fast_io library (required dependency):
// git clone --depth=1 https://github.com/cppfastio/fast_io
//
// Compile with g++ or clang++:
// clang++ -o grep-count-re2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-re2.cc -lre2
//
// Running:
// Usage: grep-count-re2 regular-expression file1 [ file2... ]
// time ./grep-count-re2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
#include <cstring>
#include <string>
#include <thread>
#include <re2/re2.h>
#ifdef _OPENMP
#include <omp.h>
#endif
#include <fast_io.h>
#include <fast_io_legacy.h>
// Helper function to get the next up value for integer division
static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
{
if (dividend % divisor)
return dividend / divisor + 1;
else
return dividend / divisor;
}
#define MIN_CHUNK_SIZE 2097152
// Grep file, count matching lines
static void grep_file(
std::string_view fname, // in: file name
RE2 const& pattern, // in: regular expression
int nthds // in: number of threads
)
{
try {
// Load entire file to memory through memory mapping.
using file_loader_type = fast_io::native_file_loader;
file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow);
// Loop contiguous file container. Each thread process max 1 chunk.
std::size_t chunk_size = divide_up(loader.size(), nthds * 1);
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
std::size_t num_chunks = divide_up(loader.size(), chunk_size);
char const *end{loader.data() + loader.size() - 1};
std::size_t lines = 0, matched_lines = 0;
std::string mode = (nthds > 1) ? "parallel" : "serial";
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
char const *last{loader.data() + loader.size() - 1};
char const *first{loader.data()};
if (chunk_id < num_chunks - 1) {
last = first + (chunk_size * (chunk_id + 1) - 1);
if (*last != '\n') last = fast_io::find_lf(last, end);
}
if (chunk_id > 0) {
first += (chunk_size * chunk_id - 1);
if (*first != '\n') first = fast_io::find_lf(first, end);
++first;
}
while (first <= last) {
auto start_ptr{first}; first = fast_io::find_lf(first, last);
auto end_ptr{first};
// include trailing '\n'
if (RE2::PartialMatch(std::string_view(start_ptr, end_ptr - start_ptr + 1), pattern))
++matched_lines;
++lines;
++first;
}
}
fast_io::io::println(
fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
", Matching Lines: ", matched_lines );
}
catch (fast_io::error e) {
fast_io::io::perrln("Error opening '", fname, "' : ", e);
};
}
int main(int argc, char* argv[])
{
if (argc < 3) {
if (argc > 0)
fast_io::io::perrln("Usage: grep-count-re2 regular-expression file...");
return 1;
}
#ifdef _OPENMP
// Determine the number of threads.
int nthds = std::thread::hardware_concurrency();
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
const char* env_nthds2 = std::getenv("NUM_THREADS");
if (env_nthds1 && strlen(env_nthds1))
nthds = ::atoi(env_nthds1);
else if (env_nthds2 && strlen(env_nthds2))
nthds = ::atoi(env_nthds2);
omp_set_dynamic(false);
omp_set_num_threads(nthds);
#else
int nthds = 1;
#endif
RE2::Options opt;
opt.set_log_errors(false);
opt.set_case_sensitive(false);
// opt.set_encoding(RE2::Options::EncodingUTF8); // default encoding
opt.set_encoding(RE2::Options::EncodingLatin1);
RE2 pattern (argv[1], opt);
if (!pattern.ok()) {
fast_io::io::perrln("Invalid regular expression: ", std::string_view(pattern.error()));
return 1;
}
int nfiles = argc - 2;
char** fname = &argv[2];
for (int i = 0; i < nfiles; ++i)
grep_file(fname[i], pattern, nthds);
return 0;
}
@marioroy
Copy link
Author

marioroy commented May 17, 2023

omp_scan - https://github.com/parallel-runtimes/CPU-fun:

$ time OMP_NUM_THREADS=1 taskset -c 0-3 ./omp_scan serial "[aA].*[eE].*[iI].*[oO].*[uU]" < large.txt
serial (1) Total Lines: 500000, Matching Lines: 10456

real    0m1.180s
user    0m1.176s
sys     0m0.004s

$ time OMP_NUM_THREADS=4 taskset -c 0-3 ./omp_scan parallel "[aA].*[eE].*[iI].*[oO].*[uU]" < large.txt
parallel (4) Total Lines: 500000, Matching Lines: 10456

real    0m0.856s
user    0m3.417s
sys     0m0.000s

grep-count-re2:

$ time OMP_NUM_THREADS=1 taskset -c 0-3 ./grep-count-re2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: serial (1) Total Lines: 500000, Matching Lines: 10456

real    0m0.092s
user    0m0.084s
sys     0m0.008s

$ time OMP_NUM_THREADS=4 taskset -c 0-3 ./grep-count-re2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: parallel (4) Total Lines: 500000, Matching Lines: 10456

real    0m0.031s
user    0m0.095s
sys     0m0.008s

grep-count-pcre2:

$ time OMP_NUM_THREADS=1 taskset -c 0-3 ./grep-count-pcre2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: serial (1) Total Lines: 500000, Matching Lines: 10456

real    0m0.082s
user    0m0.074s
sys     0m0.008s

$ time OMP_NUM_THREADS=4 taskset -c 0-3 ./grep-count-pcre2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: parallel (4) Total Lines: 500000, Matching Lines: 10456

real    0m0.033s
user    0m0.089s
sys     0m0.015s

grep-count-pmap:

$ time OMP_NUM_THREADS=1 taskset -c 0-3 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: serial (1) Total Lines: 500000, Matching Lines: 10456

real    0m0.082s
user    0m0.074s
sys     0m0.008s

$ time OMP_NUM_THREADS=4 taskset -c 0-3 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
large.txt: parallel (4) Total Lines: 500000, Matching Lines: 10456

real    0m0.026s
user    0m0.086s
sys     0m0.010s

grep:

$ time taskset -c 0-3 grep -c "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt
10456

real    0m0.042s
user    0m0.038s
sys     0m0.004s

@marioroy
Copy link
Author

marioroy commented May 18, 2023

Results for 1 GB

for i in $(seq 1 42); do
  cat large.txt >> big.txt
done

grep-count-pmap:

$ time OMP_NUM_THREADS=1 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: serial (1) Total Lines: 21000000, Matching Lines: 439152

real    0m3.403s
user    0m3.221s
sys     0m0.182s

$ time OMP_NUM_THREADS=4 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (4) Total Lines: 21000000, Matching Lines: 439152

real    0m0.899s
user    0m3.404s
sys     0m0.172s

$ time OMP_NUM_THREADS=8 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (8) Total Lines: 21000000, Matching Lines: 439152

real    0m0.457s
user    0m3.450s
sys     0m0.141s

$ time OMP_NUM_THREADS=16 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (16) Total Lines: 21000000, Matching Lines: 439152

real    0m0.246s
user    0m3.613s
sys     0m0.189s

$ time OMP_NUM_THREADS=32 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (32) Total Lines: 21000000, Matching Lines: 439152

real    0m0.157s
user    0m4.246s
sys     0m0.296s

grep

$ time grep -c "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt 
439152

real    0m1.742s
user    0m1.671s
sys     0m0.070s

@marioroy
Copy link
Author

See also, grep-count-chunk.cc at the Perl monastery. It chunks input similar to Perl MCE.

https://www.perlmonks.org/?node_id=11152325

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment