Last active
July 17, 2023 02:55
-
-
Save marioroy/b06e741bd2685fae3c1fea94808bd1e3 to your computer and use it in GitHub Desktop.
Grep Count C++ OpenMP demonstrations for website
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Grep PCRE2 C++ OpenMP demonstration for website: | |
// https://cpufun.substack.com/p/processing-a-file-with-openmp | |
// | |
// By Mario Roy - May 19, 2023 | |
// License: GNU General Public License. | |
// See http://www.gnu.org/licenses/ for license information. | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// Based on rtoa-pgatram-allinone2b.cpp: | |
// https://perlmonks.org/?node_id=11152225 | |
// | |
// Obtain the fast_io library (required dependency): | |
// git clone --depth=1 https://github.com/cppfastio/fast_io | |
// | |
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32) | |
// clang++ -o grep-count-pcre2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-pcre2.cc -lpcre2-8 | |
// | |
// Running: | |
// Usage: grep-count-pcre2 regular-expression file1 [ file2... ] | |
// time ./grep-count-pcre2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt | |
#include <cstring> | |
#include <string> | |
#include <thread> | |
// C++ wrapper for PCRE2 Library | |
// https://github.com/jpcre2/jpcre2 | |
// installation: sudo dnf install jpcre2-devel for RPM-based systems | |
#include <jpcre2.hpp> | |
// Select the basic data type (char, wchar_t, char16_t or char32_t) | |
typedef jpcre2::select<char> jp; | |
#ifdef _OPENMP | |
#include <omp.h> | |
#endif | |
#include <fast_io.h> | |
#include <fast_io_legacy.h> | |
// Helper function to get the next up value for integer division | |
static std::size_t divide_up(std::size_t dividend, std::size_t divisor) | |
{ | |
if (dividend % divisor) | |
return dividend / divisor + 1; | |
else | |
return dividend / divisor; | |
} | |
#define MIN_CHUNK_SIZE 2097152 | |
// Grep file, count matching lines | |
static void grep_file( | |
std::string_view fname, // in: file name | |
jp::Regex& re, // in: regular expression | |
int nthds // in: number of threads | |
) | |
{ | |
try { | |
// Load entire file to memory through memory mapping. | |
using file_loader_type = fast_io::native_file_loader; | |
file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow); | |
// Loop contiguous file container. Each thread process approximately 32 chunks. | |
std::size_t chunk_size = divide_up(loader.size(), nthds * 32); | |
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE; | |
std::size_t num_chunks = divide_up(loader.size(), chunk_size); | |
char const *end{loader.data() + loader.size() - 1}; | |
std::size_t lines = 0, matched_lines = 0; | |
std::string mode = (nthds > 1) ? "parallel" : "serial"; | |
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines) | |
for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) { | |
char const *last{loader.data() + loader.size() - 1}; | |
char const *first{loader.data()}; | |
if (chunk_id < num_chunks - 1) { | |
last = first + (chunk_size * (chunk_id + 1) - 1); | |
if (*last != '\n') last = fast_io::find_lf(last, end); | |
} | |
if (chunk_id > 0) { | |
first += (chunk_size * chunk_id - 1); | |
if (*first != '\n') first = fast_io::find_lf(first, end); | |
++first; | |
} | |
#if 1 | |
// Match all and tally the match count - faster than matching per line. | |
// This requires anchoring in the event a line has multiple matches. | |
// E.g. "^.*" prepended to pattern during regex construction. | |
matched_lines += re.match(std::basic_string(first, last - first + 1), "g"); | |
while (first <= last) { | |
first = fast_io::find_lf(first, last); | |
++lines; | |
++first; | |
} | |
#else | |
while (first <= last) { | |
auto start_ptr{first}; first = fast_io::find_lf(first, last); | |
auto end_ptr{first}; | |
// include trailing '\n' | |
if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1))) | |
++matched_lines; | |
++lines; | |
++first; | |
} | |
#endif | |
} | |
fast_io::io::println( | |
fname, ": ", mode, " (", nthds, ") Total Lines: ", lines, | |
", Matching Lines: ", matched_lines ); | |
} | |
catch (fast_io::error e) { | |
fast_io::io::perrln("Error opening '", fname, "' : ", e); | |
}; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
if (argc < 3) { | |
if (argc > 0) | |
fast_io::io::perrln("Usage: grep-count-pcre2 regular-expression file..."); | |
return 1; | |
} | |
#ifdef _OPENMP | |
// Determine the number of threads. | |
int nthds = std::thread::hardware_concurrency(); | |
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS"); | |
const char* env_nthds2 = std::getenv("NUM_THREADS"); | |
if (env_nthds1 && strlen(env_nthds1)) | |
nthds = ::atoi(env_nthds1); | |
else if (env_nthds2 && strlen(env_nthds2)) | |
nthds = ::atoi(env_nthds2); | |
omp_set_dynamic(false); | |
omp_set_num_threads(nthds); | |
#else | |
int nthds = 1; | |
#endif | |
jp::Regex re(std::string{"^.*"} + argv[1], "mS"); | |
// anchor pattern to allow searching once per chunk | |
// 'm' modifier enables multi-line mode for the regex | |
// 'S' modifier is an optimization mode | |
if (!re) { | |
fast_io::io::perrln("Invalid regular expression: ", std::string_view(re.getErrorMessage())); | |
return 1; | |
} | |
int nfiles = argc - 2; | |
char** fname = &argv[2]; | |
for (int i = 0; i < nfiles; ++i) | |
grep_file(fname[i], re, nthds); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Grep PCRE2 and Portable MemoryMap C++ OpenMP demonstration for website: | |
// https://cpufun.substack.com/p/processing-a-file-with-openmp | |
// | |
// By Mario Roy - May 19, 2023 | |
// License: GNU General Public License. | |
// See http://www.gnu.org/licenses/ for license information. | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// Based on rtoa-pgatram-allinone2c.cpp: | |
// https://perlmonks.org/?node_id=11152306 | |
// | |
// Obtain the Portable Memory Mapping C++ Class (required dependency): | |
// git clone --depth=1 https://github.com/stbrumme/portable-memory-mapping | |
// Copy two files to your source directory: *.cpp and *.h | |
// cp portable-memory-mapping/MemoryMapped.* . | |
// | |
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32) | |
// g++ -o grep-count-pmap -std=c++20 -fopenmp -Wall -O3 MemoryMapped.cpp grep-count-pmap.cc -lpcre2-8 | |
// | |
// Running: | |
// Usage: grep-count-pmap regular-expression file1 [ file2... ] | |
// time ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt | |
#include <iostream> | |
#include <cstring> | |
#include <string> | |
#include <thread> | |
// C++ wrapper for PCRE2 Library | |
// https://github.com/jpcre2/jpcre2 | |
// installation: sudo dnf install jpcre2-devel for RPM-based systems | |
#include <jpcre2.hpp> | |
// Select the basic data type (char, wchar_t, char16_t or char32_t) | |
typedef jpcre2::select<char> jp; | |
#ifdef _OPENMP | |
#include <omp.h> | |
#endif | |
#include "MemoryMapped.h" | |
// Helper function to find '\n'. | |
inline constexpr char const* find_lf(char const* first, char const* last) | |
{ | |
while (first != last) { | |
if (*first == '\n') break; | |
++first; | |
} | |
return first; | |
} | |
// Helper function to get the next up value for integer division. | |
static long int divide_up(long int dividend, long int divisor) | |
{ | |
if (dividend % divisor) | |
return dividend / divisor + 1; | |
else | |
return dividend / divisor; | |
} | |
#define MIN_CHUNK_SIZE 2097152 | |
// Grep file, count matching lines | |
static void grep_file( | |
std::string_view fname, // in: file name | |
jp::Regex& re, // in: regular expression | |
int nthds // in: number of threads | |
) | |
{ | |
// Map file to memory. | |
MemoryMapped loader(fname.data(), MemoryMapped::WholeFile, MemoryMapped::SequentialScan); | |
if (!loader.isValid()) { | |
std::cerr << "Error opening '" << fname << "' : " << strerror(errno) << '\n'; | |
return; | |
} | |
// Get raw pointer to mapped memory. | |
char* data = (char*) loader.getData(); | |
long int filesize = loader.size(); | |
// Loop contiguous file container. Each thread process approximately 32 chunks. | |
long int chunk_size = divide_up(filesize, nthds * 32); | |
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE; | |
long int num_chunks = divide_up(filesize, chunk_size); | |
char const *end{data + filesize - 1}; | |
long int lines = 0, matched_lines = 0; | |
std::string mode = (nthds > 1) ? "parallel" : "serial"; | |
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines) | |
for (long int chunk_id = 0; chunk_id < num_chunks; ++chunk_id) { | |
char const *last{data + filesize - 1}; | |
char const *first{data}; | |
if (chunk_id < num_chunks - 1) { | |
last = first + (chunk_size * (chunk_id + 1) - 1); | |
if (*last != '\n') last = find_lf(last, end); | |
} | |
if (chunk_id > 0) { | |
first += (chunk_size * chunk_id - 1); | |
if (*first != '\n') first = find_lf(first, end); | |
++first; | |
} | |
#if 1 | |
// Match all and tally the match count - faster than matching per line. | |
// This requires anchoring in the event a line has multiple matches. | |
// E.g. "^.*" prepended to pattern during regex construction. | |
matched_lines += re.match(std::basic_string(first, last - first + 1), "g"); | |
// Count the number of lines in this chunk. | |
for (long int i = first - data; i < last - data + 1; i++) { | |
lines += (data[i] == '\n'); | |
} | |
#else | |
// Process line-by-line. | |
while (first <= last) { | |
auto start_ptr{first}; first = find_lf(first, last); | |
auto end_ptr{first}; | |
// include trailing '\n' | |
if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1))) | |
++matched_lines; | |
++lines; | |
++first; | |
} | |
#endif | |
} | |
loader.close(); | |
std::cout \ | |
<< fname << ": " << mode << " (" << nthds << ") Total Lines: " << lines \ | |
<< ", Matching Lines: " << matched_lines << '\n'; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
if (argc < 3) { | |
if (argc > 0) | |
std::cerr << "Usage: grep-count-pmap regular-expression file..." << '\n'; | |
return 1; | |
} | |
#ifdef _OPENMP | |
// Determine the number of threads. | |
int nthds = std::thread::hardware_concurrency(); | |
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS"); | |
const char* env_nthds2 = std::getenv("NUM_THREADS"); | |
if (env_nthds1 && strlen(env_nthds1)) | |
nthds = ::atoi(env_nthds1); | |
else if (env_nthds2 && strlen(env_nthds2)) | |
nthds = ::atoi(env_nthds2); | |
omp_set_dynamic(false); | |
omp_set_num_threads(nthds); | |
#else | |
int nthds = 1; | |
#endif | |
jp::Regex re(std::string{"^.*"} + argv[1], "mS"); | |
// anchor pattern to allow searching once per chunk | |
// 'm' modifier enables multi-line mode for the regex | |
// 'S' modifier is an optimization mode | |
if (!re) { | |
std::cerr << "Invalid regular expression: " << re.getErrorMessage() << '\n'; | |
return 1; | |
} | |
int nfiles = argc - 2; | |
char** fname = &argv[2]; | |
for (int i = 0; i < nfiles; ++i) | |
grep_file(fname[i], re, nthds); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Grep RE2 C++ OpenMP demonstration for website: | |
// https://cpufun.substack.com/p/processing-a-file-with-openmp | |
// | |
// By Mario Roy - May 19, 2023 | |
// License: GNU General Public License. | |
// See http://www.gnu.org/licenses/ for license information. | |
// SPDX-License-Identifier: GPL-3.0-or-later | |
// Based on rtoa-pgatram-allinone2b.cpp: | |
// https://perlmonks.org/?node_id=11152225 | |
// | |
// Obtain the fast_io library (required dependency): | |
// git clone --depth=1 https://github.com/cppfastio/fast_io | |
// | |
// Compile with g++ or clang++: | |
// clang++ -o grep-count-re2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-re2.cc -lre2 | |
// | |
// Running: | |
// Usage: grep-count-re2 regular-expression file1 [ file2... ] | |
// time ./grep-count-re2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt | |
#include <cstring> | |
#include <string> | |
#include <thread> | |
#include <re2/re2.h> | |
#ifdef _OPENMP | |
#include <omp.h> | |
#endif | |
#include <fast_io.h> | |
#include <fast_io_legacy.h> | |
// Helper function to get the next up value for integer division | |
static std::size_t divide_up(std::size_t dividend, std::size_t divisor) | |
{ | |
if (dividend % divisor) | |
return dividend / divisor + 1; | |
else | |
return dividend / divisor; | |
} | |
#define MIN_CHUNK_SIZE 2097152 | |
// Grep file, count matching lines | |
static void grep_file( | |
std::string_view fname, // in: file name | |
RE2 const& pattern, // in: regular expression | |
int nthds // in: number of threads | |
) | |
{ | |
try { | |
// Load entire file to memory through memory mapping. | |
using file_loader_type = fast_io::native_file_loader; | |
file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow); | |
// Loop contiguous file container. Each thread process max 1 chunk. | |
std::size_t chunk_size = divide_up(loader.size(), nthds * 1); | |
if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE; | |
std::size_t num_chunks = divide_up(loader.size(), chunk_size); | |
char const *end{loader.data() + loader.size() - 1}; | |
std::size_t lines = 0, matched_lines = 0; | |
std::string mode = (nthds > 1) ? "parallel" : "serial"; | |
#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines) | |
for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) { | |
char const *last{loader.data() + loader.size() - 1}; | |
char const *first{loader.data()}; | |
if (chunk_id < num_chunks - 1) { | |
last = first + (chunk_size * (chunk_id + 1) - 1); | |
if (*last != '\n') last = fast_io::find_lf(last, end); | |
} | |
if (chunk_id > 0) { | |
first += (chunk_size * chunk_id - 1); | |
if (*first != '\n') first = fast_io::find_lf(first, end); | |
++first; | |
} | |
while (first <= last) { | |
auto start_ptr{first}; first = fast_io::find_lf(first, last); | |
auto end_ptr{first}; | |
// include trailing '\n' | |
if (RE2::PartialMatch(std::string_view(start_ptr, end_ptr - start_ptr + 1), pattern)) | |
++matched_lines; | |
++lines; | |
++first; | |
} | |
} | |
fast_io::io::println( | |
fname, ": ", mode, " (", nthds, ") Total Lines: ", lines, | |
", Matching Lines: ", matched_lines ); | |
} | |
catch (fast_io::error e) { | |
fast_io::io::perrln("Error opening '", fname, "' : ", e); | |
}; | |
} | |
int main(int argc, char* argv[]) | |
{ | |
if (argc < 3) { | |
if (argc > 0) | |
fast_io::io::perrln("Usage: grep-count-re2 regular-expression file..."); | |
return 1; | |
} | |
#ifdef _OPENMP | |
// Determine the number of threads. | |
int nthds = std::thread::hardware_concurrency(); | |
const char* env_nthds1 = std::getenv("OMP_NUM_THREADS"); | |
const char* env_nthds2 = std::getenv("NUM_THREADS"); | |
if (env_nthds1 && strlen(env_nthds1)) | |
nthds = ::atoi(env_nthds1); | |
else if (env_nthds2 && strlen(env_nthds2)) | |
nthds = ::atoi(env_nthds2); | |
omp_set_dynamic(false); | |
omp_set_num_threads(nthds); | |
#else | |
int nthds = 1; | |
#endif | |
RE2::Options opt; | |
opt.set_log_errors(false); | |
opt.set_case_sensitive(false); | |
// opt.set_encoding(RE2::Options::EncodingUTF8); // default encoding | |
opt.set_encoding(RE2::Options::EncodingLatin1); | |
RE2 pattern (argv[1], opt); | |
if (!pattern.ok()) { | |
fast_io::io::perrln("Invalid regular expression: ", std::string_view(pattern.error())); | |
return 1; | |
} | |
int nfiles = argc - 2; | |
char** fname = &argv[2]; | |
for (int i = 0; i < nfiles; ++i) | |
grep_file(fname[i], pattern, nthds); | |
return 0; | |
} |
Results for 1 GB
for i in $(seq 1 42); do
cat large.txt >> big.txt
done
grep-count-pmap:
$ time OMP_NUM_THREADS=1 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: serial (1) Total Lines: 21000000, Matching Lines: 439152
real 0m3.403s
user 0m3.221s
sys 0m0.182s
$ time OMP_NUM_THREADS=4 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (4) Total Lines: 21000000, Matching Lines: 439152
real 0m0.899s
user 0m3.404s
sys 0m0.172s
$ time OMP_NUM_THREADS=8 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (8) Total Lines: 21000000, Matching Lines: 439152
real 0m0.457s
user 0m3.450s
sys 0m0.141s
$ time OMP_NUM_THREADS=16 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (16) Total Lines: 21000000, Matching Lines: 439152
real 0m0.246s
user 0m3.613s
sys 0m0.189s
$ time OMP_NUM_THREADS=32 taskset -c 0-31 ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
big.txt: parallel (32) Total Lines: 21000000, Matching Lines: 439152
real 0m0.157s
user 0m4.246s
sys 0m0.296s
grep
$ time grep -c "[aA].*[eE].*[iI].*[oO].*[uU]" big.txt
439152
real 0m1.742s
user 0m1.671s
sys 0m0.070s
See also, grep-count-chunk.cc at the Perl monastery. It chunks input similar to Perl MCE.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
omp_scan - https://github.com/parallel-runtimes/CPU-fun:
grep-count-re2:
grep-count-pcre2:
grep-count-pmap:
grep: