marioroy/grep-count-pcre2.cc

## grep-count-pcre2.cc
// Grep PCRE2 C++ OpenMP demonstration for website:
//   https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
//   License: GNU General Public License.
//   See http://www.gnu.org/licenses/ for license information.
//   SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2b.cpp:
//   https://perlmonks.org/?node_id=11152225
//
// Obtain the fast_io library (required dependency):
//   git clone --depth=1 https://github.com/cppfastio/fast_io
//
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
//   clang++ -o grep-count-pcre2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-pcre2.cc -lpcre2-8
//
// Running:
//   Usage: grep-count-pcre2 regular-expression file1 [ file2... ]
//   time ./grep-count-pcre2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt

#include <cstring>
#include <string>
#include <thread>

// C++ wrapper for PCRE2 Library
// https://github.com/jpcre2/jpcre2
// installation: sudo dnf install jpcre2-devel for RPM-based systems
#include <jpcre2.hpp>

// Select the basic data type (char, wchar_t, char16_t or char32_t)
typedef jpcre2::select<char> jp;

#ifdef _OPENMP
#include <omp.h>
#endif

#include <fast_io.h>
#include <fast_io_legacy.h>

// Helper function to get the next up value for integer division
static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
{
   if (dividend % divisor)
      return dividend / divisor + 1;
   else
      return dividend / divisor;
}

#define MIN_CHUNK_SIZE 2097152

// Grep file, count matching lines
static void grep_file(
   std::string_view fname,      // in: file name
   jp::Regex& re,               // in: regular expression
   int nthds                    // in: number of threads
)
{
   try {
      // Load entire file to memory through memory mapping.
      using file_loader_type = fast_io::native_file_loader;
      file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow);

      // Loop contiguous file container. Each thread process approximately 32 chunks.
      std::size_t chunk_size = divide_up(loader.size(), nthds * 32);
      if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
      std::size_t num_chunks = divide_up(loader.size(), chunk_size);

      char const *end{loader.data() + loader.size() - 1};
      std::size_t lines = 0, matched_lines = 0;
      std::string mode = (nthds > 1) ? "parallel" : "serial";

      #pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
      for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
         char const *last{loader.data() + loader.size() - 1};
         char const *first{loader.data()};

         if (chunk_id < num_chunks - 1) {
            last = first + (chunk_size * (chunk_id + 1) - 1);
            if (*last != '\n') last = fast_io::find_lf(last, end);
         }
         if (chunk_id > 0) {
            first += (chunk_size * chunk_id - 1);
            if (*first != '\n') first = fast_io::find_lf(first, end);

            ++first;
         }
#if 1
         // Match all and tally the match count - faster than matching per line.
         // This requires anchoring in the event a line has multiple matches.
         // E.g. "^.*" prepended to pattern during regex construction.
         matched_lines += re.match(std::basic_string(first, last - first + 1), "g");

         while (first <= last) {
            first = fast_io::find_lf(first, last);

            ++lines;
            ++first;
         }
#else
         while (first <= last) {
            auto start_ptr{first}; first = fast_io::find_lf(first, last);
            auto end_ptr{first};

            // include trailing '\n'
            if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
               ++matched_lines;

            ++lines;
            ++first;
         }
#endif
      }

      fast_io::io::println(
         fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
         ", Matching Lines: ", matched_lines );
   }
   catch (fast_io::error e) {
      fast_io::io::perrln("Error opening '", fname, "' : ", e);
   };
}

int main(int argc, char* argv[])
{
   if (argc < 3) {
      if (argc > 0)
         fast_io::io::perrln("Usage: grep-count-pcre2 regular-expression file...");
      return 1;
   }

#ifdef _OPENMP
   // Determine the number of threads.
   int nthds = std::thread::hardware_concurrency();
   const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
   const char* env_nthds2 = std::getenv("NUM_THREADS");

   if (env_nthds1 && strlen(env_nthds1))
      nthds = ::atoi(env_nthds1);
   else if (env_nthds2 && strlen(env_nthds2))
      nthds = ::atoi(env_nthds2);

   omp_set_dynamic(false);
   omp_set_num_threads(nthds);
#else
   int nthds = 1;
#endif

   jp::Regex re(std::string{"^.*"} + argv[1], "mS");
   // anchor pattern to allow searching once per chunk
   // 'm' modifier enables multi-line mode for the regex
   // 'S' modifier is an optimization mode
   if (!re) {
      fast_io::io::perrln("Invalid regular expression: ", std::string_view(re.getErrorMessage()));
      return 1;
   }

   int    nfiles = argc - 2;
   char** fname  = &argv[2];

   for (int i = 0; i < nfiles; ++i)
      grep_file(fname[i], re, nthds);

   return 0;
}

## grep-count-pmap.cc
// Grep PCRE2 and Portable MemoryMap C++ OpenMP demonstration for website:
//   https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
//   License: GNU General Public License.
//   See http://www.gnu.org/licenses/ for license information.
//   SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2c.cpp:
//   https://perlmonks.org/?node_id=11152306
//
// Obtain the Portable Memory Mapping C++ Class (required dependency):
//   git clone --depth=1 https://github.com/stbrumme/portable-memory-mapping
// Copy two files to your source directory: *.cpp and *.h
//   cp portable-memory-mapping/MemoryMapped.* .
//
// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
//   g++ -o grep-count-pmap -std=c++20 -fopenmp -Wall -O3 MemoryMapped.cpp grep-count-pmap.cc -lpcre2-8
//
// Running:
//   Usage: grep-count-pmap regular-expression file1 [ file2... ]
//   time ./grep-count-pmap "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt

#include <iostream>
#include <cstring>
#include <string>
#include <thread>

// C++ wrapper for PCRE2 Library
// https://github.com/jpcre2/jpcre2
// installation: sudo dnf install jpcre2-devel for RPM-based systems
#include <jpcre2.hpp>

// Select the basic data type (char, wchar_t, char16_t or char32_t)
typedef jpcre2::select<char> jp;

#ifdef _OPENMP
#include <omp.h>
#endif

#include "MemoryMapped.h"

// Helper function to find '\n'.
inline constexpr char const* find_lf(char const* first, char const* last)
{
   while (first != last) {
      if (*first == '\n') break;
      ++first;
   }
   return first;
}

// Helper function to get the next up value for integer division.
static long int divide_up(long int dividend, long int divisor)
{
   if (dividend % divisor)
      return dividend / divisor + 1;
   else
      return dividend / divisor;
}

#define MIN_CHUNK_SIZE 2097152

// Grep file, count matching lines
static void grep_file(
   std::string_view fname,      // in: file name
   jp::Regex& re,               // in: regular expression
   int nthds                    // in: number of threads
)
{
   // Map file to memory.
   MemoryMapped loader(fname.data(), MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
   if (!loader.isValid()) {
      std::cerr << "Error opening '" << fname << "' : " << strerror(errno) << '\n';
      return;
   }

   // Get raw pointer to mapped memory.
   char* data = (char*) loader.getData();
   long int filesize = loader.size();

   // Loop contiguous file container. Each thread process approximately 32 chunks.
   long int chunk_size = divide_up(filesize, nthds * 32);
   if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
   long int num_chunks = divide_up(filesize, chunk_size);

   char const *end{data + filesize - 1};
   long int lines = 0, matched_lines = 0;
   std::string mode = (nthds > 1) ? "parallel" : "serial";

   #pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
   for (long int chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
      char const *last{data + filesize - 1};
      char const *first{data};

      if (chunk_id < num_chunks - 1) {
         last = first + (chunk_size * (chunk_id + 1) - 1);
         if (*last != '\n') last = find_lf(last, end);
      }
      if (chunk_id > 0) {
         first += (chunk_size * chunk_id - 1);
         if (*first != '\n') first = find_lf(first, end);

         ++first;
      }

#if 1
      // Match all and tally the match count - faster than matching per line.
      // This requires anchoring in the event a line has multiple matches.
      // E.g. "^.*" prepended to pattern during regex construction.
      matched_lines += re.match(std::basic_string(first, last - first + 1), "g");

      // Count the number of lines in this chunk.
      for (long int i = first - data; i < last - data + 1; i++) {
         lines += (data[i] == '\n');
      }
#else
      // Process line-by-line.
      while (first <= last) {
         auto start_ptr{first}; first = find_lf(first, last);
         auto end_ptr{first};

         // include trailing '\n'
         if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
            ++matched_lines;

         ++lines;
         ++first;
      }
#endif
   }

   loader.close();

   std::cout \
      << fname << ": " << mode << " (" << nthds << ") Total Lines: " << lines \
      << ", Matching Lines: " << matched_lines << '\n';
}

int main(int argc, char* argv[])
{
   if (argc < 3) {
      if (argc > 0)
         std::cerr << "Usage: grep-count-pmap regular-expression file..." << '\n';
      return 1;
   }

#ifdef _OPENMP
   // Determine the number of threads.
   int nthds = std::thread::hardware_concurrency();
   const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
   const char* env_nthds2 = std::getenv("NUM_THREADS");

   if (env_nthds1 && strlen(env_nthds1))
      nthds = ::atoi(env_nthds1);
   else if (env_nthds2 && strlen(env_nthds2))
      nthds = ::atoi(env_nthds2);

   omp_set_dynamic(false);
   omp_set_num_threads(nthds);
#else
   int nthds = 1;
#endif

   jp::Regex re(std::string{"^.*"} + argv[1], "mS");
   // anchor pattern to allow searching once per chunk
   // 'm' modifier enables multi-line mode for the regex
   // 'S' modifier is an optimization mode
   if (!re) {
      std::cerr << "Invalid regular expression: " << re.getErrorMessage() << '\n';
      return 1;
   }

   int    nfiles = argc - 2;
   char** fname  = &argv[2];

   for (int i = 0; i < nfiles; ++i)
      grep_file(fname[i], re, nthds);

   return 0;
}

## grep-count-re2.cc
// Grep RE2 C++ OpenMP demonstration for website:
//   https://cpufun.substack.com/p/processing-a-file-with-openmp
//
// By Mario Roy - May 19, 2023
//   License: GNU General Public License.
//   See http://www.gnu.org/licenses/ for license information.
//   SPDX-License-Identifier: GPL-3.0-or-later
// Based on rtoa-pgatram-allinone2b.cpp:
//   https://perlmonks.org/?node_id=11152225
//
// Obtain the fast_io library (required dependency):
//   git clone --depth=1 https://github.com/cppfastio/fast_io
//
// Compile with g++ or clang++:
//   clang++ -o grep-count-re2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-re2.cc -lre2
//
// Running:
//   Usage: grep-count-re2 regular-expression file1 [ file2... ]
//   time ./grep-count-re2 "[aA].*[eE].*[iI].*[oO].*[uU]" large.txt

#include <cstring>
#include <string>
#include <thread>
#include <re2/re2.h>

#ifdef _OPENMP
#include <omp.h>
#endif

#include <fast_io.h>
#include <fast_io_legacy.h>

// Helper function to get the next up value for integer division
static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
{
   if (dividend % divisor)
      return dividend / divisor + 1;
   else
      return dividend / divisor;
}

#define MIN_CHUNK_SIZE 2097152

// Grep file, count matching lines
static void grep_file(
   std::string_view fname,      // in: file name
   RE2 const& pattern,          // in: regular expression
   int nthds                    // in: number of threads
)
{
   try {
      // Load entire file to memory through memory mapping.
      using file_loader_type = fast_io::native_file_loader;
      file_loader_type loader(fname, fast_io::open_mode::in | fast_io::open_mode::follow);

      // Loop contiguous file container. Each thread process max 1 chunk.
      std::size_t chunk_size = divide_up(loader.size(), nthds * 1);
      if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
      std::size_t num_chunks = divide_up(loader.size(), chunk_size);

      char const *end{loader.data() + loader.size() - 1};
      std::size_t lines = 0, matched_lines = 0;
      std::string mode = (nthds > 1) ? "parallel" : "serial";

      #pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
      for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
         char const *last{loader.data() + loader.size() - 1};
         char const *first{loader.data()};

         if (chunk_id < num_chunks - 1) {
            last = first + (chunk_size * (chunk_id + 1) - 1);
            if (*last != '\n') last = fast_io::find_lf(last, end);
         }
         if (chunk_id > 0) {
            first += (chunk_size * chunk_id - 1);
            if (*first != '\n') first = fast_io::find_lf(first, end);

            ++first;
         }

         while (first <= last) {
            auto start_ptr{first}; first = fast_io::find_lf(first, last);
            auto end_ptr{first};

            // include trailing '\n'
            if (RE2::PartialMatch(std::string_view(start_ptr, end_ptr - start_ptr + 1), pattern))
               ++matched_lines;

            ++lines;
            ++first;
         }
      }

      fast_io::io::println(
         fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
         ", Matching Lines: ", matched_lines );
   }
   catch (fast_io::error e) {
      fast_io::io::perrln("Error opening '", fname, "' : ", e);
   };
}

int main(int argc, char* argv[])
{
   if (argc < 3) {
      if (argc > 0)
         fast_io::io::perrln("Usage: grep-count-re2 regular-expression file...");
      return 1;
   }

#ifdef _OPENMP
   // Determine the number of threads.
   int nthds = std::thread::hardware_concurrency();
   const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
   const char* env_nthds2 = std::getenv("NUM_THREADS");

   if (env_nthds1 && strlen(env_nthds1))
      nthds = ::atoi(env_nthds1);
   else if (env_nthds2 && strlen(env_nthds2))
      nthds = ::atoi(env_nthds2);

   omp_set_dynamic(false);
   omp_set_num_threads(nthds);
#else
   int nthds = 1;
#endif

   RE2::Options opt;
   opt.set_log_errors(false);
   opt.set_case_sensitive(false);
// opt.set_encoding(RE2::Options::EncodingUTF8); // default encoding
   opt.set_encoding(RE2::Options::EncodingLatin1);

   RE2 pattern (argv[1], opt);
   if (!pattern.ok()) {
      fast_io::io::perrln("Invalid regular expression: ", std::string_view(pattern.error()));
      return 1;
   }

   int    nfiles = argc - 2;
   char** fname  = &argv[2];

   for (int i = 0; i < nfiles; ++i)
      grep_file(fname[i], pattern, nthds);

   return 0;
}
	// Grep PCRE2 C++ OpenMP demonstration for website:
	// https://cpufun.substack.com/p/processing-a-file-with-openmp
	//
	// By Mario Roy - May 19, 2023
	// License: GNU General Public License.
	// See http://www.gnu.org/licenses/ for license information.
	// SPDX-License-Identifier: GPL-3.0-or-later
	// Based on rtoa-pgatram-allinone2b.cpp:
	// https://perlmonks.org/?node_id=11152225
	//
	// Obtain the fast_io library (required dependency):
	// git clone --depth=1 https://github.com/cppfastio/fast_io
	//
	// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
	// clang++ -o grep-count-pcre2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-pcre2.cc -lpcre2-8
	//
	// Running:
	// Usage: grep-count-pcre2 regular-expression file1 [ file2... ]
	// time ./grep-count-pcre2 "[aA].[eE].[iI].[oO].[uU]" large.txt

	#include <cstring>
	#include <string>
	#include <thread>

	// C++ wrapper for PCRE2 Library
	// https://github.com/jpcre2/jpcre2
	// installation: sudo dnf install jpcre2-devel for RPM-based systems
	#include <jpcre2.hpp>

	// Select the basic data type (char, wchar_t, char16_t or char32_t)
	typedef jpcre2::select<char> jp;

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	#include <fast_io.h>
	#include <fast_io_legacy.h>

	// Helper function to get the next up value for integer division
	static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
	{
	if (dividend % divisor)
	return dividend / divisor + 1;
	else
	return dividend / divisor;
	}

	#define MIN_CHUNK_SIZE 2097152

	// Grep file, count matching lines
	static void grep_file(
	std::string_view fname, // in: file name
	jp::Regex& re, // in: regular expression
	int nthds // in: number of threads
	)
	{
	try {
	// Load entire file to memory through memory mapping.
	using file_loader_type = fast_io::native_file_loader;
	file_loader_type loader(fname, fast_io::open_mode::in \| fast_io::open_mode::follow);

	// Loop contiguous file container. Each thread process approximately 32 chunks.
	std::size_t chunk_size = divide_up(loader.size(), nthds * 32);
	if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
	std::size_t num_chunks = divide_up(loader.size(), chunk_size);

	char const *end{loader.data() + loader.size() - 1};
	std::size_t lines = 0, matched_lines = 0;
	std::string mode = (nthds > 1) ? "parallel" : "serial";

	#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
	for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
	char const *last{loader.data() + loader.size() - 1};
	char const *first{loader.data()};

	if (chunk_id < num_chunks - 1) {
	last = first + (chunk_size * (chunk_id + 1) - 1);
	if (*last != '\n') last = fast_io::find_lf(last, end);
	}
	if (chunk_id > 0) {
	first += (chunk_size * chunk_id - 1);
	if (*first != '\n') first = fast_io::find_lf(first, end);

	++first;
	}
	#if 1
	// Match all and tally the match count - faster than matching per line.
	// This requires anchoring in the event a line has multiple matches.
	// E.g. "^.*" prepended to pattern during regex construction.
	matched_lines += re.match(std::basic_string(first, last - first + 1), "g");

	while (first <= last) {
	first = fast_io::find_lf(first, last);

	++lines;
	++first;
	}
	#else
	while (first <= last) {
	auto start_ptr{first}; first = fast_io::find_lf(first, last);
	auto end_ptr{first};

	// include trailing '\n'
	if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
	++matched_lines;

	++lines;
	++first;
	}
	#endif
	}

	fast_io::io::println(
	fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
	", Matching Lines: ", matched_lines );
	}
	catch (fast_io::error e) {
	fast_io::io::perrln("Error opening '", fname, "' : ", e);
	};
	}

	int main(int argc, char* argv[])
	{
	if (argc < 3) {
	if (argc > 0)
	fast_io::io::perrln("Usage: grep-count-pcre2 regular-expression file...");
	return 1;
	}

	#ifdef _OPENMP
	// Determine the number of threads.
	int nthds = std::thread::hardware_concurrency();
	const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
	const char* env_nthds2 = std::getenv("NUM_THREADS");

	if (env_nthds1 && strlen(env_nthds1))
	nthds = ::atoi(env_nthds1);
	else if (env_nthds2 && strlen(env_nthds2))
	nthds = ::atoi(env_nthds2);

	omp_set_dynamic(false);
	omp_set_num_threads(nthds);
	#else
	int nthds = 1;
	#endif

	jp::Regex re(std::string{"^.*"} + argv[1], "mS");
	// anchor pattern to allow searching once per chunk
	// 'm' modifier enables multi-line mode for the regex
	// 'S' modifier is an optimization mode
	if (!re) {
	fast_io::io::perrln("Invalid regular expression: ", std::string_view(re.getErrorMessage()));
	return 1;
	}

	int nfiles = argc - 2;
	char** fname = &argv[2];

	for (int i = 0; i < nfiles; ++i)
	grep_file(fname[i], re, nthds);

	return 0;
	}
	// Grep PCRE2 and Portable MemoryMap C++ OpenMP demonstration for website:
	// https://cpufun.substack.com/p/processing-a-file-with-openmp
	//
	// By Mario Roy - May 19, 2023
	// License: GNU General Public License.
	// See http://www.gnu.org/licenses/ for license information.
	// SPDX-License-Identifier: GPL-3.0-or-later
	// Based on rtoa-pgatram-allinone2c.cpp:
	// https://perlmonks.org/?node_id=11152306
	//
	// Obtain the Portable Memory Mapping C++ Class (required dependency):
	// git clone --depth=1 https://github.com/stbrumme/portable-memory-mapping
	// Copy two files to your source directory: .cpp and .h
	// cp portable-memory-mapping/MemoryMapped.* .
	//
	// Compile with g++ or clang++: (Select the library pcre2-8, pcre2-16, or pcre2-32)
	// g++ -o grep-count-pmap -std=c++20 -fopenmp -Wall -O3 MemoryMapped.cpp grep-count-pmap.cc -lpcre2-8
	//
	// Running:
	// Usage: grep-count-pmap regular-expression file1 [ file2... ]
	// time ./grep-count-pmap "[aA].[eE].[iI].[oO].[uU]" large.txt

	#include <iostream>
	#include <cstring>
	#include <string>
	#include <thread>

	// C++ wrapper for PCRE2 Library
	// https://github.com/jpcre2/jpcre2
	// installation: sudo dnf install jpcre2-devel for RPM-based systems
	#include <jpcre2.hpp>

	// Select the basic data type (char, wchar_t, char16_t or char32_t)
	typedef jpcre2::select<char> jp;

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	#include "MemoryMapped.h"

	// Helper function to find '\n'.
	inline constexpr char const* find_lf(char const* first, char const* last)
	{
	while (first != last) {
	if (*first == '\n') break;
	++first;
	}
	return first;
	}

	// Helper function to get the next up value for integer division.
	static long int divide_up(long int dividend, long int divisor)
	{
	if (dividend % divisor)
	return dividend / divisor + 1;
	else
	return dividend / divisor;
	}

	#define MIN_CHUNK_SIZE 2097152

	// Grep file, count matching lines
	static void grep_file(
	std::string_view fname, // in: file name
	jp::Regex& re, // in: regular expression
	int nthds // in: number of threads
	)
	{
	// Map file to memory.
	MemoryMapped loader(fname.data(), MemoryMapped::WholeFile, MemoryMapped::SequentialScan);
	if (!loader.isValid()) {
	std::cerr << "Error opening '" << fname << "' : " << strerror(errno) << '\n';
	return;
	}

	// Get raw pointer to mapped memory.
	char* data = (char*) loader.getData();
	long int filesize = loader.size();

	// Loop contiguous file container. Each thread process approximately 32 chunks.
	long int chunk_size = divide_up(filesize, nthds * 32);
	if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
	long int num_chunks = divide_up(filesize, chunk_size);

	char const *end{data + filesize - 1};
	long int lines = 0, matched_lines = 0;
	std::string mode = (nthds > 1) ? "parallel" : "serial";

	#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
	for (long int chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
	char const *last{data + filesize - 1};
	char const *first{data};

	if (chunk_id < num_chunks - 1) {
	last = first + (chunk_size * (chunk_id + 1) - 1);
	if (*last != '\n') last = find_lf(last, end);
	}
	if (chunk_id > 0) {
	first += (chunk_size * chunk_id - 1);
	if (*first != '\n') first = find_lf(first, end);

	++first;
	}

	#if 1
	// Match all and tally the match count - faster than matching per line.
	// This requires anchoring in the event a line has multiple matches.
	// E.g. "^.*" prepended to pattern during regex construction.
	matched_lines += re.match(std::basic_string(first, last - first + 1), "g");

	// Count the number of lines in this chunk.
	for (long int i = first - data; i < last - data + 1; i++) {
	lines += (data[i] == '\n');
	}
	#else
	// Process line-by-line.
	while (first <= last) {
	auto start_ptr{first}; first = find_lf(first, last);
	auto end_ptr{first};

	// include trailing '\n'
	if (re.match(std::basic_string(start_ptr, end_ptr - start_ptr + 1)))
	++matched_lines;

	++lines;
	++first;
	}
	#endif
	}

	loader.close();

	std::cout \
	<< fname << ": " << mode << " (" << nthds << ") Total Lines: " << lines \
	<< ", Matching Lines: " << matched_lines << '\n';
	}

	int main(int argc, char* argv[])
	{
	if (argc < 3) {
	if (argc > 0)
	std::cerr << "Usage: grep-count-pmap regular-expression file..." << '\n';
	return 1;
	}

	#ifdef _OPENMP
	// Determine the number of threads.
	int nthds = std::thread::hardware_concurrency();
	const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
	const char* env_nthds2 = std::getenv("NUM_THREADS");

	if (env_nthds1 && strlen(env_nthds1))
	nthds = ::atoi(env_nthds1);
	else if (env_nthds2 && strlen(env_nthds2))
	nthds = ::atoi(env_nthds2);

	omp_set_dynamic(false);
	omp_set_num_threads(nthds);
	#else
	int nthds = 1;
	#endif

	jp::Regex re(std::string{"^.*"} + argv[1], "mS");
	// anchor pattern to allow searching once per chunk
	// 'm' modifier enables multi-line mode for the regex
	// 'S' modifier is an optimization mode
	if (!re) {
	std::cerr << "Invalid regular expression: " << re.getErrorMessage() << '\n';
	return 1;
	}

	int nfiles = argc - 2;
	char** fname = &argv[2];

	for (int i = 0; i < nfiles; ++i)
	grep_file(fname[i], re, nthds);

	return 0;
	}
	// Grep RE2 C++ OpenMP demonstration for website:
	// https://cpufun.substack.com/p/processing-a-file-with-openmp
	//
	// By Mario Roy - May 19, 2023
	// License: GNU General Public License.
	// See http://www.gnu.org/licenses/ for license information.
	// SPDX-License-Identifier: GPL-3.0-or-later
	// Based on rtoa-pgatram-allinone2b.cpp:
	// https://perlmonks.org/?node_id=11152225
	//
	// Obtain the fast_io library (required dependency):
	// git clone --depth=1 https://github.com/cppfastio/fast_io
	//
	// Compile with g++ or clang++:
	// clang++ -o grep-count-re2 -std=c++20 -fopenmp -Wall -O3 -I fast_io/include grep-count-re2.cc -lre2
	//
	// Running:
	// Usage: grep-count-re2 regular-expression file1 [ file2... ]
	// time ./grep-count-re2 "[aA].[eE].[iI].[oO].[uU]" large.txt

	#include <cstring>
	#include <string>
	#include <thread>
	#include <re2/re2.h>

	#ifdef _OPENMP
	#include <omp.h>
	#endif

	#include <fast_io.h>
	#include <fast_io_legacy.h>

	// Helper function to get the next up value for integer division
	static std::size_t divide_up(std::size_t dividend, std::size_t divisor)
	{
	if (dividend % divisor)
	return dividend / divisor + 1;
	else
	return dividend / divisor;
	}

	#define MIN_CHUNK_SIZE 2097152

	// Grep file, count matching lines
	static void grep_file(
	std::string_view fname, // in: file name
	RE2 const& pattern, // in: regular expression
	int nthds // in: number of threads
	)
	{
	try {
	// Load entire file to memory through memory mapping.
	using file_loader_type = fast_io::native_file_loader;
	file_loader_type loader(fname, fast_io::open_mode::in \| fast_io::open_mode::follow);

	// Loop contiguous file container. Each thread process max 1 chunk.
	std::size_t chunk_size = divide_up(loader.size(), nthds * 1);
	if (chunk_size < MIN_CHUNK_SIZE) chunk_size = MIN_CHUNK_SIZE;
	std::size_t num_chunks = divide_up(loader.size(), chunk_size);

	char const *end{loader.data() + loader.size() - 1};
	std::size_t lines = 0, matched_lines = 0;
	std::string mode = (nthds > 1) ? "parallel" : "serial";

	#pragma omp parallel for schedule(static, 1) reduction(+:lines, matched_lines)
	for (std::size_t chunk_id = 0; chunk_id < num_chunks; ++chunk_id) {
	char const *last{loader.data() + loader.size() - 1};
	char const *first{loader.data()};

	if (chunk_id < num_chunks - 1) {
	last = first + (chunk_size * (chunk_id + 1) - 1);
	if (*last != '\n') last = fast_io::find_lf(last, end);
	}
	if (chunk_id > 0) {
	first += (chunk_size * chunk_id - 1);
	if (*first != '\n') first = fast_io::find_lf(first, end);

	++first;
	}

	while (first <= last) {
	auto start_ptr{first}; first = fast_io::find_lf(first, last);
	auto end_ptr{first};

	// include trailing '\n'
	if (RE2::PartialMatch(std::string_view(start_ptr, end_ptr - start_ptr + 1), pattern))
	++matched_lines;

	++lines;
	++first;
	}
	}

	fast_io::io::println(
	fname, ": ", mode, " (", nthds, ") Total Lines: ", lines,
	", Matching Lines: ", matched_lines );
	}
	catch (fast_io::error e) {
	fast_io::io::perrln("Error opening '", fname, "' : ", e);
	};
	}

	int main(int argc, char* argv[])
	{
	if (argc < 3) {
	if (argc > 0)
	fast_io::io::perrln("Usage: grep-count-re2 regular-expression file...");
	return 1;
	}

	#ifdef _OPENMP
	// Determine the number of threads.
	int nthds = std::thread::hardware_concurrency();
	const char* env_nthds1 = std::getenv("OMP_NUM_THREADS");
	const char* env_nthds2 = std::getenv("NUM_THREADS");

	if (env_nthds1 && strlen(env_nthds1))
	nthds = ::atoi(env_nthds1);
	else if (env_nthds2 && strlen(env_nthds2))
	nthds = ::atoi(env_nthds2);

	omp_set_dynamic(false);
	omp_set_num_threads(nthds);
	#else
	int nthds = 1;
	#endif

	RE2::Options opt;
	opt.set_log_errors(false);
	opt.set_case_sensitive(false);
	// opt.set_encoding(RE2::Options::EncodingUTF8); // default encoding
	opt.set_encoding(RE2::Options::EncodingLatin1);

	RE2 pattern (argv[1], opt);
	if (!pattern.ok()) {
	fast_io::io::perrln("Invalid regular expression: ", std::string_view(pattern.error()));
	return 1;
	}

	int nfiles = argc - 2;
	char** fname = &argv[2];

	for (int i = 0; i < nfiles; ++i)
	grep_file(fname[i], pattern, nthds);

	return 0;
	}